code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56 from ganeti import query
  57 from ganeti import qlang
  58 from ganeti import opcodes
  59 from ganeti import ht
  60 from ganeti import rpc
  61 from ganeti import runtime
  62 from ganeti import pathutils
  63 from ganeti import vcluster
  64 from ganeti import network
  65 from ganeti.masterd import iallocator
  66
  67 import ganeti.masterd.instance # pylint: disable=W0611
  68
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141
 142     # Dictionaries used to declare locking needs to mcpu
 143     self.needed_locks = None
 144     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 145     self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
 146
 147     self.add_locks = {}
 148     self.remove_locks = {}
 149
 150     # Used to force good behavior when calling helper functions
 151     self.recalculate_locks = {}
 152
 153     # logging
 154     self.Log = processor.Log # pylint: disable=C0103
 155     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 156     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 157     self.LogStep = processor.LogStep # pylint: disable=C0103
 158     # support for dry-run
 159     self.dry_run_result = None
 160     # support for generic debug attribute
 161     if (not hasattr(self.op, "debug_level") or
 162         not isinstance(self.op.debug_level, int)):
 163       self.op.debug_level = 0
 164
 165     # Tasklets
 166     self.tasklets = None
 167
 168     # Validate opcode parameters and set defaults
 169     self.op.Validate(True)
 170
 171     self.CheckArguments()
 172
 173   def CheckArguments(self):
 174     """Check syntactic validity for the opcode arguments.
 175
 176     This method is for doing a simple syntactic check and ensure
 177     validity of opcode parameters, without any cluster-related
 178     checks. While the same can be accomplished in ExpandNames and/or
 179     CheckPrereq, doing these separate is better because:
 180
 181       - ExpandNames is left as as purely a lock-related function
 182       - CheckPrereq is run after we have acquired locks (and possible
 183         waited for them)
 184
 185     The function is allowed to change the self.op attribute so that
 186     later methods can no longer worry about missing parameters.
 187
 188     """
 189     pass
 190
 191   def ExpandNames(self):
 192     """Expand names for this LU.
 193
 194     This method is called before starting to execute the opcode, and it should
 195     update all the parameters of the opcode to their canonical form (e.g. a
 196     short node name must be fully expanded after this method has successfully
 197     completed). This way locking, hooks, logging, etc. can work correctly.
 198
 199     LUs which implement this method must also populate the self.needed_locks
 200     member, as a dict with lock levels as keys, and a list of needed lock names
 201     as values. Rules:
 202
 203       - use an empty dict if you don't need any lock
 204       - if you don't need any lock at a particular level omit that
 205         level (note that in this case C{DeclareLocks} won't be called
 206         at all for that level)
 207       - if you need locks at a level, but you can't calculate it in
 208         this function, initialise that level with an empty list and do
 209         further processing in L{LogicalUnit.DeclareLocks} (see that
 210         function's docstring)
 211       - don't put anything for the BGL level
 212       - if you want all locks at a level use L{locking.ALL_SET} as a value
 213
 214     If you need to share locks (rather than acquire them exclusively) at one
 215     level you can modify self.share_locks, setting a true value (usually 1) for
 216     that level. By default locks are not shared.
 217
 218     This function can also define a list of tasklets, which then will be
 219     executed in order instead of the usual LU-level CheckPrereq and Exec
 220     functions, if those are not defined by the LU.
 221
 222     Examples::
 223
 224       # Acquire all nodes and one instance
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: locking.ALL_SET,
 227         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 228       }
 229       # Acquire just two nodes
 230       self.needed_locks = {
 231         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 232       }
 233       # Acquire no locks
 234       self.needed_locks = {} # No, you can't leave it to the default value None
 235
 236     """
 237     # The implementation of this method is mandatory only if the new LU is
 238     # concurrent, so that old LUs don't need to be changed all at the same
 239     # time.
 240     if self.REQ_BGL:
 241       self.needed_locks = {} # Exclusive LUs don't need locks.
 242     else:
 243       raise NotImplementedError
 244
 245   def DeclareLocks(self, level):
 246     """Declare LU locking needs for a level
 247
 248     While most LUs can just declare their locking needs at ExpandNames time,
 249     sometimes there's the need to calculate some locks after having acquired
 250     the ones before. This function is called just before acquiring locks at a
 251     particular level, but after acquiring the ones at lower levels, and permits
 252     such calculations. It can be used to modify self.needed_locks, and by
 253     default it does nothing.
 254
 255     This function is only called if you have something already set in
 256     self.needed_locks for the level.
 257
 258     @param level: Locking level which is going to be locked
 259     @type level: member of L{ganeti.locking.LEVELS}
 260
 261     """
 262
 263   def CheckPrereq(self):
 264     """Check prerequisites for this LU.
 265
 266     This method should check that the prerequisites for the execution
 267     of this LU are fulfilled. It can do internode communication, but
 268     it should be idempotent - no cluster or system changes are
 269     allowed.
 270
 271     The method should raise errors.OpPrereqError in case something is
 272     not fulfilled. Its return value is ignored.
 273
 274     This method should also update all the parameters of the opcode to
 275     their canonical form if it hasn't been done by ExpandNames before.
 276
 277     """
 278     if self.tasklets is not None:
 279       for (idx, tl) in enumerate(self.tasklets):
 280         logging.debug("Checking prerequisites for tasklet %s/%s",
 281                       idx + 1, len(self.tasklets))
 282         tl.CheckPrereq()
 283     else:
 284       pass
 285
 286   def Exec(self, feedback_fn):
 287     """Execute the LU.
 288
 289     This method should implement the actual work. It should raise
 290     errors.OpExecError for failures that are somewhat dealt with in
 291     code, or expected.
 292
 293     """
 294     if self.tasklets is not None:
 295       for (idx, tl) in enumerate(self.tasklets):
 296         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 297         tl.Exec(feedback_fn)
 298     else:
 299       raise NotImplementedError
 300
 301   def BuildHooksEnv(self):
 302     """Build hooks environment for this LU.
 303
 304     @rtype: dict
 305     @return: Dictionary containing the environment that will be used for
 306       running the hooks for this LU. The keys of the dict must not be prefixed
 307       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 308       will extend the environment with additional variables. If no environment
 309       should be defined, an empty dictionary should be returned (not C{None}).
 310     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 311       will not be called.
 312
 313     """
 314     raise NotImplementedError
 315
 316   def BuildHooksNodes(self):
 317     """Build list of nodes to run LU's hooks.
 318
 319     @rtype: tuple; (list, list)
 320     @return: Tuple containing a list of node names on which the hook
 321       should run before the execution and a list of node names on which the
 322       hook should run after the execution. No nodes should be returned as an
 323       empty list (and not None).
 324     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 325       will not be called.
 326
 327     """
 328     raise NotImplementedError
 329
 330   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 331     """Notify the LU about the results of its hooks.
 332
 333     This method is called every time a hooks phase is executed, and notifies
 334     the Logical Unit about the hooks' result. The LU can then use it to alter
 335     its result based on the hooks.  By default the method does nothing and the
 336     previous result is passed back unchanged but any LU can define it if it
 337     wants to use the local cluster hook-scripts somehow.
 338
 339     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 340         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 341     @param hook_results: the results of the multi-node hooks rpc call
 342     @param feedback_fn: function used send feedback back to the caller
 343     @param lu_result: the previous Exec result this LU had, or None
 344         in the PRE phase
 345     @return: the new Exec result, based on the previous result
 346         and hook results
 347
 348     """
 349     # API must be kept, thus we ignore the unused argument and could
 350     # be a function warnings
 351     # pylint: disable=W0613,R0201
 352     return lu_result
 353
 354   def _ExpandAndLockInstance(self):
 355     """Helper function to expand and lock an instance.
 356
 357     Many LUs that work on an instance take its name in self.op.instance_name
 358     and need to expand it and then declare the expanded name for locking. This
 359     function does it, and then updates self.op.instance_name to the expanded
 360     name. It also initializes needed_locks as a dict, if this hasn't been done
 361     before.
 362
 363     """
 364     if self.needed_locks is None:
 365       self.needed_locks = {}
 366     else:
 367       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 368         "_ExpandAndLockInstance called with instance-level locks set"
 369     self.op.instance_name = _ExpandInstanceName(self.cfg,
 370                                                 self.op.instance_name)
 371     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 372
 373   def _LockInstancesNodes(self, primary_only=False,
 374                           level=locking.LEVEL_NODE):
 375     """Helper function to declare instances' nodes for locking.
 376
 377     This function should be called after locking one or more instances to lock
 378     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 379     with all primary or secondary nodes for instances already locked and
 380     present in self.needed_locks[locking.LEVEL_INSTANCE].
 381
 382     It should be called from DeclareLocks, and for safety only works if
 383     self.recalculate_locks[locking.LEVEL_NODE] is set.
 384
 385     In the future it may grow parameters to just lock some instance's nodes, or
 386     to just lock primaries or secondary nodes, if needed.
 387
 388     If should be called in DeclareLocks in a way similar to::
 389
 390       if level == locking.LEVEL_NODE:
 391         self._LockInstancesNodes()
 392
 393     @type primary_only: boolean
 394     @param primary_only: only lock primary nodes of locked instances
 395     @param level: Which lock level to use for locking nodes
 396
 397     """
 398     assert level in self.recalculate_locks, \
 399       "_LockInstancesNodes helper function called with no nodes to recalculate"
 400
 401     # TODO: check if we're really been called with the instance locks held
 402
 403     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 404     # future we might want to have different behaviors depending on the value
 405     # of self.recalculate_locks[locking.LEVEL_NODE]
 406     wanted_nodes = []
 407     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 408     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 409       wanted_nodes.append(instance.primary_node)
 410       if not primary_only:
 411         wanted_nodes.extend(instance.secondary_nodes)
 412
 413     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 414       self.needed_locks[level] = wanted_nodes
 415     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 416       self.needed_locks[level].extend(wanted_nodes)
 417     else:
 418       raise errors.ProgrammerError("Unknown recalculation mode")
 419
 420     del self.recalculate_locks[level]
 421
 422
 423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 424   """Simple LU which runs no hooks.
 425
 426   This LU is intended as a parent for other LogicalUnits which will
 427   run no hooks, in order to reduce duplicate code.
 428
 429   """
 430   HPATH = None
 431   HTYPE = None
 432
 433   def BuildHooksEnv(self):
 434     """Empty BuildHooksEnv for NoHooksLu.
 435
 436     This just raises an error.
 437
 438     """
 439     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 440
 441   def BuildHooksNodes(self):
 442     """Empty BuildHooksNodes for NoHooksLU.
 443
 444     """
 445     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 446
 447
 448 class Tasklet:
 449   """Tasklet base class.
 450
 451   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 452   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 453   tasklets know nothing about locks.
 454
 455   Subclasses must follow these rules:
 456     - Implement CheckPrereq
 457     - Implement Exec
 458
 459   """
 460   def __init__(self, lu):
 461     self.lu = lu
 462
 463     # Shortcuts
 464     self.cfg = lu.cfg
 465     self.rpc = lu.rpc
 466
 467   def CheckPrereq(self):
 468     """Check prerequisites for this tasklets.
 469
 470     This method should check whether the prerequisites for the execution of
 471     this tasklet are fulfilled. It can do internode communication, but it
 472     should be idempotent - no cluster or system changes are allowed.
 473
 474     The method should raise errors.OpPrereqError in case something is not
 475     fulfilled. Its return value is ignored.
 476
 477     This method should also update all parameters to their canonical form if it
 478     hasn't been done before.
 479
 480     """
 481     pass
 482
 483   def Exec(self, feedback_fn):
 484     """Execute the tasklet.
 485
 486     This method should implement the actual work. It should raise
 487     errors.OpExecError for failures that are somewhat dealt with in code, or
 488     expected.
 489
 490     """
 491     raise NotImplementedError
 492
 493
 494 class _QueryBase:
 495   """Base for query utility classes.
 496
 497   """
 498   #: Attribute holding field definitions
 499   FIELDS = None
 500
 501   #: Field to sort by
 502   SORT_FIELD = "name"
 503
 504   def __init__(self, qfilter, fields, use_locking):
 505     """Initializes this class.
 506
 507     """
 508     self.use_locking = use_locking
 509
 510     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 511                              namefield=self.SORT_FIELD)
 512     self.requested_data = self.query.RequestedData()
 513     self.names = self.query.RequestedNames()
 514
 515     # Sort only if no names were requested
 516     self.sort_by_name = not self.names
 517
 518     self.do_locking = None
 519     self.wanted = None
 520
 521   def _GetNames(self, lu, all_names, lock_level):
 522     """Helper function to determine names asked for in the query.
 523
 524     """
 525     if self.do_locking:
 526       names = lu.owned_locks(lock_level)
 527     else:
 528       names = all_names
 529
 530     if self.wanted == locking.ALL_SET:
 531       assert not self.names
 532       # caller didn't specify names, so ordering is not important
 533       return utils.NiceSort(names)
 534
 535     # caller specified names and we must keep the same order
 536     assert self.names
 537     assert not self.do_locking or lu.glm.is_owned(lock_level)
 538
 539     missing = set(self.wanted).difference(names)
 540     if missing:
 541       raise errors.OpExecError("Some items were removed before retrieving"
 542                                " their data: %s" % missing)
 543
 544     # Return expanded names
 545     return self.wanted
 546
 547   def ExpandNames(self, lu):
 548     """Expand names for this query.
 549
 550     See L{LogicalUnit.ExpandNames}.
 551
 552     """
 553     raise NotImplementedError()
 554
 555   def DeclareLocks(self, lu, level):
 556     """Declare locks for this query.
 557
 558     See L{LogicalUnit.DeclareLocks}.
 559
 560     """
 561     raise NotImplementedError()
 562
 563   def _GetQueryData(self, lu):
 564     """Collects all data for this query.
 565
 566     @return: Query data object
 567
 568     """
 569     raise NotImplementedError()
 570
 571   def NewStyleQuery(self, lu):
 572     """Collect data and execute query.
 573
 574     """
 575     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 576                                   sort_by_name=self.sort_by_name)
 577
 578   def OldStyleQuery(self, lu):
 579     """Collect data and execute query.
 580
 581     """
 582     return self.query.OldStyleQuery(self._GetQueryData(lu),
 583                                     sort_by_name=self.sort_by_name)
 584
 585
 586 def _ShareAll():
 587   """Returns a dict declaring all lock levels shared.
 588
 589   """
 590   return dict.fromkeys(locking.LEVELS, 1)
 591
 592
 593 def _AnnotateDiskParams(instance, devs, cfg):
 594   """Little helper wrapper to the rpc annotation method.
 595
 596   @param instance: The instance object
 597   @type devs: List of L{objects.Disk}
 598   @param devs: The root devices (not any of its children!)
 599   @param cfg: The config object
 600   @returns The annotated disk copies
 601   @see L{rpc.AnnotateDiskParams}
 602
 603   """
 604   return rpc.AnnotateDiskParams(instance.disk_template, devs,
 605                                 cfg.GetInstanceDiskParams(instance))
 606
 607
 608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 609                               cur_group_uuid):
 610   """Checks if node groups for locked instances are still correct.
 611
 612   @type cfg: L{config.ConfigWriter}
 613   @param cfg: Cluster configuration
 614   @type instances: dict; string as key, L{objects.Instance} as value
 615   @param instances: Dictionary, instance name as key, instance object as value
 616   @type owned_groups: iterable of string
 617   @param owned_groups: List of owned groups
 618   @type owned_nodes: iterable of string
 619   @param owned_nodes: List of owned nodes
 620   @type cur_group_uuid: string or None
 621   @param cur_group_uuid: Optional group UUID to check against instance's groups
 622
 623   """
 624   for (name, inst) in instances.items():
 625     assert owned_nodes.issuperset(inst.all_nodes), \
 626       "Instance %s's nodes changed while we kept the lock" % name
 627
 628     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 629
 630     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 631       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 632
 633
 634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
 635                              primary_only=False):
 636   """Checks if the owned node groups are still correct for an instance.
 637
 638   @type cfg: L{config.ConfigWriter}
 639   @param cfg: The cluster configuration
 640   @type instance_name: string
 641   @param instance_name: Instance name
 642   @type owned_groups: set or frozenset
 643   @param owned_groups: List of currently owned node groups
 644   @type primary_only: boolean
 645   @param primary_only: Whether to check node groups for only the primary node
 646
 647   """
 648   inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
 649
 650   if not owned_groups.issuperset(inst_groups):
 651     raise errors.OpPrereqError("Instance %s's node groups changed since"
 652                                " locks were acquired, current groups are"
 653                                " are '%s', owning groups '%s'; retry the"
 654                                " operation" %
 655                                (instance_name,
 656                                 utils.CommaJoin(inst_groups),
 657                                 utils.CommaJoin(owned_groups)),
 658                                errors.ECODE_STATE)
 659
 660   return inst_groups
 661
 662
 663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 664   """Checks if the instances in a node group are still correct.
 665
 666   @type cfg: L{config.ConfigWriter}
 667   @param cfg: The cluster configuration
 668   @type group_uuid: string
 669   @param group_uuid: Node group UUID
 670   @type owned_instances: set or frozenset
 671   @param owned_instances: List of currently owned instances
 672
 673   """
 674   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 675   if owned_instances != wanted_instances:
 676     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 677                                " locks were acquired, wanted '%s', have '%s';"
 678                                " retry the operation" %
 679                                (group_uuid,
 680                                 utils.CommaJoin(wanted_instances),
 681                                 utils.CommaJoin(owned_instances)),
 682                                errors.ECODE_STATE)
 683
 684   return wanted_instances
 685
 686
 687 def _SupportsOob(cfg, node):
 688   """Tells if node supports OOB.
 689
 690   @type cfg: L{config.ConfigWriter}
 691   @param cfg: The cluster configuration
 692   @type node: L{objects.Node}
 693   @param node: The node
 694   @return: The OOB script if supported or an empty string otherwise
 695
 696   """
 697   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 698
 699
 700 def _CopyLockList(names):
 701   """Makes a copy of a list of lock names.
 702
 703   Handles L{locking.ALL_SET} correctly.
 704
 705   """
 706   if names == locking.ALL_SET:
 707     return locking.ALL_SET
 708   else:
 709     return names[:]
 710
 711
 712 def _GetWantedNodes(lu, nodes):
 713   """Returns list of checked and expanded node names.
 714
 715   @type lu: L{LogicalUnit}
 716   @param lu: the logical unit on whose behalf we execute
 717   @type nodes: list
 718   @param nodes: list of node names or None for all nodes
 719   @rtype: list
 720   @return: the list of nodes, sorted
 721   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 722
 723   """
 724   if nodes:
 725     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 726
 727   return utils.NiceSort(lu.cfg.GetNodeList())
 728
 729
 730 def _GetWantedInstances(lu, instances):
 731   """Returns list of checked and expanded instance names.
 732
 733   @type lu: L{LogicalUnit}
 734   @param lu: the logical unit on whose behalf we execute
 735   @type instances: list
 736   @param instances: list of instance names or None for all instances
 737   @rtype: list
 738   @return: the list of instances, sorted
 739   @raise errors.OpPrereqError: if the instances parameter is wrong type
 740   @raise errors.OpPrereqError: if any of the passed instances is not found
 741
 742   """
 743   if instances:
 744     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 745   else:
 746     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 747   return wanted
 748
 749
 750 def _GetUpdatedParams(old_params, update_dict,
 751                       use_default=True, use_none=False):
 752   """Return the new version of a parameter dictionary.
 753
 754   @type old_params: dict
 755   @param old_params: old parameters
 756   @type update_dict: dict
 757   @param update_dict: dict containing new parameter values, or
 758       constants.VALUE_DEFAULT to reset the parameter to its default
 759       value
 760   @param use_default: boolean
 761   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 762       values as 'to be deleted' values
 763   @param use_none: boolean
 764   @type use_none: whether to recognise C{None} values as 'to be
 765       deleted' values
 766   @rtype: dict
 767   @return: the new parameter dictionary
 768
 769   """
 770   params_copy = copy.deepcopy(old_params)
 771   for key, val in update_dict.iteritems():
 772     if ((use_default and val == constants.VALUE_DEFAULT) or
 773         (use_none and val is None)):
 774       try:
 775         del params_copy[key]
 776       except KeyError:
 777         pass
 778     else:
 779       params_copy[key] = val
 780   return params_copy
 781
 782
 783 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 784   """Return the new version of a instance policy.
 785
 786   @param group_policy: whether this policy applies to a group and thus
 787     we should support removal of policy entries
 788
 789   """
 790   use_none = use_default = group_policy
 791   ipolicy = copy.deepcopy(old_ipolicy)
 792   for key, value in new_ipolicy.items():
 793     if key not in constants.IPOLICY_ALL_KEYS:
 794       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 795                                  errors.ECODE_INVAL)
 796     if key in constants.IPOLICY_ISPECS:
 797       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 798       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 799                                        use_none=use_none,
 800                                        use_default=use_default)
 801     else:
 802       if (not value or value == [constants.VALUE_DEFAULT] or
 803           value == constants.VALUE_DEFAULT):
 804         if group_policy:
 805           del ipolicy[key]
 806         else:
 807           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 808                                      " on the cluster'" % key,
 809                                      errors.ECODE_INVAL)
 810       else:
 811         if key in constants.IPOLICY_PARAMETERS:
 812           # FIXME: we assume all such values are float
 813           try:
 814             ipolicy[key] = float(value)
 815           except (TypeError, ValueError), err:
 816             raise errors.OpPrereqError("Invalid value for attribute"
 817                                        " '%s': '%s', error: %s" %
 818                                        (key, value, err), errors.ECODE_INVAL)
 819         else:
 820           # FIXME: we assume all others are lists; this should be redone
 821           # in a nicer way
 822           ipolicy[key] = list(value)
 823   try:
 824     objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
 825   except errors.ConfigurationError, err:
 826     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 827                                errors.ECODE_INVAL)
 828   return ipolicy
 829
 830
 831 def _UpdateAndVerifySubDict(base, updates, type_check):
 832   """Updates and verifies a dict with sub dicts of the same type.
 833
 834   @param base: The dict with the old data
 835   @param updates: The dict with the new data
 836   @param type_check: Dict suitable to ForceDictType to verify correct types
 837   @returns: A new dict with updated and verified values
 838
 839   """
 840   def fn(old, value):
 841     new = _GetUpdatedParams(old, value)
 842     utils.ForceDictType(new, type_check)
 843     return new
 844
 845   ret = copy.deepcopy(base)
 846   ret.update(dict((key, fn(base.get(key, {}), value))
 847                   for key, value in updates.items()))
 848   return ret
 849
 850
 851 def _MergeAndVerifyHvState(op_input, obj_input):
 852   """Combines the hv state from an opcode with the one of the object
 853
 854   @param op_input: The input dict from the opcode
 855   @param obj_input: The input dict from the objects
 856   @return: The verified and updated dict
 857
 858   """
 859   if op_input:
 860     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 861     if invalid_hvs:
 862       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 863                                  " %s" % utils.CommaJoin(invalid_hvs),
 864                                  errors.ECODE_INVAL)
 865     if obj_input is None:
 866       obj_input = {}
 867     type_check = constants.HVSTS_PARAMETER_TYPES
 868     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 869
 870   return None
 871
 872
 873 def _MergeAndVerifyDiskState(op_input, obj_input):
 874   """Combines the disk state from an opcode with the one of the object
 875
 876   @param op_input: The input dict from the opcode
 877   @param obj_input: The input dict from the objects
 878   @return: The verified and updated dict
 879   """
 880   if op_input:
 881     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 882     if invalid_dst:
 883       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 884                                  utils.CommaJoin(invalid_dst),
 885                                  errors.ECODE_INVAL)
 886     type_check = constants.DSS_PARAMETER_TYPES
 887     if obj_input is None:
 888       obj_input = {}
 889     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 890                                               type_check))
 891                 for key, value in op_input.items())
 892
 893   return None
 894
 895
 896 def _ReleaseLocks(lu, level, names=None, keep=None):
 897   """Releases locks owned by an LU.
 898
 899   @type lu: L{LogicalUnit}
 900   @param level: Lock level
 901   @type names: list or None
 902   @param names: Names of locks to release
 903   @type keep: list or None
 904   @param keep: Names of locks to retain
 905
 906   """
 907   assert not (keep is not None and names is not None), \
 908          "Only one of the 'names' and the 'keep' parameters can be given"
 909
 910   if names is not None:
 911     should_release = names.__contains__
 912   elif keep:
 913     should_release = lambda name: name not in keep
 914   else:
 915     should_release = None
 916
 917   owned = lu.owned_locks(level)
 918   if not owned:
 919     # Not owning any lock at this level, do nothing
 920     pass
 921
 922   elif should_release:
 923     retain = []
 924     release = []
 925
 926     # Determine which locks to release
 927     for name in owned:
 928       if should_release(name):
 929         release.append(name)
 930       else:
 931         retain.append(name)
 932
 933     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 934
 935     # Release just some locks
 936     lu.glm.release(level, names=release)
 937
 938     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 939   else:
 940     # Release everything
 941     lu.glm.release(level)
 942
 943     assert not lu.glm.is_owned(level), "No locks should be owned"
 944
 945
 946 def _MapInstanceDisksToNodes(instances):
 947   """Creates a map from (node, volume) to instance name.
 948
 949   @type instances: list of L{objects.Instance}
 950   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 951
 952   """
 953   return dict(((node, vol), inst.name)
 954               for inst in instances
 955               for (node, vols) in inst.MapLVsByNode().items()
 956               for vol in vols)
 957
 958
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  Any exception raised while running the hooks is caught and only reported
  through C{lu.LogWarning}; it is not propagated to the caller.

  @param lu: the LU on whose behalf the hooks are run; its C{proc} is used
      to build the hooks manager
  @param node_name: the name of the only node on which the post phase is run

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  # Deliberately broad: a failing post hook is downgraded to a warning
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",
                  node_name, err)
 969
 970
 971 def _CheckOutputFields(static, dynamic, selected):
 972   """Checks whether all selected fields are valid.
 973
 974   @type static: L{utils.FieldSet}
 975   @param static: static fields set
 976   @type dynamic: L{utils.FieldSet}
 977   @param dynamic: dynamic fields set
 978
 979   """
 980   f = utils.FieldSet()
 981   f.Extend(static)
 982   f.Extend(dynamic)
 983
 984   delta = f.NonMatching(selected)
 985   if delta:
 986     raise errors.OpPrereqError("Unknown output fields selected: %s"
 987                                % ",".join(delta), errors.ECODE_INVAL)
 988
 989
 990 def _CheckGlobalHvParams(params):
 991   """Validates that given hypervisor params are not global ones.
 992
 993   This will ensure that instances don't get customised versions of
 994   global params.
 995
 996   """
 997   used_globals = constants.HVC_GLOBALS.intersection(params)
 998   if used_globals:
 999     msg = ("The following hypervisor parameters are global and cannot"
1000            " be customized at instance level, please modify them at"
1001            " cluster level: %s" % utils.CommaJoin(used_globals))
1002     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1003
1004
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if not lu.cfg.GetNodeInfo(node).offline:
    return

  if msg is None:
    msg = "Can't use offline node"
  raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1018
1019
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if not node_info.drained:
    return

  raise errors.OpPrereqError("Can't use drained node %s" % node,
                             errors.ECODE_STATE)
1031
1032
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if node_info.vm_capable:
    return

  raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                             errors.ECODE_STATE)
1044
1045
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  The OS is queried from the node via RPC; unless C{force_variant} is set,
  the returned OS object is additionally checked against the variant
  contained in C{os_name}.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  os_result = lu.rpc.call_os_get(node, os_name)
  failure_msg = ("OS '%s' not in supported OS list for node %s" %
                 (os_name, node))
  os_result.Raise(failure_msg, prereq=True, ecode=errors.ECODE_INVAL)

  if force_variant:
    return
  _CheckOSVariant(os_result.payload, os_name)
1062
1063
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  ip_check = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  ip_check.Raise("Failure checking secondary ip on node %s" % node,
                 prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if ip_check.payload:
    return

  msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
         " please fix and re-run this command" % secondary_ip)
  if not prereq:
    raise errors.OpExecError(msg)
  raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1089
1090
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  @return: the result of reading L{pathutils.CLUSTER_DOMAIN_SECRET_FILE}
      as a one-line file in strict mode

  """
  secret_file = pathutils.CLUSTER_DOMAIN_SECRET_FILE
  return utils.ReadOneLineFile(secret_file, strict=True)
1097
1098
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  If "up" (L{constants.ADMINST_UP}) is not among the required states, the
  primary node is additionally asked whether the instance is actually
  running; that check is skipped with a warning if the primary node is
  marked offline.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: the admin states the instance is allowed to be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))

  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP in req_states:
    # A running instance is acceptable, no need to ask the node
    return

  pnode = instance.primary_node
  if lu.cfg.GetNodeInfo(pnode).offline:
    lu.LogWarning("Primary node offline, ignoring check that instance"
                   " is down")
    return

  running = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  running.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)
  if instance.name in running.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, msg), errors.ECODE_STATE)
1128
1129
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  A limit which is not set for C{name} in the policy defaults to the value
  itself, i.e. it can not be violated.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing the min and max specs (under the
      L{constants.ISPECS_MIN} and L{constants.ISPECS_MAX} keys)
  @param value: actual value that we want to use
  @return: None if the value is unset, automatic or within range, otherwise
      a message describing the violation

  """
  if value in (None, constants.VALUE_AUTO):
    return None

  upper = ipolicy[constants.ISPECS_MAX].get(name, value)
  lower = ipolicy[constants.ISPECS_MIN].get(name, value)
  if not (value > upper or lower > value):
    return None

  if qualifier:
    fqn = "%s/%s" % (name, qualifier)
  else:
    fqn = name
  return ("%s value %s is not in range [%s, %s]" %
          (fqn, value, lower, upper))
1154
1155
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  Every spec is passed to C{_compute_fn} on its own; each disk size is
  checked separately, qualified by the index of the disk.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  checks = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ]
  for (idx, size) in enumerate(disk_sizes):
    checks.append((constants.ISPEC_DISK_SIZE, str(idx), size))

  violations = []
  for (name, qualifier, value) in checks:
    problem = _compute_fn(name, qualifier, ipolicy, value)
    # Only keep results which actually describe a violation
    if problem:
      violations.append(problem)
  return violations
1193
1194
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  Memory, cpu and spindle values are read from the instance's own
  C{beparams} (None if not set there); disk and nic data come from the
  instance's C{disks} and C{nics} lists.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  beparams = instance.beparams
  mem_size = beparams.get(constants.BE_MAXMEM, None)
  cpu_count = beparams.get(constants.BE_VCPUS, None)
  spindle_use = beparams.get(constants.BE_SPINDLE_USE, None)

  disks = instance.disks
  disk_sizes = [disk.size for disk in disks]

  return _compute_fn(ipolicy, mem_size, cpu_count, len(disks),
                     len(instance.nics), disk_sizes, spindle_use)
1216
1217
def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  Specs missing from C{instance_spec} default to None (memory, cpu,
  spindle use), zero (disk and nic count) or an empty list (disk sizes).

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  spec = instance_spec.get

  mem_size = spec(constants.ISPEC_MEM_SIZE, None)
  cpu_count = spec(constants.ISPEC_CPU_COUNT, None)
  disk_count = spec(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = spec(constants.ISPEC_DISK_SIZE, [])
  nic_count = spec(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = spec(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
1239
1240
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: an empty list if the group does not change, otherwise the result
      of C{_compute_fn} for the instance
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    # Staying within the same group, nothing is verified
    return []

  return _compute_fn(ipolicy, instance)
1258
1259
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param lu: the LU on behalf of which we make the check
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate (a node object; its C{group}
      attribute is used)
  @param ignore: Ignore violations of the ipolicy (they are only logged
      as a warning)
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @raise errors.OpPrereqError: if the policy is violated and C{ignore}
      is not set
  @see: L{_ComputeIPolicySpecViolation}

  """
  current_node = lu.cfg.GetNodeInfo(instance.primary_node)
  violations = _compute_fn(ipolicy, instance, current_node.group, node.group)
  if not violations:
    return

  msg = ("Instance does not meet target node group's (%s) instance"
         " policy: %s") % (node.group, utils.CommaJoin(violations))
  if not ignore:
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  lu.LogWarning(msg)
1282
1283
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A set with the names of the instances which violate the new
      ipolicy but did not violate the old one

  """
  violating_new = _ComputeViolatingInstances(new_ipolicy, instances)
  violating_old = _ComputeViolatingInstances(old_ipolicy, instances)
  return violating_new - violating_old
1296
1297
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion; it is expected to return
      None for unknown names
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  expanded = fn(name)
  if expanded is not None:
    return expanded

  raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                             errors.ECODE_NOENT)
1313
1314
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes.

  @param cfg: the configuration object whose C{ExpandNodeName} is used
  @param name: the requested node name

  """
  expander = cfg.ExpandNodeName
  return _ExpandItemName(expander, name, "Node")
1318
1319
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance.

  @param cfg: the configuration object whose C{ExpandInstanceName} is used
  @param name: the requested instance name

  """
  expander = cfg.ExpandInstanceName
  return _ExpandItemName(expander, name, "Instance")
1323
1324
def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         network_type, mac_prefix, tags):
  """Builds network related env variables for hooks

  This builds the hook environment from individual variables. Only
  arguments with a true value end up in the environment.

  @type name: string
  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type network_type: string
  @param network_type: the type of the network
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @type tags: list
  @param tags: the tags of the network
  @rtype: dict
  @return: the hook environment for this network

  """
  plain_vars = [
    ("NETWORK_NAME", name),
    ("NETWORK_SUBNET", subnet),
    ("NETWORK_GATEWAY", gateway),
    ("NETWORK_SUBNET6", network6),
    ("NETWORK_GATEWAY6", gateway6),
    ("NETWORK_MAC_PREFIX", mac_prefix),
    ("NETWORK_TYPE", network_type),
    ]

  env = dict((key, value) for (key, value) in plain_vars if value)

  # Tags are the only value which needs converting (space-separated string)
  if tags:
    env["NETWORK_TAGS"] = " ".join(tags)

  return env
1368
1369
def _BuildNetworkHookEnvByObject(net):
  """Builds network related env variables for hooks

  The attributes of the network object are handed over to
  L{_BuildNetworkHookEnv}.

  @type net: L{objects.Network}
  @param net: the network object
  @rtype: dict
  @return: the hook environment for this network

  """
  return _BuildNetworkHookEnv(name=net.name,
                              subnet=net.network,
                              gateway=net.gateway,
                              network6=net.network6,
                              gateway6=net.gateway6,
                              network_type=net.network_type,
                              mac_prefix=net.mac_prefix,
                              tags=net.tags)
1389
1390
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link, net, netinfo)
      representing the NICs the instance has; C{net} is the network the
      NIC is connected to (or a false value) and C{netinfo} is the dict
      form of that network as accepted by L{objects.Network.FromDict}
      (or a false value)
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      # The guard has to look at this NIC's own network ("net" from the
      # tuple); only NICs connected to a network export network variables
      if net:
        env["INSTANCE_NIC%d_NETWORK" % idx] = net
        if netinfo:
          # Export only those network attributes which are actually set
          nobj = objects.Network.FromDict(netinfo)
          if nobj.network:
            env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
          if nobj.gateway:
            env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
          if nobj.network6:
            env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
          if nobj.gateway6:
            env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
          if nobj.mac_prefix:
            env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
          if nobj.network_type:
            env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
          if nobj.tags:
            env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
      # For bridged NICs the link is also exported as the bridge name
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  # Backend and hypervisor parameters are exported one variable per key
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
1502
1503
def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  Mode and link are taken from the NIC parameters after filling them with
  the cluster defaults. The network info is only looked up if the NIC is
  connected to a network which can be resolved to a UUID.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple
  @rtype: tuple
  @return: (ip, mac, mode, link, network, network info dict or None)

  """
  cfg = lu.cfg
  nic_ip = nic.ip
  nic_mac = nic.mac

  filled = cfg.GetClusterInfo().SimpleFillNIC(nic.nicparams)
  nic_mode = filled[constants.NIC_MODE]
  nic_link = filled[constants.NIC_LINK]

  net = nic.network
  netinfo = None
  if net:
    net_uuid = cfg.LookupNetwork(net)
    if net_uuid:
      netinfo = objects.Network.ToDict(cfg.GetNetwork(net_uuid))

  return (nic_ip, nic_mac, nic_mode, nic_link, net, netinfo)
1527
1528
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  @rtype: list
  @return: one L{_NICToTuple} result per NIC, in the order of C{nics}

  """
  return [_NICToTuple(lu, nic) for nic in nics]
1545
1546
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  The arguments for L{_BuildInstanceHookEnv} are derived from the instance
  and its cluster-filled backend and hypervisor parameters; entries of
  C{override} replace the derived arguments of the same name.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)

  hook_args = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_state,
    maxmem=bep[constants.BE_MAXMEM],
    minmem=bep[constants.BE_MINMEM],
    vcpus=bep[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(dsk.size, dsk.mode) for dsk in instance.disks],
    bep=bep,
    hvp=hvp,
    hypervisor_name=instance.hypervisor,
    tags=instance.tags,
    )
  if override:
    hook_args.update(override)

  return _BuildInstanceHookEnv(**hook_args) # pylint: disable=W0142
1585
1586
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  Nodes returned by the configuration's C{MaintainCandidatePool} are
  logged as promoted and re-added to the LU's context. A note is logged if
  there are more master candidates than desired afterwards.

  @param lu: the LU on behalf of which the pool is adjusted
  @param exceptions: passed on unchanged to C{MaintainCandidatePool} and
      C{GetMasterCandidateStats}

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in promoted))
    for node in promoted:
      lu.context.ReaddNode(node)

  (mc_now, mc_max, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
1601
1602
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  @param lu: the LU providing access to the configuration
  @param exceptions: passed on unchanged to C{GetMasterCandidateStats}
  @rtype: boolean
  @return: whether the current number of candidates is below the desired
      number (raised by one for the new node, capped at the cluster's
      candidate pool size)

  """
  pool_size = lu.cfg.GetClusterInfo().candidate_pool_size
  (current, desired, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  target = min(desired + 1, pool_size)
  return current < target
1612
1613
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  violating = set()
  for inst in instances:
    if _ComputeIPolicyInstanceViolation(ipolicy, inst):
      violating.add(inst.name)
  return frozenset(violating)
1625
1626
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  Only NICs whose cluster-filled parameters are in bridged mode are
  considered; no RPC call is made if there is none.

  @param lu: the LU on behalf of which we make the check
  @param target_nics: the NIC objects to check
  @param target_node: the node which is asked about the bridges

  """
  cluster = lu.cfg.GetClusterInfo()
  filled = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]

  bridges = []
  for params in filled:
    if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(params[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1639
1640
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary
      node

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, target_node)
1648
1649
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if a variant is given for an OS without
      variants, if the variant is missing for an OS with variants, or if
      the given variant is not among the supported ones

  """
  variant = objects.OS.GetVariant(name)
  supported = os_obj.supported_variants

  if supported:
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)
    if variant not in supported:
      raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
  elif variant:
    raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                               " passed)" % (os_obj.name, variant),
                               errors.ECODE_INVAL)
1672
1673
def _GetNodeInstancesInner(cfg, fn):
  """Returns the configured instances accepted by a filter function.

  @param cfg: the configuration object; its C{GetAllInstancesInfo} result
      provides the candidate instances
  @param fn: callable taking an instance; a true result selects it
  @rtype: list
  @return: the selected instance objects

  """
  selected = []
  for inst in cfg.GetAllInstancesInfo().values():
    if fn(inst):
      selected.append(inst)
  return selected
1676
1677
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: the configuration object to take the instances from
  @param node_name: the node to look for in each instance's C{all_nodes}

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1684
1685
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: the configuration object to take the instances from
  @param node_name: the node to compare each instance's C{primary_node}
      against

  """
  def _IsPrimaryHere(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimaryHere)
1692
1693
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: configuration object passed on to L{_GetNodeInstancesInner}
  @param node_name: name looked up in each instance's C{secondary_nodes}

  """
  def _IsSecondaryHere(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondaryHere)
1700
1701
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: configuration object, queried for the file storage
      directories
  @param storage_type: the storage type; only L{constants.ST_FILE} yields
      arguments
  @rtype: list
  @return: a one-element list holding the list of storage directories for
      file storage, an empty list for any other type

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories as its single argument
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1712
1713
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Returns the indices of an instance's disks reported faulty by a node.

  @param cfg: configuration object, used to set the disk IDs for the node
  @param rpc_runner: object providing C{call_blockdev_getmirrorstatus}
  @param instance: the instance whose disks are queried
  @param node_name: the node asked for the mirror status
  @param prereq: passed through to the RPC result's C{Raise} method
  @rtype: list
  @return: positions (within the RPC payload) of the entries whose
      C{ldisk_status} is L{constants.LDS_FAULTY}

  """
  disks = instance.disks
  for disk in disks:
    cfg.SetDiskID(disk, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name,
                                                    (disks, instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # Entries may be empty/None; those are not counted as faulty
  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
1730
1731
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  At most one of (iallocator, node) may be specified. If neither is
  given, or the iallocator equals
  L{constants.DEFAULT_IALLOCATOR_SHORTCUT}, the opcode's iallocator slot
  is overwritten with the cluster-wide default iallocator.

  @param lu: the logical unit whose opcode and configuration are used
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  @raise errors.OpPrereqError: if both values are given, or if a default
      is needed but the cluster has none

  """
  op = lu.op
  node = getattr(op, node_slot, None)
  ialloc = getattr(op, iallocator_slot, None)

  # An empty list in the node slot is treated like no node at all
  if node == []:
    node = None

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)

  nothing_given = (node is None and ialloc is None)
  if not (nothing_given or
          ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    # An explicit node or iallocator was given; nothing to fill in
    return

  default_iallocator = lu.cfg.GetDefaultIAllocator()
  if not default_iallocator:
    raise errors.OpPrereqError("No iallocator or node given and no"
                               " cluster-wide default iallocator found;"
                               " please specify either an iallocator or a"
                               " node, or set a cluster-wide default"
                               " iallocator", errors.ECODE_INVAL)

  setattr(op, iallocator_slot, default_iallocator)
1766
1767
def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  An iallocator given in the opcode wins; otherwise the cluster-wide
  default is used.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name
  @raise errors.OpPrereqError: if neither the opcode nor the cluster
      configuration provides an iallocator

  """
  if ialloc:
    return ialloc

  # Nothing given in the opcode, fall back to the cluster-wide default
  cluster_default = cfg.GetDefaultIAllocator()
  if cluster_default:
    return cluster_default

  raise errors.OpPrereqError("No iallocator was specified, neither in the"
                             " opcode nor as a cluster-wide default",
                             errors.ECODE_INVAL)
1789
1790
def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name must match the resolved hostname according to
  L{utils.MatchNameComponent}, to prevent accidental mismatches. A
  resolved name that differs from the given one is logged via the LU.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object
  @raise errors.OpPrereqError: if the resolved name does not match

  """
  hostname = netutils.GetHostname(name=name)
  resolved = hostname.name

  if resolved != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, resolved)

  if utils.MatchNameComponent(name, [resolved]):
    return hostname

  raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                              " same as given hostname '%s'") %
                             (resolved, name), errors.ECODE_INVAL)
1810
1811
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The LU itself does no work; it only supplies the hooks environment and
  the node lists for the "cluster-init" hooks path.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment containing only C{OP_TARGET}, set to the cluster
        name

    """
    cluster_name = self.cfg.GetClusterName()
    return {"OP_TARGET": cluster_name}

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: an empty first list and a second list holding only the
        master node

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Nothing to do.

    @return: always C{True}

    """
    return True
1838
1839
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment containing only C{OP_TARGET}, set to the cluster
        name

    """
    cluster_name = self.cfg.GetClusterName()
    return {"OP_TARGET": cluster_name}

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two empty node lists

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty: the master must be the only
    node left and there must be no instances.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()
    nodelist = self.cfg.GetNodeList()

    only_master_left = (len(nodelist) == 1 and nodelist[0] == master)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if not instancelist:
      return

    raise errors.OpPrereqError("There are still %d instance(s) in"
                               " this cluster." % len(instancelist),
                               errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master, then asks the master to deactivate
    the master IP; a failure of the latter is only logged as a warning.

    @return: the C{name} field of the master network parameters

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    master_name = master_params.name

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_name)

    use_external_script = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_name,
                                                     master_params,
                                                     use_external_script)
    failure = result.fail_msg
    if failure:
      self.LogWarning("Error disabling the master IP address: %s", failure)

    return master_name
1899
1900
1901 def _VerifyCertificate(filename):
1902   """Verifies a certificate for L{LUClusterVerifyConfig}.
1903
1904   @type filename: string
1905   @param filename: Path to PEM file
1906
1907   """
1908   try:
1909     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1910                                            utils.ReadFile(filename))
1911   except Exception, err: # pylint: disable=W0703
1912     return (LUClusterVerifyConfig.ETYPE_ERROR,
1913             "Failed to load X509 certificate %s: %s" % (filename, err))
1914
1915   (errcode, msg) = \
1916     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1917                                 constants.SSL_CERT_EXPIRATION_ERROR)
1918
1919   if msg:
1920     fnamemsg = "While verifying %s: %s" % (filename, msg)
1921   else:
1922     fnamemsg = None
1923
1924   if errcode is None:
1925     return (None, fnamemsg)
1926   elif errcode == utils.CERT_WARNING:
1927     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1928   elif errcode == utils.CERT_ERROR:
1929     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1930
1931   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1932
1933
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  Entries are produced in three groups, in this order: cluster defaults
  for every enabled hypervisor, per-OS overrides (only where the override
  dict is non-empty), and per-instance parameters (only for instances
  that have their own C{hvparams}).

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = [("cluster", hv_name, cluster.GetHVDefaults(hv_name))
              for hv_name in cluster.enabled_hypervisors]

  for (os_name, os_hvp) in cluster.os_hvp.items():
    for (hv_name, hv_params) in os_hvp.items():
      if not hv_params:
        continue
      origin = "os %s" % os_name
      full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
      hvp_data.append((origin, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  hvp_data.extend(("instance %s" % inst.name, inst.hypervisor,
                   cluster.FillHV(inst))
                  for inst in instances
                  if inst.hvparams)

  return hvp_data
1964
1965
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  # Name of the keyword argument carrying the severity, and its values
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    @param ecode: three-element sequence; its first two elements are used
        as the item type and the error text
    @param item: name of the affected item, may be empty/None
    @param msg: message, used as a format string when C{args} is non-empty
    @param args: values interpolated into C{msg}
    @param kwargs: may carry the severity under L{ETYPE_FIELD}; defaults
        to L{ETYPE_ERROR}

    """
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    (itype, etxt, _) = ecode

    # first complete the msg
    if args:
      msg = msg % args

    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      text = "%s:%s:%s:%s:%s" % (severity, etxt, itype, item, msg)
    else:
      if item:
        item_text = " " + item
      else:
        item_text = ""
      text = "%s: %s%s: %s" % (severity, itype, item_text, msg)

    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % text) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    The condition is also treated as true when the opcode's
    C{debug_simulate_errors} is set. Errors whose code is listed in the
    opcode's C{ignore_errors} are reported as warnings. C{self.bad} is
    only updated for messages of severity L{ETYPE_ERROR}.

    """
    failed = (bool(cond)
              or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if failed:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) != self.ETYPE_ERROR:
      return

    self.bad = self.bad or failed
2023
2024
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare that this LU needs no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Build the verification jobs.

    With a group name in the opcode only that group is verified. Without
    one, a configuration verification job is queued first and one job per
    node group follows, each depending on the configuration job.

    @return: a L{ResultWithJobs} wrapping the job list

    """
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      config_op = \
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
      jobs.append([config_op])

      # Always depend on global verification; len(jobs) is read when the
      # function is called, i.e. it grows as group jobs are added, so the
      # negative offset keeps designating the first job in the list
      depends_fn = lambda: [(-len(jobs), [])]

    for group in groups:
      group_op = opcodes.OpClusterVerifyGroup(
        group_name=group,
        ignore_errors=self.op.ignore_errors,
        depends=depends_fn())
      jobs.append([group_op])

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # Only opcodes other than the group verification may lack the
        # skip_checks attribute
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
2068
2069
2070 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2071   """Verifies the cluster config.
2072
2073   """
2074   REQ_BGL = False
2075
2076   def _VerifyHVP(self, hvp_data):
2077     """Verifies locally the syntax of the hypervisor parameters.
2078
2079     """
2080     for item, hv_name, hv_params in hvp_data:
2081       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2082              (item, hv_name))
2083       try:
2084         hv_class = hypervisor.GetHypervisor(hv_name)
2085         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2086         hv_class.CheckParameterSyntax(hv_params)
2087       except errors.GenericError, err:
2088         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2089
2090   def ExpandNames(self):
2091     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2092     self.share_locks = _ShareAll()
2093
2094   def CheckPrereq(self):
2095     """Check prerequisites.
2096
2097     """
2098     # Retrieve all information
2099     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2100     self.all_node_info = self.cfg.GetAllNodesInfo()
2101     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2102
2103   def Exec(self, feedback_fn):
2104     """Verify integrity of cluster, performing various test on nodes.
2105
2106     """
2107     self.bad = False
2108     self._feedback_fn = feedback_fn
2109
2110     feedback_fn("* Verifying cluster config")
2111
2112     for msg in self.cfg.VerifyConfig():
2113       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2114
2115     feedback_fn("* Verifying cluster certificate files")
2116
2117     for cert_filename in pathutils.ALL_CERT_FILES:
2118       (errcode, msg) = _VerifyCertificate(cert_filename)
2119       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2120
2121     feedback_fn("* Verifying hypervisor parameters")
2122
2123     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2124                                                 self.all_inst_info.values()))
2125
2126     feedback_fn("* Verifying all nodes belong to an existing group")
2127
2128     # We do this verification here because, should this bogus circumstance
2129     # occur, it would never be caught by VerifyGroup, which only acts on
2130     # nodes/instances reachable from existing node groups.
2131
2132     dangling_nodes = set(node.name for node in self.all_node_info.values()
2133                          if node.group not in self.all_group_info)
2134
2135     dangling_instances = {}
2136     no_node_instances = []
2137
2138     for inst in self.all_inst_info.values():
2139       if inst.primary_node in dangling_nodes:
2140         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2141       elif inst.primary_node not in self.all_node_info:
2142         no_node_instances.append(inst.name)
2143
2144     pretty_dangling = [
2145         "%s (%s)" %
2146         (node.name,
2147          utils.CommaJoin(dangling_instances.get(node.name,
2148                                                 ["no instances"])))
2149         for node in dangling_nodes]
2150
2151     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2152                   None,
2153                   "the following nodes (and their instances) belong to a non"
2154                   " existing group: %s", utils.CommaJoin(pretty_dangling))
2155
2156     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2157                   None,
2158                   "the following instances have a non-existing primary-node:"
2159                   " %s", utils.CommaJoin(no_node_instances))
2160
2161     return not self.bad
2162
2163
2164 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2165   """Verifies the status of a node group.
2166
2167   """
2168   HPATH = "cluster-verify"
2169   HTYPE = constants.HTYPE_CLUSTER
2170   REQ_BGL = False
2171
2172   _HOOKS_INDENT_RE = re.compile("^", re.M)
2173
2174   class NodeImage(object):
2175     """A class representing the logical and physical status of a node.
2176
2177     @type name: string
2178     @ivar name: the node name to which this object refers
2179     @ivar volumes: a structure as returned from
2180         L{ganeti.backend.GetVolumeList} (runtime)
2181     @ivar instances: a list of running instances (runtime)
2182     @ivar pinst: list of configured primary instances (config)
2183     @ivar sinst: list of configured secondary instances (config)
2184     @ivar sbp: dictionary of {primary-node: list of instances} for all
2185         instances for which this node is secondary (config)
2186     @ivar mfree: free memory, as reported by hypervisor (runtime)
2187     @ivar dfree: free disk, as reported by the node (runtime)
2188     @ivar offline: the offline status (config)
2189     @type rpc_fail: boolean
2190     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2191         not whether the individual keys were correct) (runtime)
2192     @type lvm_fail: boolean
2193     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2194     @type hyp_fail: boolean
2195     @ivar hyp_fail: whether the RPC call didn't return the instance list
2196     @type ghost: boolean
2197     @ivar ghost: whether this is a known node or not (config)
2198     @type os_fail: boolean
2199     @ivar os_fail: whether the RPC call didn't return valid OS data
2200     @type oslist: list
2201     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2202     @type vm_capable: boolean
2203     @ivar vm_capable: whether the node can host instances
2204
2205     """
2206     def __init__(self, offline=False, name=None, vm_capable=True):
2207       self.name = name
2208       self.volumes = {}
2209       self.instances = []
2210       self.pinst = []
2211       self.sinst = []
2212       self.sbp = {}
2213       self.mfree = 0
2214       self.dfree = 0
2215       self.offline = offline
2216       self.vm_capable = vm_capable
2217       self.rpc_fail = False
2218       self.lvm_fail = False
2219       self.hyp_fail = False
2220       self.ghost = False
2221       self.os_fail = False
2222       self.oslist = {}
2223
2224   def ExpandNames(self):
2225     # This raises errors.OpPrereqError on its own:
2226     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2227
2228     # Get instances in node group; this is unsafe and needs verification later
2229     inst_names = \
2230       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2231
2232     self.needed_locks = {
2233       locking.LEVEL_INSTANCE: inst_names,
2234       locking.LEVEL_NODEGROUP: [self.group_uuid],
2235       locking.LEVEL_NODE: [],
2236
2237       # This opcode is run by watcher every five minutes and acquires all nodes
2238       # for a group. It doesn't run for a long time, so it's better to acquire
2239       # the node allocation lock as well.
2240       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2241       }
2242
2243     self.share_locks = _ShareAll()
2244
2245   def DeclareLocks(self, level):
2246     if level == locking.LEVEL_NODE:
2247       # Get members of node group; this is unsafe and needs verification later
2248       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2249
2250       all_inst_info = self.cfg.GetAllInstancesInfo()
2251
2252       # In Exec(), we warn about mirrored instances that have primary and
2253       # secondary living in separate node groups. To fully verify that
2254       # volumes for these instances are healthy, we will need to do an
2255       # extra call to their secondaries. We ensure here those nodes will
2256       # be locked.
2257       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2258         # Important: access only the instances whose lock is owned
2259         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2260           nodes.update(all_inst_info[inst].secondary_nodes)
2261
2262       self.needed_locks[locking.LEVEL_NODE] = nodes
2263
2264   def CheckPrereq(self):
2265     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2266     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2267
2268     group_nodes = set(self.group_info.members)
2269     group_instances = \
2270       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2271
2272     unlocked_nodes = \
2273         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2274
2275     unlocked_instances = \
2276         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2277
2278     if unlocked_nodes:
2279       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2280                                  utils.CommaJoin(unlocked_nodes),
2281                                  errors.ECODE_STATE)
2282
2283     if unlocked_instances:
2284       raise errors.OpPrereqError("Missing lock for instances: %s" %
2285                                  utils.CommaJoin(unlocked_instances),
2286                                  errors.ECODE_STATE)
2287
2288     self.all_node_info = self.cfg.GetAllNodesInfo()
2289     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2290
2291     self.my_node_names = utils.NiceSort(group_nodes)
2292     self.my_inst_names = utils.NiceSort(group_instances)
2293
2294     self.my_node_info = dict((name, self.all_node_info[name])
2295                              for name in self.my_node_names)
2296
2297     self.my_inst_info = dict((name, self.all_inst_info[name])
2298                              for name in self.my_inst_names)
2299
2300     # We detect here the nodes that will need the extra RPC calls for verifying
2301     # split LV volumes; they should be locked.
2302     extra_lv_nodes = set()
2303
2304     for inst in self.my_inst_info.values():
2305       if inst.disk_template in constants.DTS_INT_MIRROR:
2306         for nname in inst.all_nodes:
2307           if self.all_node_info[nname].group != self.group_uuid:
2308             extra_lv_nodes.add(nname)
2309
2310     unlocked_lv_nodes = \
2311         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2312
2313     if unlocked_lv_nodes:
2314       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2315                                  utils.CommaJoin(unlocked_lv_nodes),
2316                                  errors.ECODE_STATE)
2317     self.extra_lv_nodes = list(extra_lv_nodes)
2318
2319   def _VerifyNode(self, ninfo, nresult):
2320     """Perform some basic validation on data returned from a node.
2321
2322       - check the result data structure is well formed and has all the
2323         mandatory fields
2324       - check ganeti version
2325
2326     @type ninfo: L{objects.Node}
2327     @param ninfo: the node to check
2328     @param nresult: the results from the node
2329     @rtype: boolean
2330     @return: whether overall this call was successful (and we can expect
2331          reasonable values in the respose)
2332
2333     """
2334     node = ninfo.name
2335     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2336
2337     # main result, nresult should be a non-empty dict
2338     test = not nresult or not isinstance(nresult, dict)
2339     _ErrorIf(test, constants.CV_ENODERPC, node,
2340                   "unable to verify node: no data returned")
2341     if test:
2342       return False
2343
2344     # compares ganeti version
2345     local_version = constants.PROTOCOL_VERSION
2346     remote_version = nresult.get("version", None)
2347     test = not (remote_version and
2348                 isinstance(remote_version, (list, tuple)) and
2349                 len(remote_version) == 2)
2350     _ErrorIf(test, constants.CV_ENODERPC, node,
2351              "connection to node returned invalid data")
2352     if test:
2353       return False
2354
2355     test = local_version != remote_version[0]
2356     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2357              "incompatible protocol versions: master %s,"
2358              " node %s", local_version, remote_version[0])
2359     if test:
2360       return False
2361
2362     # node seems compatible, we can actually try to look into its results
2363
2364     # full package version
2365     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2366                   constants.CV_ENODEVERSION, node,
2367                   "software version mismatch: master %s, node %s",
2368                   constants.RELEASE_VERSION, remote_version[1],
2369                   code=self.ETYPE_WARNING)
2370
2371     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2372     if ninfo.vm_capable and isinstance(hyp_result, dict):
2373       for hv_name, hv_result in hyp_result.iteritems():
2374         test = hv_result is not None
2375         _ErrorIf(test, constants.CV_ENODEHV, node,
2376                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2377
2378     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2379     if ninfo.vm_capable and isinstance(hvp_result, list):
2380       for item, hv_name, hv_result in hvp_result:
2381         _ErrorIf(True, constants.CV_ENODEHV, node,
2382                  "hypervisor %s parameter verify failure (source %s): %s",
2383                  hv_name, item, hv_result)
2384
2385     test = nresult.get(constants.NV_NODESETUP,
2386                        ["Missing NODESETUP results"])
2387     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2388              "; ".join(test))
2389
2390     return True
2391
2392   def _VerifyNodeTime(self, ninfo, nresult,
2393                       nvinfo_starttime, nvinfo_endtime):
2394     """Check the node time.
2395
2396     @type ninfo: L{objects.Node}
2397     @param ninfo: the node to check
2398     @param nresult: the remote results for the node
2399     @param nvinfo_starttime: the start time of the RPC call
2400     @param nvinfo_endtime: the end time of the RPC call
2401
2402     """
2403     node = ninfo.name
2404     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2405
2406     ntime = nresult.get(constants.NV_TIME, None)
2407     try:
2408       ntime_merged = utils.MergeTime(ntime)
2409     except (ValueError, TypeError):
2410       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2411       return
2412
2413     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2414       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2415     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2416       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2417     else:
2418       ntime_diff = None
2419
2420     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2421              "Node time diverges by at least %s from master node time",
2422              ntime_diff)
2423
2424   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2425     """Check the node LVM results.
2426
2427     @type ninfo: L{objects.Node}
2428     @param ninfo: the node to check
2429     @param nresult: the remote results for the node
2430     @param vg_name: the configured VG name
2431
2432     """
2433     if vg_name is None:
2434       return
2435
2436     node = ninfo.name
2437     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2438
2439     # checks vg existence and size > 20G
2440     vglist = nresult.get(constants.NV_VGLIST, None)
2441     test = not vglist
2442     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2443     if not test:
2444       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2445                                             constants.MIN_VG_SIZE)
2446       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2447
2448     # check pv names
2449     pvlist = nresult.get(constants.NV_PVLIST, None)
2450     test = pvlist is None
2451     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2452     if not test:
2453       # check that ':' is not present in PV names, since it's a
2454       # special character for lvcreate (denotes the range of PEs to
2455       # use on the PV)
2456       for _, pvname, owner_vg in pvlist:
2457         test = ":" in pvname
2458         _ErrorIf(test, constants.CV_ENODELVM, node,
2459                  "Invalid character ':' in PV '%s' of VG '%s'",
2460                  pvname, owner_vg)
2461
2462   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2463     """Check the node bridges.
2464
2465     @type ninfo: L{objects.Node}
2466     @param ninfo: the node to check
2467     @param nresult: the remote results for the node
2468     @param bridges: the expected list of bridges
2469
2470     """
2471     if not bridges:
2472       return
2473
2474     node = ninfo.name
2475     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2476
2477     missing = nresult.get(constants.NV_BRIDGES, None)
2478     test = not isinstance(missing, list)
2479     _ErrorIf(test, constants.CV_ENODENET, node,
2480              "did not return valid bridge information")
2481     if not test:
2482       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2483                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2484
2485   def _VerifyNodeUserScripts(self, ninfo, nresult):
2486     """Check the results of user scripts presence and executability on the node
2487
2488     @type ninfo: L{objects.Node}
2489     @param ninfo: the node to check
2490     @param nresult: the remote results for the node
2491
2492     """
2493     node = ninfo.name
2494
2495     test = not constants.NV_USERSCRIPTS in nresult
2496     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2497                   "did not return user scripts information")
2498
2499     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2500     if not test:
2501       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2502                     "user scripts not present or not executable: %s" %
2503                     utils.CommaJoin(sorted(broken_scripts)))
2504
2505   def _VerifyNodeNetwork(self, ninfo, nresult):
2506     """Check the node network connectivity results.
2507
2508     @type ninfo: L{objects.Node}
2509     @param ninfo: the node to check
2510     @param nresult: the remote results for the node
2511
2512     """
2513     node = ninfo.name
2514     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2515
2516     test = constants.NV_NODELIST not in nresult
2517     _ErrorIf(test, constants.CV_ENODESSH, node,
2518              "node hasn't returned node ssh connectivity data")
2519     if not test:
2520       if nresult[constants.NV_NODELIST]:
2521         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2522           _ErrorIf(True, constants.CV_ENODESSH, node,
2523                    "ssh communication with node '%s': %s", a_node, a_msg)
2524
2525     test = constants.NV_NODENETTEST not in nresult
2526     _ErrorIf(test, constants.CV_ENODENET, node,
2527              "node hasn't returned node tcp connectivity data")
2528     if not test:
2529       if nresult[constants.NV_NODENETTEST]:
2530         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2531         for anode in nlist:
2532           _ErrorIf(True, constants.CV_ENODENET, node,
2533                    "tcp communication with node '%s': %s",
2534                    anode, nresult[constants.NV_NODENETTEST][anode])
2535
2536     test = constants.NV_MASTERIP not in nresult
2537     _ErrorIf(test, constants.CV_ENODENET, node,
2538              "node hasn't returned node master IP reachability data")
2539     if not test:
2540       if not nresult[constants.NV_MASTERIP]:
2541         if node == self.master_node:
2542           msg = "the master node cannot reach the master IP (not configured?)"
2543         else:
2544           msg = "cannot reach the master IP"
2545         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2546
2547   def _VerifyInstance(self, instance, instanceconfig, node_image,
2548                       diskstatus):
2549     """Verify an instance.
2550
2551     This function checks to see if the required block devices are
2552     available on the instance's node.
2553
2554     """
2555     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2556     node_current = instanceconfig.primary_node
2557
2558     node_vol_should = {}
2559     instanceconfig.MapLVsByNode(node_vol_should)
2560
2561     cluster = self.cfg.GetClusterInfo()
2562     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2563                                                             self.group_info)
2564     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2565     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2566              code=self.ETYPE_WARNING)
2567
2568     for node in node_vol_should:
2569       n_img = node_image[node]
2570       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2571         # ignore missing volumes on offline or broken nodes
2572         continue
2573       for volume in node_vol_should[node]:
2574         test = volume not in n_img.volumes
2575         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2576                  "volume %s missing on node %s", volume, node)
2577
2578     if instanceconfig.admin_state == constants.ADMINST_UP:
2579       pri_img = node_image[node_current]
2580       test = instance not in pri_img.instances and not pri_img.offline
2581       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2582                "instance not running on its primary node %s",
2583                node_current)
2584
2585     diskdata = [(nname, success, status, idx)
2586                 for (nname, disks) in diskstatus.items()
2587                 for idx, (success, status) in enumerate(disks)]
2588
2589     for nname, success, bdev_status, idx in diskdata:
2590       # the 'ghost node' construction in Exec() ensures that we have a
2591       # node here
2592       snode = node_image[nname]
2593       bad_snode = snode.ghost or snode.offline
2594       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2595                not success and not bad_snode,
2596                constants.CV_EINSTANCEFAULTYDISK, instance,
2597                "couldn't retrieve status for disk/%s on %s: %s",
2598                idx, nname, bdev_status)
2599       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2600                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2601                constants.CV_EINSTANCEFAULTYDISK, instance,
2602                "disk/%s on %s is faulty", idx, nname)
2603
2604   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2605     """Verify if there are any unknown volumes in the cluster.
2606
2607     The .os, .swap and backup volumes are ignored. All other volumes are
2608     reported as unknown.
2609
2610     @type reserved: L{ganeti.utils.FieldSet}
2611     @param reserved: a FieldSet of reserved volume names
2612
2613     """
2614     for node, n_img in node_image.items():
2615       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2616           self.all_node_info[node].group != self.group_uuid):
2617         # skip non-healthy nodes
2618         continue
2619       for volume in n_img.volumes:
2620         test = ((node not in node_vol_should or
2621                 volume not in node_vol_should[node]) and
2622                 not reserved.Matches(volume))
2623         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2624                       "volume %s is unknown", volume)
2625
2626   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2627     """Verify N+1 Memory Resilience.
2628
2629     Check that if one single node dies we can still start all the
2630     instances it was primary for.
2631
2632     """
2633     cluster_info = self.cfg.GetClusterInfo()
2634     for node, n_img in node_image.items():
2635       # This code checks that every node which is now listed as
2636       # secondary has enough memory to host all instances it is
2637       # supposed to should a single other node in the cluster fail.
2638       # FIXME: not ready for failover to an arbitrary node
2639       # FIXME: does not support file-backed instances
2640       # WARNING: we currently take into account down instances as well
2641       # as up ones, considering that even if they're down someone
2642       # might want to start them even in the event of a node failure.
2643       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2644         # we're skipping nodes marked offline and nodes in other groups from
2645         # the N+1 warning, since most likely we don't have good memory
2646         # infromation from them; we already list instances living on such
2647         # nodes, and that's enough warning
2648         continue
2649       #TODO(dynmem): also consider ballooning out other instances
2650       for prinode, instances in n_img.sbp.items():
2651         needed_mem = 0
2652         for instance in instances:
2653           bep = cluster_info.FillBE(instance_cfg[instance])
2654           if bep[constants.BE_AUTO_BALANCE]:
2655             needed_mem += bep[constants.BE_MINMEM]
2656         test = n_img.mfree < needed_mem
2657         self._ErrorIf(test, constants.CV_ENODEN1, node,
2658                       "not enough memory to accomodate instance failovers"
2659                       " should node %s fail (%dMiB needed, %dMiB available)",
2660                       prinode, needed_mem, n_img.mfree)
2661
2662   @classmethod
2663   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2664                    (files_all, files_opt, files_mc, files_vm)):
2665     """Verifies file checksums collected from all nodes.
2666
2667     @param errorif: Callback for reporting errors
2668     @param nodeinfo: List of L{objects.Node} objects
2669     @param master_node: Name of master node
2670     @param all_nvinfo: RPC results
2671
2672     """
2673     # Define functions determining which nodes to consider for a file
2674     files2nodefn = [
2675       (files_all, None),
2676       (files_mc, lambda node: (node.master_candidate or
2677                                node.name == master_node)),
2678       (files_vm, lambda node: node.vm_capable),
2679       ]
2680
2681     # Build mapping from filename to list of nodes which should have the file
2682     nodefiles = {}
2683     for (files, fn) in files2nodefn:
2684       if fn is None:
2685         filenodes = nodeinfo
2686       else:
2687         filenodes = filter(fn, nodeinfo)
2688       nodefiles.update((filename,
2689                         frozenset(map(operator.attrgetter("name"), filenodes)))
2690                        for filename in files)
2691
2692     assert set(nodefiles) == (files_all | files_mc | files_vm)
2693
2694     fileinfo = dict((filename, {}) for filename in nodefiles)
2695     ignore_nodes = set()
2696
2697     for node in nodeinfo:
2698       if node.offline:
2699         ignore_nodes.add(node.name)
2700         continue
2701
2702       nresult = all_nvinfo[node.name]
2703
2704       if nresult.fail_msg or not nresult.payload:
2705         node_files = None
2706       else:
2707         fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2708         node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2709                           for (key, value) in fingerprints.items())
2710         del fingerprints
2711
2712       test = not (node_files and isinstance(node_files, dict))
2713       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2714               "Node did not return file checksum data")
2715       if test:
2716         ignore_nodes.add(node.name)
2717         continue
2718
2719       # Build per-checksum mapping from filename to nodes having it
2720       for (filename, checksum) in node_files.items():
2721         assert filename in nodefiles
2722         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2723
2724     for (filename, checksums) in fileinfo.items():
2725       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2726
2727       # Nodes having the file
2728       with_file = frozenset(node_name
2729                             for nodes in fileinfo[filename].values()
2730                             for node_name in nodes) - ignore_nodes
2731
2732       expected_nodes = nodefiles[filename] - ignore_nodes
2733
2734       # Nodes missing file
2735       missing_file = expected_nodes - with_file
2736
2737       if filename in files_opt:
2738         # All or no nodes
2739         errorif(missing_file and missing_file != expected_nodes,
2740                 constants.CV_ECLUSTERFILECHECK, None,
2741                 "File %s is optional, but it must exist on all or no"
2742                 " nodes (not found on %s)",
2743                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2744       else:
2745         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2746                 "File %s is missing from node(s) %s", filename,
2747                 utils.CommaJoin(utils.NiceSort(missing_file)))
2748
2749         # Warn if a node has a file it shouldn't
2750         unexpected = with_file - expected_nodes
2751         errorif(unexpected,
2752                 constants.CV_ECLUSTERFILECHECK, None,
2753                 "File %s should not exist on node(s) %s",
2754                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2755
2756       # See if there are multiple versions of the file
2757       test = len(checksums) > 1
2758       if test:
2759         variants = ["variant %s on %s" %
2760                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2761                     for (idx, (checksum, nodes)) in
2762                       enumerate(sorted(checksums.items()))]
2763       else:
2764         variants = []
2765
2766       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2767               "File %s found with %s different checksums (%s)",
2768               filename, len(checksums), "; ".join(variants))
2769
2770   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2771                       drbd_map):
2772     """Verifies and the node DRBD status.
2773
2774     @type ninfo: L{objects.Node}
2775     @param ninfo: the node to check
2776     @param nresult: the remote results for the node
2777     @param instanceinfo: the dict of instances
2778     @param drbd_helper: the configured DRBD usermode helper
2779     @param drbd_map: the DRBD map as returned by
2780         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2781
2782     """
2783     node = ninfo.name
2784     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2785
2786     if drbd_helper:
2787       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2788       test = (helper_result is None)
2789       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2790                "no drbd usermode helper returned")
2791       if helper_result:
2792         status, payload = helper_result
2793         test = not status
2794         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2795                  "drbd usermode helper check unsuccessful: %s", payload)
2796         test = status and (payload != drbd_helper)
2797         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2798                  "wrong drbd usermode helper: %s", payload)
2799
2800     # compute the DRBD minors
2801     node_drbd = {}
2802     for minor, instance in drbd_map[node].items():
2803       test = instance not in instanceinfo
2804       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2805                "ghost instance '%s' in temporary DRBD map", instance)
2806         # ghost instance should not be running, but otherwise we
2807         # don't give double warnings (both ghost instance and
2808         # unallocated minor in use)
2809       if test:
2810         node_drbd[minor] = (instance, False)
2811       else:
2812         instance = instanceinfo[instance]
2813         node_drbd[minor] = (instance.name,
2814                             instance.admin_state == constants.ADMINST_UP)
2815
2816     # and now check them
2817     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2818     test = not isinstance(used_minors, (tuple, list))
2819     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2820              "cannot parse drbd status file: %s", str(used_minors))
2821     if test:
2822       # we cannot check drbd status
2823       return
2824
2825     for minor, (iname, must_exist) in node_drbd.items():
2826       test = minor not in used_minors and must_exist
2827       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2828                "drbd minor %d of instance %s is not active", minor, iname)
2829     for minor in used_minors:
2830       test = minor not in node_drbd
2831       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2832                "unallocated drbd minor %d is in use", minor)
2833
2834   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2835     """Builds the node OS structures.
2836
2837     @type ninfo: L{objects.Node}
2838     @param ninfo: the node to check
2839     @param nresult: the remote results for the node
2840     @param nimg: the node image object
2841
2842     """
2843     node = ninfo.name
2844     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2845
2846     remote_os = nresult.get(constants.NV_OSLIST, None)
2847     test = (not isinstance(remote_os, list) or
2848             not compat.all(isinstance(v, list) and len(v) == 7
2849                            for v in remote_os))
2850
2851     _ErrorIf(test, constants.CV_ENODEOS, node,
2852              "node hasn't returned valid OS data")
2853
2854     nimg.os_fail = test
2855
2856     if test:
2857       return
2858
2859     os_dict = {}
2860
2861     for (name, os_path, status, diagnose,
2862          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2863
2864       if name not in os_dict:
2865         os_dict[name] = []
2866
2867       # parameters is a list of lists instead of list of tuples due to
2868       # JSON lacking a real tuple type, fix it:
2869       parameters = [tuple(v) for v in parameters]
2870       os_dict[name].append((os_path, status, diagnose,
2871                             set(variants), set(parameters), set(api_ver)))
2872
2873     nimg.oslist = os_dict
2874
2875   def _VerifyNodeOS(self, ninfo, nimg, base):
2876     """Verifies the node OS list.
2877
2878     @type ninfo: L{objects.Node}
2879     @param ninfo: the node to check
2880     @param nimg: the node image object
2881     @param base: the 'template' node we match against (e.g. from the master)
2882
2883     """
2884     node = ninfo.name
2885     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2886
2887     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2888
2889     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2890     for os_name, os_data in nimg.oslist.items():
2891       assert os_data, "Empty OS status for OS %s?!" % os_name
2892       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2893       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2894                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2895       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2896                "OS '%s' has multiple entries (first one shadows the rest): %s",
2897                os_name, utils.CommaJoin([v[0] for v in os_data]))
2898       # comparisons with the 'base' image
2899       test = os_name not in base.oslist
2900       _ErrorIf(test, constants.CV_ENODEOS, node,
2901                "Extra OS %s not present on reference node (%s)",
2902                os_name, base.name)
2903       if test:
2904         continue
2905       assert base.oslist[os_name], "Base node has empty OS status?"
2906       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2907       if not b_status:
2908         # base OS is invalid, skipping
2909         continue
2910       for kind, a, b in [("API version", f_api, b_api),
2911                          ("variants list", f_var, b_var),
2912                          ("parameters", beautify_params(f_param),
2913                           beautify_params(b_param))]:
2914         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2915                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2916                  kind, os_name, base.name,
2917                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2918
2919     # check any missing OSes
2920     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2921     _ErrorIf(missing, constants.CV_ENODEOS, node,
2922              "OSes present on reference node %s but missing on this node: %s",
2923              base.name, utils.CommaJoin(missing))
2924
2925   def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2926     """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2927
2928     @type ninfo: L{objects.Node}
2929     @param ninfo: the node to check
2930     @param nresult: the remote results for the node
2931     @type is_master: bool
2932     @param is_master: Whether node is the master node
2933
2934     """
2935     node = ninfo.name
2936
2937     if (is_master and
2938         (constants.ENABLE_FILE_STORAGE or
2939          constants.ENABLE_SHARED_FILE_STORAGE)):
2940       try:
2941         fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2942       except KeyError:
2943         # This should never happen
2944         self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2945                       "Node did not return forbidden file storage paths")
2946       else:
2947         self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2948                       "Found forbidden file storage paths: %s",
2949                       utils.CommaJoin(fspaths))
2950     else:
2951       self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2952                     constants.CV_ENODEFILESTORAGEPATHS, node,
2953                     "Node should not have returned forbidden file storage"
2954                     " paths")
2955
2956   def _VerifyOob(self, ninfo, nresult):
2957     """Verifies out of band functionality of a node.
2958
2959     @type ninfo: L{objects.Node}
2960     @param ninfo: the node to check
2961     @param nresult: the remote results for the node
2962
2963     """
2964     node = ninfo.name
2965     # We just have to verify the paths on master and/or master candidates
2966     # as the oob helper is invoked on the master
2967     if ((ninfo.master_candidate or ninfo.master_capable) and
2968         constants.NV_OOB_PATHS in nresult):
2969       for path_result in nresult[constants.NV_OOB_PATHS]:
2970         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2971
2972   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2973     """Verifies and updates the node volume data.
2974
2975     This function will update a L{NodeImage}'s internal structures
2976     with data from the remote call.
2977
2978     @type ninfo: L{objects.Node}
2979     @param ninfo: the node to check
2980     @param nresult: the remote results for the node
2981     @param nimg: the node image object
2982     @param vg_name: the configured VG name
2983
2984     """
2985     node = ninfo.name
2986     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2987
2988     nimg.lvm_fail = True
2989     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2990     if vg_name is None:
2991       pass
2992     elif isinstance(lvdata, basestring):
2993       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2994                utils.SafeEncode(lvdata))
2995     elif not isinstance(lvdata, dict):
2996       _ErrorIf(True, constants.CV_ENODELVM, node,
2997                "rpc call to node failed (lvlist)")
2998     else:
2999       nimg.volumes = lvdata
3000       nimg.lvm_fail = False
3001
3002   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3003     """Verifies and updates the node instance list.
3004
3005     If the listing was successful, then updates this node's instance
3006     list. Otherwise, it marks the RPC call as failed for the instance
3007     list key.
3008
3009     @type ninfo: L{objects.Node}
3010     @param ninfo: the node to check
3011     @param nresult: the remote results for the node
3012     @param nimg: the node image object
3013
3014     """
3015     idata = nresult.get(constants.NV_INSTANCELIST, None)
3016     test = not isinstance(idata, list)
3017     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3018                   "rpc call to node failed (instancelist): %s",
3019                   utils.SafeEncode(str(idata)))
3020     if test:
3021       nimg.hyp_fail = True
3022     else:
3023       nimg.instances = idata
3024
3025   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3026     """Verifies and computes a node information map
3027
3028     @type ninfo: L{objects.Node}
3029     @param ninfo: the node to check
3030     @param nresult: the remote results for the node
3031     @param nimg: the node image object
3032     @param vg_name: the configured VG name
3033
3034     """
3035     node = ninfo.name
3036     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3037
3038     # try to read free memory (from the hypervisor)
3039     hv_info = nresult.get(constants.NV_HVINFO, None)
3040     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3041     _ErrorIf(test, constants.CV_ENODEHV, node,
3042              "rpc call to node failed (hvinfo)")
3043     if not test:
3044       try:
3045         nimg.mfree = int(hv_info["memory_free"])
3046       except (ValueError, TypeError):
3047         _ErrorIf(True, constants.CV_ENODERPC, node,
3048                  "node returned invalid nodeinfo, check hypervisor")
3049
3050     # FIXME: devise a free space model for file based instances as well
3051     if vg_name is not None:
3052       test = (constants.NV_VGLIST not in nresult or
3053               vg_name not in nresult[constants.NV_VGLIST])
3054       _ErrorIf(test, constants.CV_ENODELVM, node,
3055                "node didn't return data for the volume group '%s'"
3056                " - it is either missing or broken", vg_name)
3057       if not test:
3058         try:
3059           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3060         except (ValueError, TypeError):
3061           _ErrorIf(True, constants.CV_ENODERPC, node,
3062                    "node returned invalid LVM info, check LVM status")
3063
3064   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3065     """Gets per-disk status information for all instances.
3066
3067     @type nodelist: list of strings
3068     @param nodelist: Node names
3069     @type node_image: dict of (name, L{objects.Node})
3070     @param node_image: Node objects
3071     @type instanceinfo: dict of (name, L{objects.Instance})
3072     @param instanceinfo: Instance objects
3073     @rtype: {instance: {node: [(succes, payload)]}}
3074     @return: a dictionary of per-instance dictionaries with nodes as
3075         keys and disk information as values; the disk information is a
3076         list of tuples (success, payload)
3077
3078     """
3079     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3080
3081     node_disks = {}
3082     node_disks_devonly = {}
3083     diskless_instances = set()
3084     diskless = constants.DT_DISKLESS
3085
3086     for nname in nodelist:
3087       node_instances = list(itertools.chain(node_image[nname].pinst,
3088                                             node_image[nname].sinst))
3089       diskless_instances.update(inst for inst in node_instances
3090                                 if instanceinfo[inst].disk_template == diskless)
3091       disks = [(inst, disk)
3092                for inst in node_instances
3093                for disk in instanceinfo[inst].disks]
3094
3095       if not disks:
3096         # No need to collect data
3097         continue
3098
3099       node_disks[nname] = disks
3100
3101       # _AnnotateDiskParams makes already copies of the disks
3102       devonly = []
3103       for (inst, dev) in disks:
3104         (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3105         self.cfg.SetDiskID(anno_disk, nname)
3106         devonly.append(anno_disk)
3107
3108       node_disks_devonly[nname] = devonly
3109
3110     assert len(node_disks) == len(node_disks_devonly)
3111
3112     # Collect data from all nodes with disks
3113     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3114                                                           node_disks_devonly)
3115
3116     assert len(result) == len(node_disks)
3117
3118     instdisk = {}
3119
3120     for (nname, nres) in result.items():
3121       disks = node_disks[nname]
3122
3123       if nres.offline:
3124         # No data from this node
3125         data = len(disks) * [(False, "node offline")]
3126       else:
3127         msg = nres.fail_msg
3128         _ErrorIf(msg, constants.CV_ENODERPC, nname,
3129                  "while getting disk information: %s", msg)
3130         if msg:
3131           # No data from this node
3132           data = len(disks) * [(False, msg)]
3133         else:
3134           data = []
3135           for idx, i in enumerate(nres.payload):
3136             if isinstance(i, (tuple, list)) and len(i) == 2:
3137               data.append(i)
3138             else:
3139               logging.warning("Invalid result from node %s, entry %d: %s",
3140                               nname, idx, i)
3141               data.append((False, "Invalid result from the remote node"))
3142
3143       for ((inst, _), status) in zip(disks, data):
3144         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3145
3146     # Add empty entries for diskless instances.
3147     for inst in diskless_instances:
3148       assert inst not in instdisk
3149       instdisk[inst] = {}
3150
3151     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3152                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
3153                       compat.all(isinstance(s, (tuple, list)) and
3154                                  len(s) == 2 for s in statuses)
3155                       for inst, nnames in instdisk.items()
3156                       for nname, statuses in nnames.items())
3157     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3158
3159     return instdisk
3160
3161   @staticmethod
3162   def _SshNodeSelector(group_uuid, all_nodes):
3163     """Create endless iterators for all potential SSH check hosts.
3164
3165     """
3166     nodes = [node for node in all_nodes
3167              if (node.group != group_uuid and
3168                  not node.offline)]
3169     keyfunc = operator.attrgetter("group")
3170
3171     return map(itertools.cycle,
3172                [sorted(map(operator.attrgetter("name"), names))
3173                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3174                                                   keyfunc)])
3175
3176   @classmethod
3177   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3178     """Choose which nodes should talk to which other nodes.
3179
3180     We will make nodes contact all nodes in their group, and one node from
3181     every other group.
3182
3183     @warning: This algorithm has a known issue if one node group is much
3184       smaller than others (e.g. just one node). In such a case all other
3185       nodes will talk to the single node.
3186
3187     """
3188     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3189     sel = cls._SshNodeSelector(group_uuid, all_nodes)
3190
3191     return (online_nodes,
3192             dict((name, sorted([i.next() for i in sel]))
3193                  for name in online_nodes))
3194
3195   def BuildHooksEnv(self):
3196     """Build hooks env.
3197
3198     Cluster-Verify hooks just ran in the post phase and their failure makes
3199     the output be logged in the verify output and the verification to fail.
3200
3201     """
3202     env = {
3203       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3204       }
3205
3206     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3207                for node in self.my_node_info.values())
3208
3209     return env
3210
3211   def BuildHooksNodes(self):
3212     """Build hooks nodes.
3213
3214     """
3215     return ([], self.my_node_names)
3216
  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    The method first builds the expected state of the group (which
    instances and volumes should live on which node), then gathers
    runtime data from the nodes through the C{node_verify} RPC, and
    finally compares the two, reporting every discrepancy through
    C{self._ErrorIf}.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: boolean
    @return: C{True} for an empty node group; otherwise the negation of
        C{self.bad} as it stands once all checks have run

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    i_offline = 0 # Count of offline instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    # Filled in by MapLVsByNode below and later handed to
    # _VerifyOrphanVolumes
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
      user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)

    # Parameters of the node_verify RPC, keyed by the NV_* constants
    node_verify_param = {
      constants.NV_FILELIST:
        map(vcluster.MakeVirtualPath,
            utils.UniqueSequence(filename
                                 for files in filemap
                                 for filename in files)),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      }

    # The LVM entries are only requested when a volume group name is
    # configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]

    # Likewise, the DRBD entries are only requested when a DRBD helper is
    # configured
    if drbd_helper:
      node_verify_param[constants.NV_DRBDLIST] = None
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
      # Load file storage paths only from master node
      node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    # Record on every node image the instances using that node as primary
    # or secondary
    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]
      if inst_config.admin_state == constants.ADMINST_OFFLINE:
        i_offline += 1

      # Nodes used by the instance which have no image yet (i.e. are not in
      # node_data_list) get a placeholder image; it is marked as ghost when
      # the node is missing from self.all_node_info
      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        # sbp maps a primary node name to the instances which have this
        # node as secondary
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    # From the extra LV nodes only the LV list is requested
    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      # Work on copies so that the additional nodes only take part in the
      # file verification and not in the per-node checks below
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included,
      # excluding the master node (which we already have)
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if (nodeinfo.vm_capable and not nodeinfo.offline and
            node != master_node):
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      # The additional nodes are only asked for the file list
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    # Image of the first vm_capable node without OS failures; the OS data
    # of the nodes is verified against it
    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      # Offline nodes are counted and otherwise skipped
      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      # Without a successful RPC there is no payload to verify; the failure
      # is remembered in the node image for the instance checks below
      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)
      self._VerifyFileStoragePaths(node_i, nresult,
                                   node == master_node)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        # Instances known to the configuration run on the wrong node, all
        # others are orphans
        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    # Merge the volume data of the extra LV nodes into their node images
    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant.
      if inst_config.disk_template not in constants.DTS_MIRRORED:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               constants.CV_EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        # Maps the group of each instance node to the list of instance nodes
        # in that group
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1,
                      constants.CV_EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    # The counters gathered above are only reported as notices
    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if i_offline:
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
3605
3606   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3607     """Analyze the post-hooks' result
3608
3609     This method analyses the hook result, handles it, and sends some
3610     nicely-formatted feedback back to the user.
3611
3612     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3613         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3614     @param hooks_results: the results of the multi-node hooks rpc call
3615     @param feedback_fn: function used send feedback back to the caller
3616     @param lu_result: previous Exec result
3617     @return: the new Exec result, based on the previous result
3618         and hook results
3619
3620     """
3621     # We only really run POST phase hooks, only for non-empty groups,
3622     # and are only interested in their results
3623     if not self.my_node_names:
3624       # empty node group
3625       pass
3626     elif phase == constants.HOOKS_PHASE_POST:
3627       # Used to change hooks' output to proper indentation
3628       feedback_fn("* Hooks Results")
3629       assert hooks_results, "invalid result from hooks"
3630
3631       for node_name in hooks_results:
3632         res = hooks_results[node_name]
3633         msg = res.fail_msg
3634         test = msg and not res.offline
3635         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3636                       "Communication failure in hooks execution: %s", msg)
3637         if res.offline or msg:
3638           # No need to investigate payload if node is offline or gave
3639           # an error.
3640           continue
3641         for script, hkr, output in res.payload:
3642           test = hkr == constants.HKR_FAIL
3643           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3644                         "Script %s failed, output:", script)
3645           if test:
3646             output = self._HOOKS_INDENT_RE.sub("      ", output)
3647             feedback_fn("%s" % output)
3648             lu_result = False
3649
3650     return lu_result
3651
3652
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  The verification itself is delegated: one L{opcodes.OpGroupVerifyDisks}
  job is submitted for each node group whose lock is owned.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare locks on all node groups.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {locking.LEVEL_NODEGROUP: locking.ALL_SET}

  def Exec(self, feedback_fn):
    """Submit the per-group disk verification jobs.

    @return: a L{ResultWithJobs} holding one single-opcode job per owned
        node group

    """
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    jobs = []
    for group in owned_groups:
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])

    return ResultWithJobs(jobs)
3671
3672
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name and declare the lock levels to acquire.

    """
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()

    # The instance, node group and node levels start out empty and are
    # filled in by DeclareLocks
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],

      # This opcode acquires all node locks in a group. LUClusterVerifyDisks
      # starts one instance of this opcode for every group, which means all
      # nodes will be locked for a short amount of time, so it's better to
      # acquire the node allocation lock as well.
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

  def DeclareLocks(self, level):
    """Compute the locks needed at the given level.

    @param level: the locking level being processed

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances
      return

    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      wanted_groups = set([self.group_uuid])
      # Lock all groups used by instances optimistically; this requires
      # going via the node before it's locked, requiring verification
      # later on
      for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
        wanted_groups.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups
      return

    if level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      group = self.cfg.GetNodeGroup(self.group_uuid)
      self.needed_locks[locking.LEVEL_NODE].extend(group.members)

  def CheckPrereq(self):
    """Check that the optimistically acquired locks are still correct.

    Also loads the configuration of the owned instances into
    C{self.instances}.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    instance_info = self.cfg.GetMultiInstanceInfo(owned_instances)
    self.instances = dict(instance_info)

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only the disks of instances whose admin state is
    L{constants.ADMINST_UP} are considered.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    running = [inst for inst in self.instances.values()
               if inst.admin_state == constants.ADMINST_UP]
    nv_dict = _MapInstanceDisksToNodes(running)

    if not nv_dict:
      # Nothing to look for, hence nothing to report
      return ({}, [], {})

    res_nodes = {}
    res_instances = set()
    res_missing = {}

    owned_nodes = set(self.owned_locks(locking.LEVEL_NODE))
    vm_capable_nodes = set(self.cfg.GetVmCapableNodeList())
    nodes = utils.NiceSort(owned_nodes & vm_capable_nodes)

    node_lvs = self.rpc.call_lv_list(nodes, [])

    for (node, node_res) in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # Every LV reported by the node is taken off the expected list;
        # those which belong to an instance but are not online are
        # remembered
        inst = nv_dict.pop((node, lv_name), None)
        if not lv_online and inst is not None:
          res_instances.add(inst)

    # any leftover items in nv_dict are missing LVs, let's arrange the data
    # better
    for key, inst in nv_dict.items():
      res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
3789
3790
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  Recorded disk sizes which differ from the sizes reported by the
  primary nodes are corrected in the configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks, depending on whether instances were specified.

    """
    if not self.op.instances:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,

        # This opcode acquires the node locks for all instances
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      # Not getting the node allocation lock as only a specific set of
      # instances (and their nodes) is going to be acquired
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      locking.LEVEL_NODE_ALLOC: 1,
      }

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    @param level: the locking level being processed

    """
    if level != locking.LEVEL_NODE_RES or self.wanted_names is None:
      return

    self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    name_info_pairs = self.cfg.GetMultiInstanceInfo(self.wanted_names)
    self.wanted_instances = map(compat.snd, name_info_pairs)

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: a true value if the size of a child had to be changed,
        a false one otherwise

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    first_child = disk.children[0]
    too_small = first_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   first_child.size, disk.size)
      first_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(first_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: list
    @return: list of (instance name, disk index, new size) tuples, one
        for each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      for idx, disk in enumerate(instance.disks):
        node_disks.append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      # The RPC is done on copies, so that setting the disk IDs does not
      # touch the configuration objects
      newl = [disk.Copy() for (_, _, disk) in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)

      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue

        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue

        # The reported size is shifted down by 20 bits before being compared
        # with the recorded one
        size >>= 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))

        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
3918
3919
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    env["NEW_NAME"] = self.op.name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    master = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return ([master], all_nodes)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved; the rename is refused if neither the name nor
    the IP address would change, or if the new IP address already
    answers on the node daemon port.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    new_ip = hostname.ip
    self.ip = new_ip

    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()

    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    if (new_ip != old_ip and
        netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT)):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    # From here on the resolved name is used
    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    @param feedback_fn: function used to send feedback back to the caller
    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    deactivation = \
      self.rpc.call_node_deactivate_master_ip(master_params.name,
                                              master_params, ems)
    deactivation.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
      # ... and upload it to all online nodes except the master itself
      other_nodes = self.cfg.GetOnlineNodeList()
      try:
        other_nodes.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, other_nodes, pathutils.SSH_KNOWN_HOSTS_FILE)
    finally:
      # The master IP is activated again even if the steps above failed
      master_params.ip = new_ip
      activation = \
        self.rpc.call_node_activate_master_ip(master_params.name,
                                              master_params, ems)
      fail_msg = activation.fail_msg
      if fail_msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", fail_msg)

    return new_name
4003
4004
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  The netmask is validated by the IP address class matching the primary
  IP family of the cluster.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  family = cfg.GetPrimaryIPFamily()

  try:
    address_class = netutils.IPAddress.GetClassFromIpFamily(family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               family, errors.ECODE_INVAL)

  if address_class.ValidateNetmask(netmask):
    return

  raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                             netmask, errors.ECODE_INVAL)
4024
4025
4026 class LUClusterSetParams(LogicalUnit):
4027   """Change the parameters of the cluster.
4028
4029   """
4030   HPATH = "cluster-modify"
4031   HTYPE = constants.HTYPE_CLUSTER
4032   REQ_BGL = False
4033
4034   def CheckArguments(self):
4035     """Check parameters
4036
4037     """
4038     if self.op.uid_pool:
4039       uidpool.CheckUidPool(self.op.uid_pool)
4040
4041     if self.op.add_uids:
4042       uidpool.CheckUidPool(self.op.add_uids)
4043
4044     if self.op.remove_uids:
4045       uidpool.CheckUidPool(self.op.remove_uids)
4046
4047     if self.op.master_netmask is not None:
4048       _ValidateNetmask(self.cfg, self.op.master_netmask)
4049
4050     if self.op.diskparams:
4051       for dt_params in self.op.diskparams.values():
4052         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4053       try:
4054         utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4055       except errors.OpPrereqError, err:
4056         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4057                                    errors.ECODE_INVAL)
4058
4059   def ExpandNames(self):
4060     # FIXME: in the future maybe other cluster params won't require checking on
4061     # all nodes to be modified.
4062     # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4063     # resource locks the right thing, shouldn't it be the BGL instead?
4064     self.needed_locks = {
4065       locking.LEVEL_NODE: locking.ALL_SET,
4066       locking.LEVEL_INSTANCE: locking.ALL_SET,
4067       locking.LEVEL_NODEGROUP: locking.ALL_SET,
4068       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4069     }
4070     self.share_locks = _ShareAll()
4071
4072   def BuildHooksEnv(self):
4073     """Build hooks env.
4074
4075     """
4076     return {
4077       "OP_TARGET": self.cfg.GetClusterName(),
4078       "NEW_VG_NAME": self.op.vg_name,
4079       }
4080
4081   def BuildHooksNodes(self):
4082     """Build hooks nodes.
4083
4084     """
4085     mn = self.cfg.GetMasterNode()
4086     return ([mn], [mn])
4087
4088   def CheckPrereq(self):
4089     """Check prerequisites.
4090
4091     This checks whether the given params don't conflict and
4092     if the given volume group is valid.
4093
4094     """
4095     if self.op.vg_name is not None and not self.op.vg_name:
4096       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4097         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4098                                    " instances exist", errors.ECODE_INVAL)
4099
4100     if self.op.drbd_helper is not None and not self.op.drbd_helper:
4101       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4102         raise errors.OpPrereqError("Cannot disable drbd helper while"
4103                                    " drbd-based instances exist",
4104                                    errors.ECODE_INVAL)
4105
4106     node_list = self.owned_locks(locking.LEVEL_NODE)
4107
4108     # if vg_name not None, checks given volume group on all nodes
4109     if self.op.vg_name:
4110       vglist = self.rpc.call_vg_list(node_list)
4111       for node in node_list:
4112         msg = vglist[node].fail_msg
4113         if msg:
4114           # ignoring down node
4115           self.LogWarning("Error while gathering data on node %s"
4116                           " (ignoring node): %s", node, msg)
4117           continue
4118         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4119                                               self.op.vg_name,
4120                                               constants.MIN_VG_SIZE)
4121         if vgstatus:
4122           raise errors.OpPrereqError("Error on node '%s': %s" %
4123                                      (node, vgstatus), errors.ECODE_ENVIRON)
4124
4125     if self.op.drbd_helper:
4126       # checks given drbd helper on all nodes
4127       helpers = self.rpc.call_drbd_helper(node_list)
4128       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4129         if ninfo.offline:
4130           self.LogInfo("Not checking drbd helper on offline node %s", node)
4131           continue
4132         msg = helpers[node].fail_msg
4133         if msg:
4134           raise errors.OpPrereqError("Error checking drbd helper on node"
4135                                      " '%s': %s" % (node, msg),
4136                                      errors.ECODE_ENVIRON)
4137         node_helper = helpers[node].payload
4138         if node_helper != self.op.drbd_helper:
4139           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4140                                      (node, node_helper), errors.ECODE_ENVIRON)
4141
4142     self.cluster = cluster = self.cfg.GetClusterInfo()
4143     # validate params changes
4144     if self.op.beparams:
4145       objects.UpgradeBeParams(self.op.beparams)
4146       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4147       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4148
4149     if self.op.ndparams:
4150       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4151       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4152
4153       # TODO: we need a more general way to handle resetting
4154       # cluster-level parameters to default values
4155       if self.new_ndparams["oob_program"] == "":
4156         self.new_ndparams["oob_program"] = \
4157             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4158
4159     if self.op.hv_state:
4160       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4161                                             self.cluster.hv_state_static)
4162       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4163                                for hv, values in new_hv_state.items())
4164
4165     if self.op.disk_state:
4166       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4167                                                 self.cluster.disk_state_static)
4168       self.new_disk_state = \
4169         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4170                             for name, values in svalues.items()))
4171              for storage, svalues in new_disk_state.items())
4172
4173     if self.op.ipolicy:
4174       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4175                                             group_policy=False)
4176
4177       all_instances = self.cfg.GetAllInstancesInfo().values()
4178       violations = set()
4179       for group in self.cfg.GetAllNodeGroupsInfo().values():
4180         instances = frozenset([inst for inst in all_instances
4181                                if compat.any(node in group.members
4182                                              for node in inst.all_nodes)])
4183         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4184         ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4185         new = _ComputeNewInstanceViolations(ipol,
4186                                             new_ipolicy, instances)
4187         if new:
4188           violations.update(new)
4189
4190       if violations:
4191         self.LogWarning("After the ipolicy change the following instances"
4192                         " violate them: %s",
4193                         utils.CommaJoin(utils.NiceSort(violations)))
4194
4195     if self.op.nicparams:
4196       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4197       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4198       objects.NIC.CheckParameterSyntax(self.new_nicparams)
4199       nic_errors = []
4200
4201       # check all instances for consistency
4202       for instance in self.cfg.GetAllInstancesInfo().values():
4203         for nic_idx, nic in enumerate(instance.nics):
4204           params_copy = copy.deepcopy(nic.nicparams)
4205           params_filled = objects.FillDict(self.new_nicparams, params_copy)
4206
4207           # check parameter syntax
4208           try:
4209             objects.NIC.CheckParameterSyntax(params_filled)
4210           except errors.ConfigurationError, err:
4211             nic_errors.append("Instance %s, nic/%d: %s" %
4212                               (instance.name, nic_idx, err))
4213
4214           # if we're moving instances to routed, check that they have an ip
4215           target_mode = params_filled[constants.NIC_MODE]
4216           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4217             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4218                               " address" % (instance.name, nic_idx))
4219       if nic_errors:
4220         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4221                                    "\n".join(nic_errors), errors.ECODE_INVAL)
4222
4223     # hypervisor list/parameters
4224     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4225     if self.op.hvparams:
4226       for hv_name, hv_dict in self.op.hvparams.items():
4227         if hv_name not in self.new_hvparams:
4228           self.new_hvparams[hv_name] = hv_dict
4229         else:
4230           self.new_hvparams[hv_name].update(hv_dict)
4231
4232     # disk template parameters
4233     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4234     if self.op.diskparams:
4235       for dt_name, dt_params in self.op.diskparams.items():
4236         if dt_name not in self.op.diskparams:
4237           self.new_diskparams[dt_name] = dt_params
4238         else:
4239           self.new_diskparams[dt_name].update(dt_params)
4240
4241     # os hypervisor parameters
4242     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4243     if self.op.os_hvp:
4244       for os_name, hvs in self.op.os_hvp.items():
4245         if os_name not in self.new_os_hvp:
4246           self.new_os_hvp[os_name] = hvs
4247         else:
4248           for hv_name, hv_dict in hvs.items():
4249             if hv_name not in self.new_os_hvp[os_name]:
4250               self.new_os_hvp[os_name][hv_name] = hv_dict
4251             else:
4252               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4253
4254     # os parameters
4255     self.new_osp = objects.FillDict(cluster.osparams, {})
4256     if self.op.osparams:
4257       for os_name, osp in self.op.osparams.items():
4258         if os_name not in self.new_osp:
4259           self.new_osp[os_name] = {}
4260
4261         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4262                                                   use_none=True)
4263
4264         if not self.new_osp[os_name]:
4265           # we removed all parameters
4266           del self.new_osp[os_name]
4267         else:
4268           # check the parameter validity (remote check)
4269           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4270                          os_name, self.new_osp[os_name])
4271
4272     # changes to the hypervisor list
4273     if self.op.enabled_hypervisors is not None:
4274       self.hv_list = self.op.enabled_hypervisors
4275       for hv in self.hv_list:
4276         # if the hypervisor doesn't already exist in the cluster
4277         # hvparams, we initialize it to empty, and then (in both
4278         # cases) we make sure to fill the defaults, as we might not
4279         # have a complete defaults list if the hypervisor wasn't
4280         # enabled before
4281         if hv not in new_hvp:
4282           new_hvp[hv] = {}
4283         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4284         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4285     else:
4286       self.hv_list = cluster.enabled_hypervisors
4287
4288     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4289       # either the enabled list has changed, or the parameters have, validate
4290       for hv_name, hv_params in self.new_hvparams.items():
4291         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4292             (self.op.enabled_hypervisors and
4293              hv_name in self.op.enabled_hypervisors)):
4294           # either this is a new hypervisor, or its parameters have changed
4295           hv_class = hypervisor.GetHypervisor(hv_name)
4296           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4297           hv_class.CheckParameterSyntax(hv_params)
4298           _CheckHVParams(self, node_list, hv_name, hv_params)
4299
4300     if self.op.os_hvp:
4301       # no need to check any newly-enabled hypervisors, since the
4302       # defaults have already been checked in the above code-block
4303       for os_name, os_hvp in self.new_os_hvp.items():
4304         for hv_name, hv_params in os_hvp.items():
4305           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4306           # we need to fill in the new os_hvp on top of the actual hv_p
4307           cluster_defaults = self.new_hvparams.get(hv_name, {})
4308           new_osp = objects.FillDict(cluster_defaults, hv_params)
4309           hv_class = hypervisor.GetHypervisor(hv_name)
4310           hv_class.CheckParameterSyntax(new_osp)
4311           _CheckHVParams(self, node_list, hv_name, new_osp)
4312
4313     if self.op.default_iallocator:
4314       alloc_script = utils.FindFile(self.op.default_iallocator,
4315                                     constants.IALLOCATOR_SEARCH_PATH,
4316                                     os.path.isfile)
4317       if alloc_script is None:
4318         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4319                                    " specified" % self.op.default_iallocator,
4320                                    errors.ECODE_INVAL)
4321
4322   def Exec(self, feedback_fn):
4323     """Change the parameters of the cluster.
4324
4325     """
4326     if self.op.vg_name is not None:
4327       new_volume = self.op.vg_name
4328       if not new_volume:
4329         new_volume = None
4330       if new_volume != self.cfg.GetVGName():
4331         self.cfg.SetVGName(new_volume)
4332       else:
4333         feedback_fn("Cluster LVM configuration already in desired"
4334                     " state, not changing")
4335     if self.op.drbd_helper is not None:
4336       new_helper = self.op.drbd_helper
4337       if not new_helper:
4338         new_helper = None
4339       if new_helper != self.cfg.GetDRBDHelper():
4340         self.cfg.SetDRBDHelper(new_helper)
4341       else:
4342         feedback_fn("Cluster DRBD helper already in desired state,"
4343                     " not changing")
4344     if self.op.hvparams:
4345       self.cluster.hvparams = self.new_hvparams
4346     if self.op.os_hvp:
4347       self.cluster.os_hvp = self.new_os_hvp
4348     if self.op.enabled_hypervisors is not None:
4349       self.cluster.hvparams = self.new_hvparams
4350       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4351     if self.op.beparams:
4352       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4353     if self.op.nicparams:
4354       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4355     if self.op.ipolicy:
4356       self.cluster.ipolicy = self.new_ipolicy
4357     if self.op.osparams:
4358       self.cluster.osparams = self.new_osp
4359     if self.op.ndparams:
4360       self.cluster.ndparams = self.new_ndparams
4361     if self.op.diskparams:
4362       self.cluster.diskparams = self.new_diskparams
4363     if self.op.hv_state:
4364       self.cluster.hv_state_static = self.new_hv_state
4365     if self.op.disk_state:
4366       self.cluster.disk_state_static = self.new_disk_state
4367
4368     if self.op.candidate_pool_size is not None:
4369       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4370       # we need to update the pool size here, otherwise the save will fail
4371       _AdjustCandidatePool(self, [])
4372
4373     if self.op.maintain_node_health is not None:
4374       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4375         feedback_fn("Note: CONFD was disabled at build time, node health"
4376                     " maintenance is not useful (still enabling it)")
4377       self.cluster.maintain_node_health = self.op.maintain_node_health
4378
4379     if self.op.prealloc_wipe_disks is not None:
4380       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4381
4382     if self.op.add_uids is not None:
4383       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4384
4385     if self.op.remove_uids is not None:
4386       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4387
4388     if self.op.uid_pool is not None:
4389       self.cluster.uid_pool = self.op.uid_pool
4390
4391     if self.op.default_iallocator is not None:
4392       self.cluster.default_iallocator = self.op.default_iallocator
4393
4394     if self.op.reserved_lvs is not None:
4395       self.cluster.reserved_lvs = self.op.reserved_lvs
4396
4397     if self.op.use_external_mip_script is not None:
4398       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4399
4400     def helper_os(aname, mods, desc):
4401       desc += " OS list"
4402       lst = getattr(self.cluster, aname)
4403       for key, val in mods:
4404         if key == constants.DDM_ADD:
4405           if val in lst:
4406             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4407           else:
4408             lst.append(val)
4409         elif key == constants.DDM_REMOVE:
4410           if val in lst:
4411             lst.remove(val)
4412           else:
4413             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4414         else:
4415           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4416
4417     if self.op.hidden_os:
4418       helper_os("hidden_os", self.op.hidden_os, "hidden")
4419
4420     if self.op.blacklisted_os:
4421       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4422
4423     if self.op.master_netdev:
4424       master_params = self.cfg.GetMasterNetworkParameters()
4425       ems = self.cfg.GetUseExternalMipScript()
4426       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4427                   self.cluster.master_netdev)
4428       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4429                                                        master_params, ems)
4430       result.Raise("Could not disable the master ip")
4431       feedback_fn("Changing master_netdev from %s to %s" %
4432                   (master_params.netdev, self.op.master_netdev))
4433       self.cluster.master_netdev = self.op.master_netdev
4434
4435     if self.op.master_netmask:
4436       master_params = self.cfg.GetMasterNetworkParameters()
4437       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4438       result = self.rpc.call_node_change_master_netmask(master_params.name,
4439                                                         master_params.netmask,
4440                                                         self.op.master_netmask,
4441                                                         master_params.ip,
4442                                                         master_params.netdev)
4443       if result.fail_msg:
4444         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4445         feedback_fn(msg)
4446
4447       self.cluster.master_netmask = self.op.master_netmask
4448
4449     self.cfg.Update(self.cluster, feedback_fn)
4450
4451     if self.op.master_netdev:
4452       master_params = self.cfg.GetMasterNetworkParameters()
4453       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4454                   self.op.master_netdev)
4455       ems = self.cfg.GetUseExternalMipScript()
4456       result = self.rpc.call_node_activate_master_ip(master_params.name,
4457                                                      master_params, ems)
4458       if result.fail_msg:
4459         self.LogWarning("Could not re-enable the master ip on"
4460                         " the master, please restart manually: %s",
4461                         result.fail_msg)
4462
4463
def _UploadHelper(lu, nodes, fname):
  """Upload a file to the given nodes, warning about failed copies.

  Nothing is done if the file does not exist locally.

  @param lu: calling logical unit, used for the RPC call and the warnings
  @param nodes: the nodes the file should be uploaded to
  @param fname: path of the file to upload

  """
  if not os.path.exists(fname):
    return

  upload_results = lu.rpc.call_upload_file(nodes, fname)
  for (node_name, node_result) in upload_results.items():
    error = node_result.fail_msg
    if not error:
      continue
    lu.LogWarning("Copy of file %s to node %s failed: %s" %
                  (fname, node_name, error))
4476
4477
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @param cluster: the cluster object; its C{modify_etc_hosts},
      C{use_external_mip_script} and C{enabled_hypervisors} attributes
      are read
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @rtype: tuple
  @return: four sets of file names, in this order: files for all nodes,
      optional files, files for master candidates only, files for
      VM-capable nodes only

  """
  # Files for all nodes
  files_all = set([
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    ])

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([pathutils.RAPI_USERS_FILE])

  # Files which should only be on master candidates
  files_mc = set()

  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(pathutils.RAPI_CERT_FILE)
  else:
    files_all.update(pathutils.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
    files_mc.add(pathutils.CLUSTER_CONF_FILE)

    # File storage
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
      files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
      files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(pathutils.ETC_HOSTS)

  if cluster.use_external_mip_script:
    files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes: the first element of
  # what each enabled hypervisor reports
  files_vm = set()
  for hv_name in cluster.enabled_hypervisors:
    files_vm.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  # The second element reported by the hypervisors goes to the optional files
  for hv_name in cluster.enabled_hypervisors:
    files_opt.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  combined = files_all | files_mc | files_vm
  expected_count = len(files_all) + len(files_mc) + len(files_vm)
  assert len(combined) == expected_count, \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert combined.issuperset(files_opt), \
         "Optional file not in a different required list"

  # This one file should never ever be re-distributed via RPC
  assert not (redist and
              pathutils.FILE_STORAGE_PATHS_FILE in combined)

  return (files_all, files_opt, files_mc, files_vm)
4553
4554
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  The files for all nodes are uploaded to the online nodes and the files
  for VM-capable nodes to the online VM-capable nodes; the master node is
  excluded in both cases.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Collect the nodes to upload to
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  all_targets = lu.cfg.GetOnlineNodeList()
  vm_targets = list(frozenset(all_targets)
                    .intersection(lu.cfg.GetVmCapableNodeList()))

  if additional_nodes is not None:
    all_targets.extend(additional_nodes)
    if additional_vm:
      vm_targets.extend(additional_nodes)

  # The master node must never be a target
  master_name = master_info.name
  for targets in (all_targets, vm_targets):
    try:
      targets.remove(master_name)
    except ValueError:
      pass

  # Collect the files to upload
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # The configuration file must never be re-distributed from here
  assert not (pathutils.CLUSTER_CONF_FILE in files_all or
              pathutils.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Upload, first the files for all nodes, then those for VM-capable nodes
  for fname in files_all:
    _UploadHelper(lu, all_targets, fname)

  for fname in files_vm:
    _UploadHelper(lu, vm_targets, fname)
4604
4605
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and node allocation locks.

    The sharing mode of the locks is taken from L{_ShareAll}.

    """
    lock_levels = [locking.LEVEL_NODE, locking.LEVEL_NODE_ALLOC]
    self.needed_locks = dict((level, locking.ALL_SET)
                             for level in lock_levels)
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    The unchanged cluster object is passed to the configuration update,
    then the ancillary files are uploaded.

    @param feedback_fn: feedback function, passed on to the configuration
        update

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
4627
4628
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    The RPC call is sent to the node named in the master network
    parameters; a failed call raises an error.

    @param feedback_fn: feedback function (not used)

    """
    netparams = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    activation = self.rpc.call_node_activate_master_ip(netparams.name,
                                                       netparams,
                                                       use_ext_script)
    activation.Raise("Could not activate the master IP")
4642
4643
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    The RPC call is sent to the node named in the master network
    parameters; a failed call raises an error.

    @param feedback_fn: feedback function (not used)

    """
    netparams = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    deactivation = self.rpc.call_node_deactivate_master_ip(netparams.name,
                                                           netparams,
                                                           use_ext_script)
    deactivation.Raise("Could not deactivate the master IP")
4657
4658
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of the disks is queried on the instance's primary
  node until no disk reports a sync percentage anymore (or only once, if
  C{oneshot} is set).

  @param lu: calling logical unit, used for RPC, configuration access
      and logging
  @param instance: the instance whose disks are polled
  @param disks: the disks to check; passed through L{_ExpandCheckDisks}
      before use. An empty (but not None) value makes the function return
      immediately
  @param oneshot: if true, the status is not polled until the sync is
      finished; only the retries for failed RPC calls and for degraded
      disks are done
  @return: True if the instance has no disks or no disks were requested,
      otherwise whether no disk was seen as degraded (without sync
      progress) in the last status check
  @raise errors.RemoteError: if the node could not be queried ten times
      in a row

  """
  # "and" binds tighter than "or": nothing to do without instance disks or
  # with an explicitly empty list of disks
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.LogInfo("Waiting for instance %s to sync disks", instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  # number of consecutive failed RPC calls
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # the node answered, start counting failures from zero again
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      # a disk only counts as degraded if it is not currently syncing
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        # a reported sync percentage means the sync is still in progress
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          # NOTE(review): this is a plain assignment, so despite its name
          # max_time holds the estimate of the last disk reporting one, not
          # the maximum over all disks -- confirm whether this is intended
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.LogInfo("- device %s: %5.2f%% done, %s",
                   disks[i].iv_name, mstat.sync_percent, rem_time)

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # wait for the estimated time, but never longer than a minute; if no
    # estimate was recorded in this round max_time is still 0
    time.sleep(min(60, max_time))

  if done:
    lu.LogInfo("Instance %s's disks are in sync", instance.name)

  return not cumul_degraded
4733
4734
def _BlockdevFind(lu, node, dev, instance):
  """Annotate a device with its disk parameters and look it up on a node.

  This is a wrapper around call_blockdev_find.

  @param lu: A reference to the lu object
  @param node: The node to call out
  @param dev: The device to find
  @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
  annotated = _AnnotateDiskParams(instance, [dev], lu.cfg)
  # exactly one annotated device is expected for the single device passed in
  (annotated_dev,) = annotated
  return lu.rpc.call_blockdev_find(node, annotated_dev)
4747
4748
def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Annotate a device and check it via L{_CheckDiskConsistencyInner}.

  All arguments except C{dev} are passed on unchanged; C{dev} is replaced
  by its annotated version.

  """
  annotated = _AnnotateDiskParams(instance, [dev], lu.cfg)
  # exactly one annotated device is expected for the single device passed in
  (annotated_dev,) = annotated
  return _CheckDiskConsistencyInner(lu, instance, annotated_dev, node,
                                    on_primary, ldisk=ldisk)
4756
4757
def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
                               ldisk=False):
  """Check that mirrors are not degraded.

  @attention: The device has to be annotated already.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  The device itself is only queried if C{on_primary} is set or if the
  device is assembled on secondary nodes. Its children are checked
  recursively (without passing on C{ldisk}), stopping at the first
  failure.

  @return: whether the device and its children passed the check

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    find_result = lu.rpc.call_blockdev_find(node, dev)
    error = find_result.fail_msg
    if error:
      lu.LogWarning("Can't find disk on node %s: %s", node, error)
      consistent = False
    elif not find_result.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = find_result.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not find_result.payload.is_degraded

  if consistent and dev.children:
    for child in dev.children:
      consistent = _CheckDiskConsistencyInner(lu, instance, child, node,
                                              on_primary)
      if not consistent:
        # the remaining children are not queried after a failure
        break

  return consistent
4794
4795
4796 class LUOobCommand(NoHooksLU):
4797   """Logical unit for OOB handling.
4798
4799   """
4800   REQ_BGL = False
4801   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4802
4803   def ExpandNames(self):
4804     """Gather locks we need.
4805
4806     """
4807     if self.op.node_names:
4808       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4809       lock_names = self.op.node_names
4810     else:
4811       lock_names = locking.ALL_SET
4812
4813     self.needed_locks = {
4814       locking.LEVEL_NODE: lock_names,
4815       }
4816
4817     self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4818
4819     if not self.op.node_names:
4820       # Acquire node allocation lock only if all nodes are affected
4821       self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4822
4823   def CheckPrereq(self):
4824     """Check prerequisites.
4825
4826     This checks:
4827      - the node exists in the configuration
4828      - OOB is supported
4829
4830     Any errors are signaled by raising errors.OpPrereqError.
4831
4832     """
4833     self.nodes = []
4834     self.master_node = self.cfg.GetMasterNode()
4835
4836     assert self.op.power_delay >= 0.0
4837
4838     if self.op.node_names:
4839       if (self.op.command in self._SKIP_MASTER and
4840           self.master_node in self.op.node_names):
4841         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4842         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4843
4844         if master_oob_handler:
4845           additional_text = ("run '%s %s %s' if you want to operate on the"
4846                              " master regardless") % (master_oob_handler,
4847                                                       self.op.command,
4848                                                       self.master_node)
4849         else:
4850           additional_text = "it does not support out-of-band operations"
4851
4852         raise errors.OpPrereqError(("Operating on the master node %s is not"
4853                                     " allowed for %s; %s") %
4854                                    (self.master_node, self.op.command,
4855                                     additional_text), errors.ECODE_INVAL)
4856     else:
4857       self.op.node_names = self.cfg.GetNodeList()
4858       if self.op.command in self._SKIP_MASTER:
4859         self.op.node_names.remove(self.master_node)
4860
4861     if self.op.command in self._SKIP_MASTER:
4862       assert self.master_node not in self.op.node_names
4863
4864     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4865       if node is None:
4866         raise errors.OpPrereqError("Node %s not found" % node_name,
4867                                    errors.ECODE_NOENT)
4868       else:
4869         self.nodes.append(node)
4870
4871       if (not self.op.ignore_status and
4872           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4873         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4874                                     " not marked offline") % node_name,
4875                                    errors.ECODE_STATE)
4876
4877   def Exec(self, feedback_fn):
4878     """Execute OOB and return result if we expect any.
4879
4880     """
4881     master_node = self.master_node
4882     ret = []
4883
4884     for idx, node in enumerate(utils.NiceSort(self.nodes,
4885                                               key=lambda node: node.name)):
4886       node_entry = [(constants.RS_NORMAL, node.name)]
4887       ret.append(node_entry)
4888
4889       oob_program = _SupportsOob(self.cfg, node)
4890
4891       if not oob_program:
4892         node_entry.append((constants.RS_UNAVAIL, None))
4893         continue
4894
4895       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4896                    self.op.command, oob_program, node.name)
4897       result = self.rpc.call_run_oob(master_node, oob_program,
4898                                      self.op.command, node.name,
4899                                      self.op.timeout)
4900
4901       if result.fail_msg:
4902         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4903                         node.name, result.fail_msg)
4904         node_entry.append((constants.RS_NODATA, None))
4905       else:
4906         try:
4907           self._CheckPayload(result)
4908         except errors.OpExecError, err:
4909           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4910                           node.name, err)
4911           node_entry.append((constants.RS_NODATA, None))
4912         else:
4913           if self.op.command == constants.OOB_HEALTH:
4914             # For health we should log important events
4915             for item, status in result.payload:
4916               if status in [constants.OOB_STATUS_WARNING,
4917                             constants.OOB_STATUS_CRITICAL]:
4918                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4919                                 item, node.name, status)
4920
4921           if self.op.command == constants.OOB_POWER_ON:
4922             node.powered = True
4923           elif self.op.command == constants.OOB_POWER_OFF:
4924             node.powered = False
4925           elif self.op.command == constants.OOB_POWER_STATUS:
4926             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4927             if powered != node.powered:
4928               logging.warning(("Recorded power state (%s) of node '%s' does not"
4929                                " match actual power state (%s)"), node.powered,
4930                               node.name, powered)
4931
4932           # For configuration changing commands we should update the node
4933           if self.op.command in (constants.OOB_POWER_ON,
4934                                  constants.OOB_POWER_OFF):
4935             self.cfg.Update(node, feedback_fn)
4936
4937           node_entry.append((constants.RS_NORMAL, result.payload))
4938
4939           if (self.op.command == constants.OOB_POWER_ON and
4940               idx < len(self.nodes) - 1):
4941             time.sleep(self.op.power_delay)
4942
4943     return ret
4944
4945   def _CheckPayload(self, result):
4946     """Checks if the payload is valid.
4947
4948     @param result: RPC result
4949     @raises errors.OpExecError: If payload is not valid
4950
4951     """
4952     errs = []
4953     if self.op.command == constants.OOB_HEALTH:
4954       if not isinstance(result.payload, list):
4955         errs.append("command 'health' is expected to return a list but got %s" %
4956                     type(result.payload))
4957       else:
4958         for item, status in result.payload:
4959           if status not in constants.OOB_STATUSES:
4960             errs.append("health item '%s' has invalid status '%s'" %
4961                         (item, status))
4962
4963     if self.op.command == constants.OOB_POWER_STATUS:
4964       if not isinstance(result.payload, dict):
4965         errs.append("power-status is expected to return a dict but got %s" %
4966                     type(result.payload))
4967
4968     if self.op.command in [
4969       constants.OOB_POWER_ON,
4970       constants.OOB_POWER_OFF,
4971       constants.OOB_POWER_CYCLE,
4972       ]:
4973       if result.payload is not None:
4974         errs.append("%s is expected to not return payload but got '%s'" %
4975                     (self.op.command, result.payload))
4976
4977     if errs:
4978       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4979                                utils.CommaJoin(errs))
4980
4981
class _OsQuery(_QueryBase):
  """Query implementation for operating system definitions.

  """
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Declare locks (currently none) and record the wanted OS names.

    @param lu: the logical unit on whose behalf the query runs

    """
    # Shared locks on all nodes were temporarily removed from here and
    # should come back at some point.
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = self.names or locking.ALL_SET
    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    """Does nothing, as no locks are declared level by level.

    """
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    # Only nodes which answered the RPC get an entry for every OS; this way
    # a node whose daemon does not respond cannot make all OSes invalid
    responsive = [nname for (nname, nres) in rlist.items()
                  if not nres.fail_msg]

    per_os = {}

    for (node_name, nres) in rlist.items():
      if nres.fail_msg or not nres.payload:
        continue

      for os_entry in nres.payload:
        (name, path, status, diagnose, variants,
         params, api_versions) = os_entry

        if name not in per_os:
          # First sighting of this OS: start with an empty list for each
          # responsive node
          per_os[name] = dict((nname, []) for nname in responsive)

        # convert params from [name, help] to (name, help)
        param_tuples = [tuple(param) for param in params]

        per_os[name][node_name].append((path, status, diagnose, variants,
                                        param_tuples, api_versions))

    return per_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    All online, vm-capable nodes are asked for their OS definitions. An OS
    is considered valid only if, on every node, its first entry exists and
    has a true status; variants, parameters and API versions of a valid OS
    are the values common to the first entries of all nodes.

    @param lu: the logical unit on whose behalf the query runs
    @return: list of C{query.OsInfo} objects, in the order given by
        C{_GetNames}

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = []
    for node in lu.cfg.GetAllNodesInfo().values():
      if not node.offline and node.vm_capable:
        valid_nodes.append(node.name)

    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      is_hidden = os_name in cluster.hidden_os
      is_blacklisted = os_name in cluster.blacklisted_os
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=is_hidden, blacklisted=is_blacklisted)

      variants = set()
      parameters = set()
      api_versions = set()

      for (idx, osl) in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)
        else:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    ordered = []
    for name in self._GetNames(lu, pol.keys(), None):
      if name in data:
        ordered.append(data[name])

    return ordered
5097
5098
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: the OS names to restrict the query to
    @return: the name filter, the status filter, both combined with
        C{qlang.OP_AND}, or None if neither applies

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for fname in ["hidden", "blacklisted"]:
      if fname not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, fname]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      status_filter = [qlang.OP_AND] + conditions
    else:
      status_filter = None

    if not name_filter:
      return status_filter

    if not status_filter:
      return name_filter

    return [qlang.OP_AND, name_filter, status_filter]

  def CheckArguments(self):
    """Creates the OS query object (with locking disabled).

    """
    qfilter = self._BuildFilter(self.op.output_fields, self.op.names)
    self.oq = _OsQuery(qfilter, self.op.output_fields, False)

  def ExpandNames(self):
    """Delegates to the query object.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.oq.OldStyleQuery(self)
5141
5142
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary with the node name as both C{OP_TARGET} and
        C{NODE_NAME}

    """
    env = {}
    env["OP_TARGET"] = self.op.node_name
    env["NODE_NAME"] = self.op.node_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The node being removed is left out of the returned list (which is used
    for both elements of the result); a failed node would otherwise be
    impossible to remove, as the pre phase could not run on it.

    """
    hook_nodes = self.cfg.GetNodeList()
    if self.op.node_name in hook_nodes:
      hook_nodes.remove(self.op.node_name)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    all_instances = self.cfg.GetAllInstancesInfo()
    for (instance_name, instance) in all_instances.items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The order of the steps matters: the candidate pool is adjusted and the
    node removed from the context first, then the post hooks are run for
    the node, and only afterwards is the node asked to leave the cluster.

    """
    node = self.node
    node_name = node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Post hooks are run for the node before it is told to leave the cluster
    _RunPostHook(self, node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    leave_msg = leave_result.fail_msg
    if leave_msg:
      # Failing to clean up the remote node is not fatal
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      hosts_result = \
        self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                       constants.ETC_HOSTS_REMOVE,
                                       node_name, None)
      hosts_result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
5235
5236
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Record the wanted nodes and declare locks if live data is needed.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if not self.do_locking:
      return

    # If any non-static field is requested we need to lock the nodes
    lu.needed_locks[locking.LEVEL_NODE] = self.wanted
    lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    """Does nothing, as no locks are declared level by level.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Only the kinds of data listed in C{self.requested_data} are gathered;
    for everything else a placeholder (None, or an empty dictionary for the
    node groups) is passed on.

    @param lu: the logical unit on whose behalf the query runs
    @return: a C{query.NodeQueryData} object

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    live_data = None
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])

      # Nodes whose RPC failed or returned nothing have no live data
      live_data = {}
      for (name, nresult) in node_data.items():
        if not nresult.fail_msg and nresult.payload:
          live_data[name] = rpc.MakeLegacyNodeInfo(nresult.payload)

    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in self.requested_data:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    oob_support = None
    if query.NQ_OOB in self.requested_data:
      # This is computed for all nodes in the configuration
      oob_support = {}
      for (name, node) in all_info.items():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    node_objects = [all_info[name] for name in nodenames]

    return query.NodeQueryData(node_objects, live_data,
                               lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
5312
5313
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is delegated to a L{_NodeQuery} object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the node query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Delegates to the query object.

    """
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates to the query object.

    """
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.nq.OldStyleQuery(self)
5333
5334
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Verifies the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares the node locks needed.

    Without an explicit node list all nodes and the node allocation lock
    are requested.

    """
    self.share_locks = _ShareAll()

    if not self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }

  @staticmethod
  def _GetFieldValue(field, node, vol, vol2inst):
    """Computes the value of a single output field for one volume.

    @param field: name of the output field
    @param node: name of the node the volume was reported by
    @param vol: the volume data (a dictionary) as returned by the node
    @param vol2inst: mapping from (node, "vg/name") to the owning instance
    @raise errors.ParameterError: for unknown fields

    """
    if field == "node":
      return node
    if field == "phys":
      return vol["dev"]
    if field == "vg":
      return vol["vg"]
    if field == "name":
      return vol["name"]
    if field == "size":
      return int(float(vol["size"]))
    if field == "instance":
      # Volumes not belonging to any instance are shown as "-"
      return vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")

    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently, nodes whose RPC failed are skipped
    with a warning.

    @return: list of rows, one per volume (sorted by device within each
        node), each row holding the requested fields as strings

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      for vol in sorted(nresult.payload, key=operator.itemgetter("dev")):
        row = [str(self._GetFieldValue(field, node, vol, vol2inst))
               for field in self.op.output_fields]
        output.append(row)

    return output
5406
5407
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Verifies the requested output fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares the node locks needed.

    Without an explicit node list all nodes and the node allocation lock
    are requested.

    """
    self.share_locks = _ShareAll()

    if not self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently, nodes whose RPC failed are skipped
    with a warning.

    @return: list of rows, one per storage unit, ordered by node and then
        by storage unit name (both via C{utils.NiceSort}); each row holds
        the values of the requested output fields

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    fields = self.op.output_fields[:]
    if constants.SF_NAME not in fields:
      fields = [constants.SF_NAME] + fields

    # Never ask for node or type as it's only known to the LU
    lu_only = [constants.SF_NODE, constants.SF_TYPE]
    fields = [fname for fname in fields if fname not in lu_only]

    field_idx = dict((fname, idx) for (idx, fname) in enumerate(fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      # Index the rows by storage unit name for sorting
      rows = {}
      for row in nresult.payload:
        rows[row[name_idx]] = row

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
5492
5493
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Record the wanted instances and declare the initial locks.

    Locks are only declared if locking was requested and live data
    (C{query.IQ_LIVE}) is part of the requested data.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      # Node group and node locks start out empty; they are filled in by
      # DeclareLocks
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # Group locks are only needed if node data was requested as well
    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    """Declare the node group and node locks, if locking is used.

    @param lu: the logical unit on whose behalf the query runs
    @param level: the locking level for which locks are being declared

    """
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    """Verify the owned node group locks against the owned instances.

    This calls L{_CheckInstanceNodeGroups} for every instance whose lock is
    owned, passing the set of owned node group locks.

    @param lu: the logical unit on whose behalf the query runs

    """
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Only the kinds of data listed in C{self.requested_data} are gathered;
    for everything else a placeholder (None, or an empty dictionary for the
    live data) is passed on.

    @param lu: the logical unit on whose behalf the query runs
    @return: a C{query.InstanceQueryData} object

    """
    if self.do_grouplocks:
      # The group locks were acquired optimistically (see DeclareLocks)
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    # All nodes used by, and all hypervisors of, the selected instances
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                # NOTE(review): this merges the node's whole payload, not
                # just the entry of this instance - confirm this is intended
                live_data.update(result.payload)
              else:
                # Known instance reported by a node other than its primary
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      gmi = ganeti.masterd.instance
      # Disk usage is computed from the disk template and the disk sizes
      disk_usage = dict((inst.name,
                         gmi.ComputeDiskSize(inst.disk_template,
                                             [{constants.IDISK_SIZE: disk.size}
                                              for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      # From here on "nodes" maps node names to node objects
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5625
5626
class LUQuery(NoHooksLU):
  """Logical unit running a generic query for one kind of resource.

  The object doing the actual work is created in L{CheckArguments} from the
  class that C{_GetQueryImplementation} returns for C{self.op.what}; all
  other steps of this LU are forwarded to that object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested resource.

    The instance is built from the opcode's filter, field list and locking
    flag and kept in C{self.impl}.

    """
    impl_cls = _GetQueryImplementation(self.op.what)
    query_impl = impl_cls(self.op.qfilter, self.op.fields,
                          self.op.use_locking)
    self.impl = query_impl

  def ExpandNames(self):
    """Forwards name expansion to the query implementation.

    """
    query_impl = self.impl
    query_impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forwards lock declaration for C{level} to the query implementation.

    """
    query_impl = self.impl
    query_impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns what C{NewStyleQuery} produced.

    """
    answer = self.impl.NewStyleQuery(self)
    return answer
5647
5648
class LUQueryFields(NoHooksLU):
  """Logical unit returning field definitions for one kind of resource.

  Unlike L{LUQuery} this does not instantiate the query implementation; it
  only looks at the C{FIELDS} attribute of the implementation class.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation class for C{self.op.what}.

    """
    resource_kind = self.op.what
    self.qcls = _GetQueryImplementation(resource_kind)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = dict()

  def Exec(self, feedback_fn):
    """Returns the result of C{query.QueryFields} for the requested fields.

    """
    field_defs = self.qcls.FIELDS
    wanted = self.op.fields
    return query.QueryFields(field_defs, wanted)
5664
5665
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type has no entry in
        C{constants.MODIFIABLE_STORAGE_FIELDS}, or if one of the fields to
        change is not in the set registered for that storage type

    """
    op = self.op
    op.node_name = _ExpandNodeName(self.cfg, op.node_name)

    stype = op.storage_type

    try:
      allowed_fields = constants.MODIFIABLE_STORAGE_FIELDS[stype]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % stype,
                                 errors.ECODE_INVAL)

    # Anything requested beyond the allowed fields is an error
    rejected = set(op.changes.keys()) - allowed_fields
    if not rejected:
      return

    raise errors.OpPrereqError("The following fields can not be modified for"
                               " storage units of type '%s': %r" %
                               (stype, list(rejected)),
                               errors.ECODE_INVAL)

  def ExpandNames(self):
    """Requests the lock of the target node only.

    """
    wanted = {}
    wanted[locking.LEVEL_NODE] = self.op.node_name
    self.needed_locks = wanted

  def Exec(self, feedback_fn):
    """Asks the node to apply the changes to the storage unit.

    The RPC result is checked with C{Raise}, so a failed call is reported
    as an error instead of being ignored.

    """
    op = self.op
    extra_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    rpc_result = self.rpc.call_storage_modify(op.node_name,
                                              op.storage_type, extra_args,
                                              op.name, op.changes)
    failure_text = ("Failed to modify storage unit '%s' on %s" %
                    (op.name, op.node_name))
    rpc_result.Raise(failure_text)
5706
5707
5708 class LUNodeAdd(LogicalUnit):
5709   """Logical unit for adding node to the cluster.
5710
5711   """
5712   HPATH = "node-add"
5713   HTYPE = constants.HTYPE_NODE
5714   _NFLAGS = ["master_capable", "vm_capable"]
5715
5716   def CheckArguments(self):
5717     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5718     # validate/normalize the node name
5719     self.hostname = netutils.GetHostname(name=self.op.node_name,
5720                                          family=self.primary_ip_family)
5721     self.op.node_name = self.hostname.name
5722
5723     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5724       raise errors.OpPrereqError("Cannot readd the master node",
5725                                  errors.ECODE_STATE)
5726
5727     if self.op.readd and self.op.group:
5728       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5729                                  " being readded", errors.ECODE_INVAL)
5730
5731   def BuildHooksEnv(self):
5732     """Build hooks env.
5733
5734     This will run on all nodes before, and on all nodes + the new node after.
5735
5736     """
5737     return {
5738       "OP_TARGET": self.op.node_name,
5739       "NODE_NAME": self.op.node_name,
5740       "NODE_PIP": self.op.primary_ip,
5741       "NODE_SIP": self.op.secondary_ip,
5742       "MASTER_CAPABLE": str(self.op.master_capable),
5743       "VM_CAPABLE": str(self.op.vm_capable),
5744       }
5745
5746   def BuildHooksNodes(self):
5747     """Build hooks nodes.
5748
5749     """
5750     # Exclude added node
5751     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5752     post_nodes = pre_nodes + [self.op.node_name, ]
5753
5754     return (pre_nodes, post_nodes)
5755
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the secondary IP (defaulting to the primary one) is a valid IPv4
       address
     - the new node is not already in the config (or, for a re-add, that it
       is in the config with an unchanged secondary IP)
     - its IP addresses do not collide with those of any other node
     - its parameters (single/dual homed) matches the cluster
     - it is reachable by TCP ping on the node daemon port
     - it reports the same protocol version as this code

    The name resolution itself is done in L{CheckArguments}; this method
    works on the result stored in C{self.hostname}.

    Besides validating, this fills in C{self.op.primary_ip}, a missing
    C{self.op.secondary_ip} and missing C{_NFLAGS} op attributes, and
    stores C{self.changed_primary_ip}, C{self.master_candidate} and
    C{self.new_node} (plus C{self.new_hv_state}/C{self.new_disk_state}
    when the respective op parameters are given) for later use in L{Exec}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    # The primary IP always comes from name resolution, never from the caller
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      # Defaulting to the primary IP is impossible with an IPv6 primary, as
      # the secondary address must be IPv4 (checked just below)
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    # A plain add requires an unknown node, a re-add requires a known one
    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        # The node's own old entry: the secondary IP must be unchanged, a
        # changed primary IP is only remembered for Exec
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      # Any overlap between the new addresses and another node's addresses
      # (in either role) is a conflict
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      # Unspecified flags are inherited from the node's existing entry
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      # Unspecified flags default to True for a brand new node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    # On a re-add the node itself is passed as an exception to the
    # self-promotion decision
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    # Only master-capable nodes are considered for master candidate status
    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    # A re-add reuses the existing node object, a plain add creates a new one
    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    # The state parameters are merged against None, i.e. without taking any
    # previously stored state into account
    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)

    # TODO: If we need to have multiple DnsOnlyRunner we probably should make
    #       it a property on the base class.
    result = rpc.DnsOnlyRunner().call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    # The node must report exactly our protocol version
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)
5903
  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    Works on the node object prepared by L{CheckPrereq} (C{self.new_node}):
    its flags, node parameters and static state are set from the opcode,
    the master's hosts file is updated if the cluster is configured to
    manage it, the master verifies the node via a node-verify RPC, and
    finally the node is added (or re-added) to the context after the
    ancillary files have been redistributed.

    @param feedback_fn: function used to report verification failures to
        the user; also passed on to the configuration update on re-adds
    @raise errors.OpExecError: if the ssh/hostname verification reports
        any failure

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # Node parameters are always overwritten: without new ones they are
    # reset to an empty dict (this also applies to re-added nodes)
    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    if self.op.hv_state:
      new_node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      new_node.disk_state_static = self.new_disk_state

    # Add the node to the master's /etc/hosts, but only if the cluster is
    # configured to manage that file
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    # Only dual-homed nodes have a separate secondary IP to check
    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    # The master is the only verifier; it is asked to check the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      # A non-empty NV_NODELIST payload means failures; every entry is
      # reported to the user before aborting
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        # A failed demotion is only warned about, it does not abort the
        # re-add
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      # The node is not in the configuration yet, hence it has to be named
      # explicitly as an additional target for the file distribution
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
5995
5996
5997 class LUNodeSetParams(LogicalUnit):
5998   """Modifies the parameters of a node.
5999
6000   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6001       to the node role (as _ROLE_*)
6002   @cvar _R2F: a dictionary from node role to tuples of flags
6003   @cvar _FLAGS: a list of attribute names corresponding to the flags
6004
6005   """
6006   HPATH = "node-modify"
6007   HTYPE = constants.HTYPE_NODE
6008   REQ_BGL = False
6009   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6010   _F2R = {
6011     (True, False, False): _ROLE_CANDIDATE,
6012     (False, True, False): _ROLE_DRAINED,
6013     (False, False, True): _ROLE_OFFLINE,
6014     (False, False, False): _ROLE_REGULAR,
6015     }
6016   _R2F = dict((v, k) for k, v in _F2R.items())
6017   _FLAGS = ["master_candidate", "drained", "offline"]
6018
6019   def CheckArguments(self):
6020     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6021     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6022                 self.op.master_capable, self.op.vm_capable,
6023                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6024                 self.op.disk_state]
6025     if all_mods.count(None) == len(all_mods):
6026       raise errors.OpPrereqError("Please pass at least one modification",
6027                                  errors.ECODE_INVAL)
6028     if all_mods.count(True) > 1:
6029       raise errors.OpPrereqError("Can't set the node into more than one"
6030                                  " state at the same time",
6031                                  errors.ECODE_INVAL)
6032
6033     # Boolean value that tells us whether we might be demoting from MC
6034     self.might_demote = (self.op.master_candidate is False or
6035                          self.op.offline is True or
6036                          self.op.drained is True or
6037                          self.op.master_capable is False)
6038
6039     if self.op.secondary_ip:
6040       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6041         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6042                                    " address" % self.op.secondary_ip,
6043                                    errors.ECODE_INVAL)
6044
6045     self.lock_all = self.op.auto_promote and self.might_demote
6046     self.lock_instances = self.op.secondary_ip is not None
6047
6048   def _InstanceFilter(self, instance):
6049     """Filter for getting affected instances.
6050
6051     """
6052     return (instance.disk_template in constants.DTS_INT_MIRROR and
6053             self.op.node_name in instance.all_nodes)
6054
6055   def ExpandNames(self):
6056     if self.lock_all:
6057       self.needed_locks = {
6058         locking.LEVEL_NODE: locking.ALL_SET,
6059
6060         # Block allocations when all nodes are locked
6061         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6062         }
6063     else:
6064       self.needed_locks = {
6065         locking.LEVEL_NODE: self.op.node_name,
6066         }
6067
6068     # Since modifying a node can have severe effects on currently running
6069     # operations the resource lock is at least acquired in shared mode
6070     self.needed_locks[locking.LEVEL_NODE_RES] = \
6071       self.needed_locks[locking.LEVEL_NODE]
6072
6073     # Get all locks except nodes in shared mode; they are not used for anything
6074     # but read-only access
6075     self.share_locks = _ShareAll()
6076     self.share_locks[locking.LEVEL_NODE] = 0
6077     self.share_locks[locking.LEVEL_NODE_RES] = 0
6078     self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6079
6080     if self.lock_instances:
6081       self.needed_locks[locking.LEVEL_INSTANCE] = \
6082         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6083
6084   def BuildHooksEnv(self):
6085     """Build hooks env.
6086
6087     This runs on the master node.
6088
6089     """
6090     return {
6091       "OP_TARGET": self.op.node_name,
6092       "MASTER_CANDIDATE": str(self.op.master_candidate),
6093       "OFFLINE": str(self.op.offline),
6094       "DRAINED": str(self.op.drained),
6095       "MASTER_CAPABLE": str(self.op.master_capable),
6096       "VM_CAPABLE": str(self.op.vm_capable),
6097       }
6098
6099   def BuildHooksNodes(self):
6100     """Build hooks nodes.
6101
6102     """
6103     nl = [self.cfg.GetMasterNode(), self.op.node_name]
6104     return (nl, nl)
6105
  def CheckPrereq(self):
    """Check prerequisites.

    Validates the requested modification against the node's current
    configuration and computes the role the node will have afterwards.

    The checks cover, in this order: the instance locks still matching the
    instances selected by L{_InstanceFilter} (only when a secondary IP was
    passed), refusing role flag changes on the master node, promotion of a
    node that is not master capable, unsetting vm_capable on a node that
    has instances, the master candidate pool size when a demotion is
    possible and not all nodes were locked, power state constraints, a
    version RPC when leaving the offline role, and the single-/multi-homed
    rules plus reachability for a new secondary IP.

    Side effects: stores C{self.node}, C{self.old_flags}, C{self.old_role}
    and C{self.new_role}; resets ineffective flag requests in C{self.op}
    to None and may set C{self.op.master_candidate}; stores
    C{self.new_ndparams}, C{self.new_hv_state} and C{self.new_disk_state}
    when the respective parameters were given.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      # Recompute the affected instances now that the locks are held; the
      # set may have changed since ExpandNames computed it
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable is False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion (--auto-promote or RAPI"
                                   " auto_promote=True)", errors.ECODE_STATE)

    # Translate the node's current flag combination into a role
    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) is False and getattr(node, attr) is False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      # Leaving the offline state requires the node to be recorded as
      # powered, or to be marked as powered by this very request
      if self.op.offline is False and not (node.powered or
                                           self.op.powered is True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained is False or self.op.offline is False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable is False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    # When changing the secondary ip, verify if this is a single-homed to
    # multi-homed transition or vice versa, and apply the relevant
    # restrictions.
    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip != node.primary_ip:
        # The node would get a distinct secondary IP while the master is
        # single-homed: only allowed, with force, on the master itself
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from single-homed to multi-homed"
                          " cluster; all nodes will require a secondary IP"
                          " address")
        else:
          raise errors.OpPrereqError("Changing the secondary ip on a"
                                     " single-homed cluster requires the"
                                     " --force option to be passed, and the"
                                     " target node to be the master",
                                     errors.ECODE_INVAL)
      elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
        # The reverse case: dropping the distinct secondary IP while the
        # master is multi-homed
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from multi-homed to single-homed"
                          " cluster; secondary IP addresses will have to be"
                          " removed")
        else:
          raise errors.OpPrereqError("Cannot set the secondary IP to be the"
                                     " same as the primary IP on a multi-homed"
                                     " cluster, unless the --force option is"
                                     " passed, and the target node is the"
                                     " master", errors.ECODE_INVAL)

      # affected_instances is a dict here: a truthy secondary IP implies
      # lock_instances was set in CheckArguments
      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        # No checks can be run against an offline node, so the change is
        # only accepted if no instance is affected
        if affected_instances:
          msg = ("Cannot change secondary IP address: offline node has"
                 " instances (%s) configured to use it" %
                 utils.CommaJoin(affected_instances.keys()))
          raise errors.OpPrereqError(msg, errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    # The new values below are only computed and stored here; Exec applies
    # them to the node object
    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)
6304
  def Exec(self, feedback_fn):
    """Modifies a node.

    Applies the changes requested in the opcode to the node object held
    in C{self.node}, stores the node through C{self.cfg.Update} and, when
    exactly one of the old and new roles is the candidate role, re-adds
    the node to the context.

    @param feedback_fn: feedback function, passed on to C{self.cfg.Update}
    @rtype: list
    @return: list of C{(name, new value)} pairs for the capability flag,
        role flag and secondary IP changes that were applied; changes to
        ndparams, powered, hv_state and disk_state are not reported

    """
    node = self.node
    # NOTE(review): old_role/new_role (and old_flags, lock_all below) are
    # not set in this method; presumably computed by the earlier phases
    old_role = self.old_role
    new_role = self.new_role

    # (name, new value) pairs describing the changes made
    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    # None means "not given", so only explicit values are applied/reported
    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          # a failed demotion is only reported, it does not abort the change
          self.LogWarning("Node failed to demote itself: %s", msg)

      # translate the new role into flags and report each flag that differs
      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    # (count == 1 means exactly one of old/new role is the candidate role)
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
6363
6364
class LUNodePowercycle(NoHooksLU):
  """Logical unit asking a node to powercycle itself.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and protect the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the opcode's force flag is not set

    """
    target = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = target

    targets_master = (target == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort operation which must not wait for
    other jobs, hence no locks are requested at all.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Schedule the reboot on the node.

    @param feedback_fn: feedback function (not used here)
    @return: the payload of the powercycle RPC call

    """
    rpc_fn = self.rpc.call_node_powercycle
    powercycle = rpc_fn(self.op.node_name, self.cfg.GetHypervisorType())
    powercycle.Raise("Failed to schedule the reboot")
    return powercycle.payload
6395
6396
class LUClusterQuery(NoHooksLU):
  """Logical unit returning a summary of the cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Build the dictionary describing the cluster.

    @param feedback_fn: feedback function (not used here)
    @rtype: dict
    @return: version constants, architecture information and values read
        from the cluster configuration object, keyed by name

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters restricted to the enabled hypervisors;
    # every OS keeps its entry, even when nothing is left in it
    os_hvp = dict((os_name,
                   dict((hv_name, hv_params)
                        for (hv_name, hv_params) in hv_dict.items()
                        if hv_name in enabled_hvs))
                  for (os_name, hv_dict) in cluster.os_hvp.items())

    # Cluster-level parameters of the enabled hypervisors only
    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "diskparams": cluster.diskparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
6468
6469
class LUClusterConfigQuery(NoHooksLU):
  """Logical unit returning the values of selected cluster fields.

  All phases are delegated to a L{_ClusterQuery} helper object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query helper for the requested output fields.

    """
    self.cq = _ClusterQuery(None, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query helper compute the needed locks.

    """
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query helper declare locks for the given level.

    """
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its only row.

    @param feedback_fn: feedback function (not used here)
    @return: the first (and only) element of the old-style query result

    """
    rows = self.cq.OldStyleQuery(self)

    # exactly one row is expected from the cluster query
    assert len(rows) == 1

    return rows[0]
6491
6492
class _ClusterQuery(_QueryBase):
  """Query helper for cluster-level fields.

  """
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    """Request no locks and refuse queries asking for locking.

    @param lu: the logical unit on whose behalf the query is run
    @raise errors.OpPrereqError: if locking was requested

    """
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

    if not self.do_locking:
      return

    raise errors.OpPrereqError("Can not use locking for cluster queries",
                               errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    """Nothing to declare, cluster queries take no locks.

    """
    pass

  def _GetQueryData(self, lu):
    """Collects the data needed for the requested cluster fields.

    Each piece of data is only gathered if it appears in
    C{self.requested_data}; otherwise C{NotImplemented} is passed in
    its place.

    @param lu: the logical unit on whose behalf the query is run
    @return: a L{query.ClusterQueryData} object built from the cluster
        configuration, the queue drain flag and the watcher pause

    """
    # Locking is not used
    assert (not compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) and
            not self.do_locking and not self.use_locking)

    requested = self.requested_data
    cluster = drain_flag = watcher_pause = NotImplemented

    if query.CQ_CONFIG in requested:
      cluster = lu.cfg.GetClusterInfo()

    if query.CQ_QUEUE_DRAINED in requested:
      # the queue counts as drained when the drain file exists
      drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)

    if query.CQ_WATCHER_PAUSE in requested:
      master_name = lu.cfg.GetMasterNode()

      pause_result = lu.rpc.call_get_watcher_pause(master_name)
      pause_result.Raise("Can't retrieve watcher pause from master node '%s'" %
                         master_name)

      watcher_pause = pause_result.payload

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6545
6546
class LUInstanceActivateDisks(NoHooksLU):
  """Logical unit bringing up the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node-level locks.

    The node locks are left empty here and are recalculated (in replace
    mode) once the instance lock is held.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must exist in the configuration and its primary node
    has to pass the online check.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @param feedback_fn: feedback function (not used here)
    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled or,
        when waiting for sync was requested, if the sync check fails

    """
    instance = self.instance

    (assembled, disks_info) = \
      _AssembleInstanceDisks(self, instance, ignore_size=self.op.ignore_size)
    if not assembled:
      raise errors.OpExecError("Cannot activate block devices")

    if self.op.wait_for_sync and not _WaitForSync(self, instance):
      raise errors.OpExecError("Some disks of the instance are degraded!")

    return disks_info
6588
6589
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes: a first pass assembles
  every device on every node of its tree with the primary flag unset,
  a second pass assembles the devices on the primary node with the
  primary flag set.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a tuple C{(disks_ok, device_info)}; C{disks_ok} is False if
      any non-ignored assembly failed, C{device_info} is a list of
      (primary node, instance_visible_name, node_visible_name) tuples,
      one per disk, where the last element is the payload of the
      primary-node assembly or None if that assembly failed

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # work on a copy so the configuration object keeps its size
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        # failures on secondary nodes flagged offline by the RPC result
        # are tolerated, as are all failures if ignore_secondaries is set
        is_offline_secondary = (node in instance.secondary_nodes and
                                result.offline)
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=False, pass=1): %s",
                      inst_disk.iv_name, node, msg)
        if not (ignore_secondaries or is_offline_secondary):
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    # stays None if the assembly on the primary node fails
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        # errors on the primary node are never ignored
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=True, pass=2): %s",
                      inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
6680
6681
def _StartInstanceDisks(lu, instance, force):
  """Assemble the disks of an instance, cleaning up on failure.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be started
  @param force: passed as C{ignore_secondaries} to
      L{_AssembleInstanceDisks}; if it is false but not None, a hint
      about retrying with '--force' is logged on failure
  @raise errors.OpExecError: if the disks could not be assembled (they
      are shut down again before raising)

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.LogWarning("",
                  hint=("If the message above refers to a secondary node,"
                        " you can retry the operation using '--force'"))
  raise errors.OpExecError("Disk consistency error")
6695
6696
class LUInstanceDeactivateDisks(NoHooksLU):
  """Logical unit shutting down the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node-level locks.

    The node locks are left empty here and are recalculated (in replace
    mode) once the instance lock is held.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must exist in the configuration.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    With the force flag the disks are shut down directly, otherwise
    the variant which first checks the instance state is used.

    @param feedback_fn: feedback function (not used here)

    """
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks
    shutdown_fn(self, self.instance)
6731
6732
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shut down an instance's block devices after a state check.

  The instance is first checked against C{INSTANCE_DOWN} through
  L{_CheckInstanceState}; only afterwards L{_ShutdownInstanceDisks}
  is called.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down
  @param disks: the disks to act on, handed over unchanged to
      L{_ShutdownInstanceDisks}

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN,
                      msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks)
6742
6743
6744 def _ExpandCheckDisks(instance, disks):
6745   """Return the instance disks selected by the disks list
6746
6747   @type disks: list of L{objects.Disk} or None
6748   @param disks: selected disks
6749   @rtype: list of L{objects.Disk}
6750   @return: selected instance disks to act on
6751
6752   """
6753   if disks is None:
6754     return instance.disks
6755   else:
6756     if not set(disks).issubset(instance.disks):
6757       raise errors.ProgrammerError("Can only act on disks belonging to the"
6758                                    " target instance")
6759     return disks
6760
6761
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  The shutdown is attempted on every node of each disk's node tree and
  every failure is logged as a warning.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down
  @param disks: the disks to act on (all instance disks if None)
  @param ignore_primary: if true, errors on the primary node do not
      affect the return value
  @rtype: boolean
  @return: False if a shutdown failed on the primary node (unless
      C{ignore_primary} is set) or on another node whose RPC result is
      not flagged offline; True otherwise

  """
  success = True
  primary = instance.primary_node

  for disk in _ExpandCheckDisks(instance, disks):
    for (node, top_disk) in disk.ComputeNodeTree(primary):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if not msg:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)
      if node == primary:
        if not ignore_primary:
          success = False
      elif not result.offline:
        # failures on offline non-primary nodes are tolerated
        success = False

  return success
6786
6787
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node has at least the requested free memory.

  The node is queried via RPC for the given hypervisor; if the query
  fails, if the reported value is not an integer or if it is lower than
  the requested amount, an OpPrereqError is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: integer
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, [hypervisor_name])[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  # the payload's third element holds one entry per queried hypervisor
  (_, _, (hv_info, )) = node_result.payload
  free_mem = hv_info.get("memory_free", None)

  if not isinstance(free_mem, int):
    err = ("Can't compute free memory on node %s, result"
           " was '%s'" % (node, free_mem))
    raise errors.OpPrereqError(err, errors.ECODE_ENVIRON)

  if requested > free_mem:
    err = ("Not enough memory on node %s for %s:"
           " needed %s MiB, available %s MiB" %
           (node, reason, requested, free_mem))
    raise errors.OpPrereqError(err, errors.ECODE_NORES)

  return free_mem
6828
6829
6830 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6831   """Checks if nodes have enough free disk space in all the VGs.
6832
6833   This function checks if all given nodes have the needed amount of
6834   free disk. In case any node has less disk or we cannot get the
6835   information from the node, this function raises an OpPrereqError
6836   exception.
6837
6838   @type lu: C{LogicalUnit}
6839   @param lu: a logical unit from which we get configuration data
6840   @type nodenames: C{list}
6841   @param nodenames: the list of node names to check
6842   @type req_sizes: C{dict}
6843   @param req_sizes: the hash of vg and corresponding amount of disk in
6844       MiB to check for
6845   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6846       or we cannot check the node
6847
6848   """
6849   for vg, req_size in req_sizes.items():
6850     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6851
6852
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Verify the free disk space of nodes in one volume group.

  All nodes are queried with a single RPC call and then checked in the
  given order; the first node which cannot be queried, reports a
  non-integer value or has too little space raises an OpPrereqError.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  all_info = lu.rpc.call_node_info(nodenames, [vg], None)

  for node in nodenames:
    node_result = all_info[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    # the payload's second element holds one entry per queried VG
    (_, (vg_info, ), _) = node_result.payload
    vg_free = vg_info.get("vg_free", None)

    if not isinstance(vg_free, int):
      err = ("Can't compute free disk space on node"
             " %s for vg %s, result was '%s'" %
             (node, vg, vg_free))
      raise errors.OpPrereqError(err, errors.ECODE_ENVIRON)

    if requested > vg_free:
      err = ("Not enough disk space on target node %s"
             " vg %s: required %d MiB, available %d MiB" %
             (node, vg, requested, vg_free))
      raise errors.OpPrereqError(err, errors.ECODE_NORES)
6889
6890
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Verify that nodes have at least the requested physical CPUs.

  All nodes are queried with a single RPC call and then checked in the
  given order; the first node which cannot be queried, reports a
  non-integer value or has too few CPUs raises an OpPrereqError.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU information
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  all_info = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])

  for node in nodenames:
    node_result = all_info[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    # the payload's third element holds one entry per queried hypervisor
    (_, _, (hv_info, )) = node_result.payload
    num_cpus = hv_info.get("cpu_total", None)

    if not isinstance(num_cpus, int):
      err = ("Can't compute the number of physical CPUs"
             " on node %s, result was '%s'" %
             (node, num_cpus))
      raise errors.OpPrereqError(err, errors.ECODE_ENVIRON)

    if requested > num_cpus:
      err = ("Node %s has %s physical CPUs, but %s are "
             "required" % (node, num_cpus, requested))
      raise errors.OpPrereqError(err, errors.ECODE_NORES)
6924
6925
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Upgrade and type-check the temporary backend parameters, if any.

    """
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Lock the instance and mark node resource locks for recalculation.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node resource locks (primary node only).

    """
    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple holding the same node list (master node followed by
        all nodes of the instance) twice

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    It also validates temporary hypervisor parameters (if given), the
    instance state and, unless an offline primary node is being
    ignored, that the primary node is online, the instance's bridges
    exist and, for an instance not running yet, that the primary node
    has enough free memory.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      # the overrides are applied on top of the instance's filled parameters
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    # remembered for Exec, which skips the actual start in this case
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)
      bep.update(self.op.beparams)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        # the memory check uses the minimum memory backend parameter
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    Unless C{no_remember} is set, the instance is first marked as up in
    the configuration. If the primary node is offline nothing else is
    done; otherwise the disks are started and the instance is started
    via RPC, shutting the disks down again if the start fails.

    @param feedback_fn: feedback function (not used here)
    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        # undo the disk activation before reporting the failure
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
7046
7047
class LUInstanceReboot(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = dict(IGNORE_SECONDARIES=self.op.ignore_secondaries,
               REBOOT_TYPE=self.op.reboot_type,
               SHUTDOWN_TIMEOUT=self.op.shutdown_timeout)
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple holding the same node list (master node followed by
        all nodes of the instance) twice

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must exist in the configuration and pass the state
    check, its primary node has to be online and its bridges must
    exist.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A soft or hard reboot of a running instance is handed over to the
    reboot RPC call. In all other cases the instance is shut down (if
    it is running), its disks are restarted and the instance is
    started again. Finally the instance is marked as up in the
    configuration.

    @param feedback_fn: feedback function (not used here)
    @raise errors.OpExecError: if the instance cannot be started again

    """
    instance = self.instance
    secondaries_ignored = self.op.ignore_secondaries
    kind = self.op.reboot_type
    primary = instance.primary_node

    info = self.rpc.call_instance_info(primary, instance.name,
                                       instance.hypervisor)
    info.Raise("Error checking node %s" % primary)
    is_running = bool(info.payload)

    soft_or_hard = kind in [constants.INSTANCE_REBOOT_SOFT,
                            constants.INSTANCE_REBOOT_HARD]

    if is_running and soft_or_hard:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, primary)
      reboot = self.rpc.call_instance_reboot(primary, instance, kind,
                                             self.op.shutdown_timeout)
      reboot.Raise("Could not reboot instance")
    else:
      if not is_running:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      else:
        stop = self.rpc.call_instance_shutdown(primary, instance,
                                               self.op.shutdown_timeout)
        stop.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)

      _StartInstanceDisks(self, instance, secondaries_ignored)
      start = self.rpc.call_instance_start(primary, (instance, None, None),
                                           False)
      msg = start.fail_msg
      if msg:
        # undo the disk activation before reporting the failure
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
7140
7141
class LUInstanceShutdown(LogicalUnit):
  """Logical unit shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["TIMEOUT"] = self.op.timeout
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple holding the same node list (master node followed by
        all nodes of the instance) twice

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must exist in the configuration and pass the state
    check; its primary node has to be online, unless it is offline and
    the opcode asks for offline nodes to be ignored.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    # remembered for Exec, which skips the actual shutdown in this case
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if not (self.primary_offline and self.op.ignore_offline_nodes):
      _CheckNodeOnline(self, instance.primary_node)
    else:
      self.LogWarning("Ignoring offline primary node")

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    Unless C{no_remember} is set, the instance is first marked as down
    in the configuration. If the primary node is offline nothing else
    is done; otherwise the instance is shut down via RPC (a failure is
    only logged) and its disks are shut down.

    @param feedback_fn: feedback function (not used here)

    """
    instance = self.instance
    primary = instance.primary_node
    shutdown_timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.LogInfo("Primary node offline, marked instance as stopped")
      return

    shutdown = self.rpc.call_instance_shutdown(primary, instance,
                                               shutdown_timeout)
    msg = shutdown.fail_msg
    if msg:
      self.LogWarning("Could not shutdown instance: %s", msg)

    _ShutdownInstanceDisks(self, instance)
7211
7212
class LUInstanceReinstall(LogicalUnit):
  """Logical unit re-running the OS installation of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to L{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Assemble the hooks environment from the instance object.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Compute the nodes on which hooks are run.

    The very same list (master node followed by all nodes of the instance)
    is used for both elements of the returned pair.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Verify the prerequisites of the reinstallation.

    The primary node must be online, the instance must have disks and its
    state must match C{INSTANCE_DOWN}. If a new OS and/or OS parameters
    were given in the opcode, they are verified as well.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node,
                     "Instance primary node offline, cannot reinstall")

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, inst, INSTANCE_DOWN, msg="cannot reinstall")

    requested_os = self.op.os_type
    if requested_os is None:
      # No OS change requested, keep using the current one
      effective_os = inst.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, inst.primary_node)
      _CheckNodeHasOS(self, pnode, requested_os, self.op.force_variant)
      effective_os = requested_os

    all_nodes = list(inst.all_nodes)

    if not self.op.osparams:
      self.os_inst = None
    else:
      merged_osparams = _GetUpdatedParams(inst.osparams, self.op.osparams)
      _CheckOSParams(self, True, all_nodes, effective_os, merged_osparams)
      # the new dict (without defaults)
      self.os_inst = merged_osparams

    self.instance = inst

  def Exec(self, feedback_fn):
    """Run the OS create scripts for the instance.

    The disks are activated for the duration of the installation and are
    always shut down again afterwards.

    """
    instance = self.instance
    requested_os = self.op.os_type

    if requested_os is not None:
      feedback_fn("Changing OS to '%s'..." % requested_os)
      instance.os = requested_os
      # Write to configuration
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      pnode = instance.primary_node
      result = self.rpc.call_instance_os_add(pnode,
                                             (instance, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
7299
7300
class LUInstanceRecreateDisks(LogicalUnit):
  """Logical unit re-creating the disks of an instance.

  The disks can be recreated in place, on explicitly given replacement
  nodes or on nodes selected by an iallocator.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  # Disk parameters which may be overridden while recreating a disk
  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def _RunAllocator(self):
    """Ask the iallocator named in the opcode for new nodes.

    On success the selected nodes are stored in C{self.op.nodes}.

    @raise errors.OpPrereqError: if the allocator was not successful

    """
    inst = self.instance
    be_full = self.cfg.GetClusterInfo().FillBE(inst)

    # FIXME
    # Strictly speaking the allocator ought to be run in "relocate" mode.
    # Current allocators are, however, unable to relocate all nodes of an
    # instance at once, so "allocate" mode is used as a workaround. This is
    # suboptimal in two ways:
    # - The name handed to the allocator already appears in the list of
    #   existing instances, which could clash within the allocator's
    #   internal structures. Current allocators cope, but it's a liability.
    # - The instance's resources are counted twice: once for the existing
    #   instance and once for the "new" one being allocated.
    # It is fine for the allocator to pick nodes the instance currently
    # uses. Broken nodes should already be marked as drained or offline and
    # are hence skipped by the allocator; if disks were lost for any other
    # reason, recreating them on the same nodes is acceptable.
    disk_specs = [{constants.IDISK_SIZE: d.size,
                   constants.IDISK_MODE: d.mode}
                  for d in inst.disks]
    req = iallocator.IAReqInstanceAlloc(
      name=self.op.instance_name,
      disk_template=inst.disk_template,
      tags=list(inst.GetTags()),
      os=inst.os,
      nics=[{}],
      vcpus=be_full[constants.BE_VCPUS],
      memory=be_full[constants.BE_MAXMEM],
      spindle_use=be_full[constants.BE_SPINDLE_USE],
      disks=disk_specs,
      hypervisor=inst.hypervisor)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    assert req.RequiredNodes() == len(inst.all_nodes)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    selected = ial.result
    self.op.nodes = selected
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(selected))

  def CheckArguments(self):
    """Normalize and validate the opcode arguments.

    A plain list of disk indices (deprecated format) is converted into a
    list of C{(index, parameters)} pairs. Duplicate disk indices and
    attempts to change parameters outside of L{_MODIFYABLE} are rejected.

    """
    if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      unique_indices = sorted(frozenset(self.op.disks))
      self.op.disks = [(idx, {}) for idx in unique_indices]

    disk_indices = [compat.fst(entry) for entry in self.op.disks]
    duplicates = utils.FindDuplicates(disk_indices)
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    # We don't want _CheckIAllocatorOrNode selecting the default iallocator
    # when neither iallocator nor nodes are specified
    if self.op.iallocator or self.op.nodes:
      _CheckIAllocatorOrNode(self, "iallocator", "nodes")

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if not unsupported:
        continue
      raise errors.OpPrereqError("Parameters for disk %s try to change"
                                 " unmodifyable parameter(s): %s" %
                                 (idx, utils.CommaJoin(unsupported)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand names and set up the initial lock declarations.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    if self.op.nodes:
      # Explicit replacement nodes: lock exactly those
      expanded = [_ExpandNodeName(self.cfg, name) for name in self.op.nodes]
      self.op.nodes = expanded
      self.needed_locks[locking.LEVEL_NODE] = list(expanded)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      if self.op.iallocator:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    """Declare the locks needed at the given level.

    @param level: Lock level

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.iallocator is not None
      assert not self.op.nodes
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock the primary group used by the instance optimistically; this
      # requires going via the node before it's locked, requiring
      # verification later on
      primary_groups = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name,
                                       primary_only=True)
      self.needed_locks[locking.LEVEL_NODEGROUP] = primary_groups

    elif level == locking.LEVEL_NODE:
      # Three cases:
      # - with an allocator all nodes of the instance's current group are
      #   locked, as it is not yet known which ones will be selected
      # - with explicit replacement nodes the locks have already been
      #   declared in ExpandNames
      # - otherwise all nodes of the instance are locked for the disk
      #   re-creation
      if self.op.iallocator:
        assert not self.op.nodes
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1

        # Lock member nodes of the group of the primary node
        node_locks = self.needed_locks[locking.LEVEL_NODE]
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          node_locks.extend(self.cfg.GetNodeGroup(group_uuid).members)

        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
      elif not self.op.nodes:
        self._LockInstancesNodes(primary_only=False)

    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Assemble the hooks environment from the instance object.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Compute the nodes on which hooks are run.

    The very same list (master node followed by all nodes of the instance)
    is used for both elements of the returned pair.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Verify the prerequisites of the disk re-creation.

    Checks the number of replacement nodes, the state of the primary node
    and of the instance, the node group locks and the requested disk
    indices. If an iallocator was given, it is run at the end and the locks
    which are no longer needed are released.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    if not self.op.nodes:
      primary_node = instance.primary_node
    else:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]

    if not self.op.iallocator:
      _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      # Node group locks are acquired only for the primary node (and only
      # when the allocator is used)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
                               primary_only=True)

    # The instance state is not checked if the nodes are being replaced
    # *and* the old primary node is offline
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.iallocator or self.op.nodes) or not old_pnode.offline:
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    num_disks = len(instance.disks)

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      # No disks given: recreate all of them without parameter changes
      self.disks = dict((idx, {}) for idx in range(num_disks))

    maxidx = max(self.disks)
    if maxidx >= num_disks:
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    if ((self.op.nodes or self.op.iallocator) and
        sorted(self.disks) != list(range(num_disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

    if self.op.iallocator:
      self._RunAllocator()
      # Release unneeded node and node resource locks
      for lock_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
        _ReleaseLocks(self, lock_level, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)

    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

  def Exec(self, feedback_fn):
    """Apply the disk modifications and create the disks.

    """
    inst = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    skipped = []
    # Needed changes as (disk index, new logical ID or None, parameters)
    pending = []

    for (idx, disk) in enumerate(inst.disks):
      if idx not in self.disks:
        # Disk should not be recreated
        skipped.append(idx)
        continue

      changes = self.disks[idx]
      new_id = None

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        # otherwise disk internals have changed
        assert len(disk.logical_id) == 6
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, inst.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)

      pending.append((idx, new_id, changes))

    # All asserts above have passed by now, hence the modifications can be
    # applied in a single run (to avoid partial changes)
    for (idx, new_id, changes) in pending:
      disk = inst.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE),
                    mode=changes.get(constants.IDISK_MODE))

    if self.op.nodes:
      # change primary node and write the configuration
      inst.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")
      self.cfg.Update(inst, feedback_fn)

    # All touched nodes must be locked
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    assert owned_nodes.issuperset(frozenset(inst.all_nodes))
    _CreateDisks(self, inst, to_skip=skipped)
7600
7601
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    @raise errors.OpPrereqError: if an IP check was requested without a
      name check

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    The per-instance environment is extended with C{INSTANCE_NEW_NAME}.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The very same list (master node followed by all nodes of the instance)
    is used for both elements of the returned pair.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    The primary node must be online and the instance state must match
    C{INSTANCE_NOT_RUNNING}. If requested, the new name is resolved (and
    replaced by the resolved name) and its IP address is checked for being
    in use. Finally the new name must not belong to another instance.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = _CheckHostnameSane(self, new_name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration first; afterwards the file
    storage directory (for file-based disk templates), the disk metadata
    and the OS (via its rename script) are updated. Failures in the latter
    two steps are only reported as warnings.

    @return: the new name of the instance

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      # Must be computed before the rename changes the configuration
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    # Everything done while the disks are active is covered by the
    # try/finally, so that an unexpected error in any of the steps doesn't
    # leave the disks of a stopped instance activated
    try:
      # update info on disks
      info = _GetInstanceInfoText(inst)
      for (idx, disk) in enumerate(inst.disks):
        for node in inst.all_nodes:
          self.cfg.SetDiskID(disk, node)
          result = self.rpc.call_blockdev_setinfo(node, disk, info)
          if result.fail_msg:
            self.LogWarning("Error setting info on node %s for disk %s: %s",
                            node, idx, result.fail_msg)

      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
7722
7723
class LUInstanceRemove(LogicalUnit):
  """Logical unit removing an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set up the initial lock declarations.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the locks needed at the given level.

    @param level: Lock level

    """
    if level == locking.LEVEL_NODE_RES:
      # Copy node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = _CopyLockList(node_locks)
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Assemble the hooks environment.

    The per-instance environment is extended with the C{SHUTDOWN_TIMEOUT}
    value taken from the opcode.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hooks_env

  def BuildHooksNodes(self):
    """Compute the nodes on which hooks are run.

    The first element of the returned pair contains only the master node,
    the second one all nodes of the instance followed by the master node.

    """
    master_only = [self.cfg.GetMasterNode()]
    with_instance_nodes = list(self.instance.all_nodes) + master_only
    return (master_only, with_instance_nodes)

  def CheckPrereq(self):
    """Load the instance object from the configuration.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Shut the instance down and remove it.

    A failing shutdown aborts the removal unless the opcode asks to ignore
    failures.

    """
    inst = self.instance
    pnode = inst.primary_node
    logging.info("Shutting down instance %s on node %s", inst.name, pnode)

    result = self.rpc.call_instance_shutdown(pnode, inst,
                                             self.op.shutdown_timeout)
    failure = result.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(inst.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
7800
7801
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove the disks of an instance and the instance itself.

  The instance is removed from the cluster configuration and its lock is
  registered for removal in C{lu.remove_locks}.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to report warnings
  @param instance: the instance to remove
  @param ignore_failures: whether a failed disk removal is only reported
    via C{feedback_fn} instead of aborting the removal
  @raise errors.OpExecError: if the disks can't be removed and
    C{ignore_failures} is not set

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_removed = _RemoveDisks(lu, instance, ignore_failures=ignore_failures)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7822
7823
class LUInstanceQuery(NoHooksLU):
  """Logical unit answering instance queries.

  All the work is delegated to an L{_InstanceQuery} object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for the given level.

    @param level: Lock level

    """
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    return self.iq.OldStyleQuery(self)
7843
7844
def _ExpandNamesForMigration(lu):
  """Prepares names and lock declarations for L{TLMigrateInstance}.

  The target node name, if any, is expanded and empty lock lists are
  declared for the node, node resource and node allocation levels.

  @type lu: L{LogicalUnit}

  """
  target = lu.op.target_node
  if target is not None:
    lu.op.target_node = _ExpandNodeName(lu.cfg, target)

  # Node and node resource locks start out empty and get replaced later on
  for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
    lu.needed_locks[level] = []
    lu.recalculate_locks[level] = constants.LOCKS_REPLACE

  # The node allocation lock is actually only needed for replicated instances
  # (e.g. DRBD8) and if an iallocator is used.
  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
7863
7864
def _DeclareLocksForMigration(lu, level):
  """Declares the locks needed by L{TLMigrateInstance} at a given level.

  @type lu: L{LogicalUnit}
  @param level: Lock level

  """
  if level == locking.LEVEL_NODE_RES:
    # Copy node locks
    node_locks = lu.needed_locks[locking.LEVEL_NODE]
    lu.needed_locks[locking.LEVEL_NODE_RES] = _CopyLockList(node_locks)
    return

  if level == locking.LEVEL_NODE:
    # Node locks are declared together with the node allocation lock
    assert (lu.needed_locks[locking.LEVEL_NODE] or
            lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
    return

  if level != locking.LEVEL_NODE_ALLOC:
    return

  assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)

  instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)

  # The node locks are declared at this level already (instead of at
  # LEVEL_NODE), as the instance object is needed anyway for declaring the
  # node allocation lock.
  if instance.disk_template not in constants.DTS_EXT_MIRROR:
    lu._LockInstancesNodes() # pylint: disable=W0212
    return

  if lu.op.target_node is not None:
    lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                           lu.op.target_node]
  else:
    # Without a target node all nodes and the node allocation lock are
    # requested
    lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
  del lu.recalculate_locks[locking.LEVEL_NODE]
7899
7900
class LUInstanceFailover(LogicalUnit):
  """Logical unit failing over an instance.

  The actual work is done by a L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Copy the optional opcode arguments onto the logical unit.

    Missing C{iallocator} or C{target_node} attributes default to C{None}.

    """
    for attr_name in ("iallocator", "target_node"):
      setattr(self, attr_name, getattr(self.op, attr_name, None))

  def ExpandNames(self):
    """Expand names, declare locks and create the tasklet.

    """
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    migrater = TLMigrateInstance(self, self.op.instance_name, False, True,
                                 False, self.op.ignore_consistency, True,
                                 self.op.shutdown_timeout,
                                 self.op.ignore_ipolicy)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the locks via L{_DeclareLocksForMigration}.

    @param level: Lock level

    """
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Assemble the hooks environment.

    The failover specific values are set first and then updated with the
    generic per-instance environment.

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node

    if inst.disk_template in constants.DTS_INT_MIRROR:
      old_secondary = inst.secondary_nodes[0]
      new_secondary = old_primary
    else:
      old_secondary = new_secondary = ""

    hooks_env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "NEW_PRIMARY": self.op.target_node,
      "OLD_SECONDARY": old_secondary,
      "NEW_SECONDARY": new_secondary,
      }
    hooks_env.update(_BuildInstanceHookEnvByObject(self, inst))

    return hooks_env

  def BuildHooksNodes(self):
    """Compute the nodes on which hooks are run.

    The first element of the returned pair contains the master node and the
    secondary nodes of the instance; the second one additionally contains
    the primary node.

    """
    inst = self._migrater.instance
    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [inst.primary_node]
    return (pre_nodes, post_nodes)
7963
7964
class LUInstanceMigrate(LogicalUnit):
  """Logical unit migrating an instance.

  In contrast to a failover, which shuts the instance down, a migration
  moves the instance without shutting it down.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and create the migration tasklet.

    """
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    opcode = self.op
    # Start out as a migration; the tasklet may switch to failover on its
    # own when the opcode allows it
    self._migrater = TLMigrateInstance(
      self, opcode.instance_name,
      cleanup=opcode.cleanup,
      failover=False,
      fallback=opcode.allow_failover,
      ignore_consistency=False,
      allow_runtime_changes=opcode.allow_runtime_changes,
      shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
      ignore_ipolicy=opcode.ignore_ipolicy)

    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare locks via the helper shared with instance failover.

    """
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    Starts from the generic per-instance environment and then overlays
    the migration-specific variables.

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node
    new_primary = self.op.target_node

    env = _BuildInstanceHookEnvByObject(self, inst)

    # Internally mirrored templates swap primary and secondary roles
    if inst.disk_template in constants.DTS_INT_MIRROR:
      (old_secondary, new_secondary) = (new_primary, old_primary)
    else:
      (old_secondary, new_secondary) = (None, None)

    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": old_primary,
      "NEW_PRIMARY": new_primary,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      "OLD_SECONDARY": old_secondary,
      "NEW_SECONDARY": new_secondary,
      })

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Returns a pair of node lists: the master node followed by the
    instance's secondary nodes, and that same list with the instance's
    primary node appended.

    """
    inst = self._migrater.instance
    base_nodes = [self.cfg.GetMasterNode()]
    base_nodes.extend(inst.secondary_nodes)
    return (base_nodes, base_nodes + [inst.primary_node])
8025
8026
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its current primary node, its disks are
  created on the target node and filled with a copy of the data, the
  configuration is switched to the target node, the old disks are removed
  and, if the instance is marked as up, it is started on the target node.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the target node name and declare the needed locks.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    # NOTE(review): presumably makes the node locks computed in
    # DeclareLocks get added to the target node lock instead of replacing
    # it -- confirm against _LockInstancesNodes
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Declare node and node resource locks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    Exports the target node and the shutdown timeout, then merges the
    generic per-instance environment on top.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node, the instance's primary node and the
    target node) is returned for both elements of the result.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance can be moved to the target node: the
    target must differ from the current primary node, all disks must be
    plain logical volumes or files, and the target node must be online,
    not drained, VM-capable, compliant with the instance policy and, for
    instances marked as up, have enough free memory.

    @raise errors.OpPrereqError: if the instance already lives on the
        target node or has a disk which can't be copied

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    # Fetched once and used for both the backend parameters and the
    # instance policy
    cluster = self.cfg.GetClusterInfo()
    bep = cluster.FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    group_info = self.cfg.GetNodeGroup(node.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: feedback function, passed on to the configuration
        update
    @raise errors.OpExecError: if the shutdown fails (unless
        C{ignore_consistency} is set), if disk creation or data copy
        fails, or if the instance can't be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.LogWarning("Could not shutdown instance %s on node %s."
                        " Proceeding anyway. Please make sure node"
                        " %s is down. Error details: %s",
                        instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # Runs whether or not the removal worked; the bare raise then
        # propagates the pending error
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, (disk, instance),
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # The copy error is reported even if the removal itself failed
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # All data is on the target node now, make it the primary
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
8223
8224
class LUNodeMigrate(LogicalUnit):
  """Logical unit migrating all primary instances off a node.

  No migration is done here; one instance migration job is submitted per
  affected instance.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """Nothing to check for this opcode.

    """

  def ExpandNames(self):
    """Expand the node name and request a shared lock on that node.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.share_locks = _ShareAll()
    self.needed_locks = {locking.LEVEL_NODE: [node_name]}

  def BuildHooksEnv(self):
    """Build hooks env.

    Only the node name and the runtime-changes flag are exported.

    """
    env = {}
    env["NODE_NAME"] = self.op.node_name
    env["ALLOW_RUNTIME_CHANGES"] = self.op.allow_runtime_changes
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node is the only entry, in both elements of the result.

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)

  def CheckPrereq(self):
    """Nothing to verify before execution.

    """

  def Exec(self, feedback_fn):
    """Create one migration job per primary instance of the node.

    @param feedback_fn: feedback function (not used here)
    @return: a L{ResultWithJobs} holding one single-opcode job for each
        instance returned by L{_GetNodePrimaryInstances}

    """
    opcode = self.op

    # Prepare jobs for migration instances
    jobs = []
    for inst in _GetNodePrimaryInstances(self.cfg, opcode.node_name):
      migrate_op = opcodes.OpInstanceMigrate(
        instance_name=inst.name,
        mode=opcode.mode,
        live=opcode.live,
        iallocator=opcode.iallocator,
        target_node=opcode.target_node,
        allow_runtime_changes=opcode.allow_runtime_changes,
        ignore_ipolicy=opcode.ignore_ipolicy)
      jobs.append([migrate_op])

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([opcode.node_name]))

    return ResultWithJobs(jobs)
8287
8288
8289 class TLMigrateInstance(Tasklet):
8290   """Tasklet class for instance migration.
8291
8292   @type live: boolean
8293   @ivar live: whether the migration will be done live or non-live;
8294       this variable is initialized only after CheckPrereq has run
8295   @type cleanup: boolean
8296   @ivar cleanup: Whether we cleanup from a failed migration
8297   @type iallocator: string
8298   @ivar iallocator: The iallocator used to determine target_node
8299   @type target_node: string
8300   @ivar target_node: If given, the target_node to reallocate the instance to
8301   @type failover: boolean
8302   @ivar failover: Whether operation results in failover or migration
8303   @type fallback: boolean
8304   @ivar fallback: Whether fallback to failover is allowed if migration not
8305                   possible
8306   @type ignore_consistency: boolean
8307   @ivar ignore_consistency: Whether we should ignore consistency between source
8308                             and target node
8309   @type shutdown_timeout: int
8310   @ivar shutdown_timeout: In case of failover timeout of the shutdown
8311   @type ignore_ipolicy: bool
8312   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8313
8314   """
8315
8316   # Constants
8317   _MIGRATION_POLL_INTERVAL = 1      # seconds
8318   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8319
8320   def __init__(self, lu, instance_name, cleanup, failover, fallback,
8321                ignore_consistency, allow_runtime_changes, shutdown_timeout,
8322                ignore_ipolicy):
8323     """Initializes this class.
8324
8325     """
8326     Tasklet.__init__(self, lu)
8327
8328     # Parameters
8329     self.instance_name = instance_name
8330     self.cleanup = cleanup
8331     self.live = False # will be overridden later
8332     self.failover = failover
8333     self.fallback = fallback
8334     self.ignore_consistency = ignore_consistency
8335     self.shutdown_timeout = shutdown_timeout
8336     self.ignore_ipolicy = ignore_ipolicy
8337     self.allow_runtime_changes = allow_runtime_changes
8338
  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance, decides whether the operation ends up being a
    migration or a failover, determines and validates the target node and
    computes the migration mode.

    Attributes set here: C{instance} and C{live} always; C{failover} may be
    switched to C{True}; C{target_node} only for externally mirrored disk
    templates; C{tgt_free_mem} only when the memory check is executed;
    C{current_mem} only when the instance is reported as running.

    @raise errors.OpPrereqError: if the disk template is not mirrored, the
        chosen target is the current primary node, a target node or
        iallocator is given for an internally mirrored template, or both
        C{live} and C{mode} are set in the opcode
    @raise errors.ConfigurationError: if an internally mirrored instance
        has no secondary node

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance
    cluster = self.cfg.GetClusterInfo()

    # A migration request for an instance which is not marked up is turned
    # into a failover, provided that fallback was allowed
    if (not self.cleanup and
        not instance.admin_state == constants.ADMINST_UP and
        not self.failover and self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      # Externally mirrored: the target node comes from the opcode, either
      # directly or through an iallocator
      assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)

      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # Check that the target node is correct in terms of instance policy
      nodeinfo = self.cfg.GetNodeInfo(self.target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])
        _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    else:
      # Remaining mirrored templates: the only possible target is the
      # instance's first secondary node
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)

      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      # An explicitly requested node is tolerated only if it is the
      # secondary node itself; an iallocator is never accepted
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)
      # Same instance policy check as in the branch above
      nodeinfo = self.cfg.GetNodeInfo(target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

    i_be = cluster.FillBE(instance)

    # check memory requirements on the target node; skipped for cleanups
    # and for failovers of instances which are not marked up
    if (not self.cleanup and
         (not self.failover or instance.admin_state == constants.ADMINST_UP)):
      self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
                                               "migrating instance %s" %
                                               instance.name,
                                               i_be[constants.BE_MINMEM],
                                               instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check if failover must be forced instead of migration
    if (not self.cleanup and not self.failover and
        i_be[constants.BE_ALWAYS_FAILOVER]):
      self.lu.LogInfo("Instance configured to always failover; fallback"
                      " to failover")
      self.failover = True

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        # Ask the primary node whether the instance can be migrated; on
        # failure either fall back to failover or abort
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      # Work out the migration mode: explicit 'live' flag, explicit 'mode'
      # or the hypervisor's default, in this order
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = cluster.FillHV(self.instance, skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

    if not (self.failover or self.cleanup):
      # Real migration: record the memory reported for the instance on its
      # primary node; left unset if the node reports no data for it
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking instance on node %s" %
                        instance.primary_node)
      instance_running = bool(remote_info.payload)
      if instance_running:
        self.current_mem = int(remote_info.payload["memory"])
8502
8503   def _RunAllocator(self):
8504     """Run the allocator based on input opcode.
8505
8506     """
8507     assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8508
8509     # FIXME: add a self.ignore_ipolicy option
8510     req = iallocator.IAReqRelocate(name=self.instance_name,
8511                                    relocate_from=[self.instance.primary_node])
8512     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8513
8514     ial.Run(self.lu.op.iallocator)
8515
8516     if not ial.success:
8517       raise errors.OpPrereqError("Can't compute nodes using"
8518                                  " iallocator '%s': %s" %
8519                                  (self.lu.op.iallocator, ial.info),
8520                                  errors.ECODE_NORES)
8521     self.target_node = ial.result[0]
8522     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8523                     self.instance_name, self.lu.op.iallocator,
8524                     utils.CommaJoin(ial.result))
8525
8526   def _WaitUntilSync(self):
8527     """Poll with custom rpc for disk sync.
8528
8529     This uses our own step-based rpc call.
8530
8531     """
8532     self.feedback_fn("* wait until resync is done")
8533     all_done = False
8534     while not all_done:
8535       all_done = True
8536       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8537                                             self.nodes_ip,
8538                                             (self.instance.disks,
8539                                              self.instance))
8540       min_percent = 100
8541       for node, nres in result.items():
8542         nres.Raise("Cannot resync disks on node %s" % node)
8543         node_done, node_percent = nres.payload
8544         all_done = all_done and node_done
8545         if node_percent is not None:
8546           min_percent = min(min_percent, node_percent)
8547       if not all_done:
8548         if min_percent < 100:
8549           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8550         time.sleep(2)
8551
8552   def _EnsureSecondary(self, node):
8553     """Demote a node to secondary.
8554
8555     """
8556     self.feedback_fn("* switching node %s to secondary mode" % node)
8557
8558     for dev in self.instance.disks:
8559       self.cfg.SetDiskID(dev, node)
8560
8561     result = self.rpc.call_blockdev_close(node, self.instance.name,
8562                                           self.instance.disks)
8563     result.Raise("Cannot change disk to secondary on node %s" % node)
8564
8565   def _GoStandalone(self):
8566     """Disconnect from the network.
8567
8568     """
8569     self.feedback_fn("* changing into standalone mode")
8570     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8571                                                self.instance.disks)
8572     for node, nres in result.items():
8573       nres.Raise("Cannot disconnect disks node %s" % node)
8574
8575   def _GoReconnect(self, multimaster):
8576     """Reconnect to the network.
8577
8578     """
8579     if multimaster:
8580       msg = "dual-master"
8581     else:
8582       msg = "single-master"
8583     self.feedback_fn("* changing disks into %s mode" % msg)
8584     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8585                                            (self.instance.disks, self.instance),
8586                                            self.instance.name, multimaster)
8587     for node, nres in result.items():
8588       nres.Raise("Cannot change disks config on node %s" % node)
8589
8590   def _ExecCleanup(self):
8591     """Try to cleanup after a failed migration.
8592
8593     The cleanup is done by:
8594       - check that the instance is running only on one node
8595         (and update the config if needed)
8596       - change disks on its secondary node to secondary
8597       - wait until disks are fully synchronized
8598       - disconnect from the network
8599       - change disks into single-master mode
8600       - wait again until disks are fully synchronized
8601
8602     """
8603     instance = self.instance
8604     target_node = self.target_node
8605     source_node = self.source_node
8606
8607     # check running on only one node
8608     self.feedback_fn("* checking where the instance actually runs"
8609                      " (if this hangs, the hypervisor might be in"
8610                      " a bad state)")
8611     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8612     for node, result in ins_l.items():
8613       result.Raise("Can't contact node %s" % node)
8614
8615     runningon_source = instance.name in ins_l[source_node].payload
8616     runningon_target = instance.name in ins_l[target_node].payload
8617
8618     if runningon_source and runningon_target:
8619       raise errors.OpExecError("Instance seems to be running on two nodes,"
8620                                " or the hypervisor is confused; you will have"
8621                                " to ensure manually that it runs only on one"
8622                                " and restart this operation")
8623
8624     if not (runningon_source or runningon_target):
8625       raise errors.OpExecError("Instance does not seem to be running at all;"
8626                                " in this case it's safer to repair by"
8627                                " running 'gnt-instance stop' to ensure disk"
8628                                " shutdown, and then restarting it")
8629
8630     if runningon_target:
8631       # the migration has actually succeeded, we need to update the config
8632       self.feedback_fn("* instance running on secondary node (%s),"
8633                        " updating config" % target_node)
8634       instance.primary_node = target_node
8635       self.cfg.Update(instance, self.feedback_fn)
8636       demoted_node = source_node
8637     else:
8638       self.feedback_fn("* instance confirmed to be running on its"
8639                        " primary node (%s)" % source_node)
8640       demoted_node = target_node
8641
8642     if instance.disk_template in constants.DTS_INT_MIRROR:
8643       self._EnsureSecondary(demoted_node)
8644       try:
8645         self._WaitUntilSync()
8646       except errors.OpExecError:
8647         # we ignore here errors, since if the device is standalone, it
8648         # won't be able to sync
8649         pass
8650       self._GoStandalone()
8651       self._GoReconnect(False)
8652       self._WaitUntilSync()
8653
8654     self.feedback_fn("* done")
8655
8656   def _RevertDiskStatus(self):
8657     """Try to revert the disk status after a failed migration.
8658
8659     """
8660     target_node = self.target_node
8661     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8662       return
8663
8664     try:
8665       self._EnsureSecondary(target_node)
8666       self._GoStandalone()
8667       self._GoReconnect(False)
8668       self._WaitUntilSync()
8669     except errors.OpExecError, err:
8670       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8671                          " please try to recover the instance manually;"
8672                          " error '%s'" % str(err))
8673
8674   def _AbortMigration(self):
8675     """Call the hypervisor code to abort a started migration.
8676
8677     """
8678     instance = self.instance
8679     target_node = self.target_node
8680     source_node = self.source_node
8681     migration_info = self.migration_info
8682
8683     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8684                                                                  instance,
8685                                                                  migration_info,
8686                                                                  False)
8687     abort_msg = abort_result.fail_msg
8688     if abort_msg:
8689       logging.error("Aborting migration failed on target node %s: %s",
8690                     target_node, abort_msg)
8691       # Don't raise an exception here, as we stil have to try to revert the
8692       # disk status, even if this step failed.
8693
8694     abort_result = self.rpc.call_instance_finalize_migration_src(
8695       source_node, instance, False, self.live)
8696     abort_msg = abort_result.fail_msg
8697     if abort_msg:
8698       logging.error("Aborting migration failed on source node %s: %s",
8699                     source_node, abort_msg)
8700
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    The disk mode changes are only done for disk templates which are not
    in C{constants.DTS_EXT_MIRROR}. Before the migration starts, the
    hypervisor versions of both nodes are compared (a mismatch only
    produces a warning), the disks are checked for consistency on the
    target node and, if the instance uses more memory than is free on the
    target, its memory is ballooned down (when runtime changes are
    allowed).

    If preparing the target, starting the migration or the memory transfer
    fails, the migration is aborted on both nodes and the disk status is
    reverted before the error is raised. Failures while finalizing a
    migration whose memory transfer has completed are raised without any
    such rollback.

    @raise errors.OpExecError: if a disk is degraded on the target node,
        if there is not enough memory and ballooning is not allowed, or if
        any of the migration steps fails

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, [self.instance.hypervisor])
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    # The third payload element holds one entry per requested hypervisor;
    # exactly one hypervisor was requested above.
    (_, _, (src_info, )) = nodeinfo[source_node].payload
    (_, _, (dst_info, )) = nodeinfo[target_node].payload

    # The comparison is skipped when either node does not report a version
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    # The instance currently uses more memory than is free on the target:
    # either shrink it to what is available there or give up.
    if self.current_mem > self.tgt_free_mem:
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (instance.name, target_node,
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # Also kept on self, as _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    # Poll the source node until the memory transfer is no longer active,
    # giving periodic progress feedback in the meantime.
    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      # "ms" is only inspected when the RPC itself succeeded (msg is empty)
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        # NOTE(review): when the failure comes from the reported status,
        # "msg" is still empty at this point, so the log line carries no
        # details.
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        if not msg:
          msg = "hypervisor returned failure"
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      # Progress is only reported if the feedback interval has expired and
      # the status carries transfer figures.
      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        # NOTE(review): assumes ms.total_ram is a non-zero number whenever
        # transferred_ram is set -- TODO confirm
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    # The memory transfer is done; from here on failures are raised without
    # aborting the migration or reverting the disk status.
    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    # Demote the old primary and bring the disks back to single-master mode
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    # If the instance's disk template is `rbd' and there was a successful
    # migration, unmap the device from the source node.
    if self.instance.disk_template == constants.DT_RBD:
      disks = _ExpandCheckDisks(instance, instance.disks)
      self.feedback_fn("* unmapping instance's disks from %s" % source_node)
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
        msg = result.fail_msg
        if msg:
          # Only logged: a failed unmap does not fail the migration
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name, source_node, msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name, source_node)

    self.feedback_fn("* done")
8884
  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    The disk consistency on the target node is only checked if the
    instance is marked as up; degraded disks are tolerated if the primary
    node is offline or if C{self.ignore_consistency} is set. After the
    instance and its disks were shut down on the source node, the target
    node is recorded as the new primary node, and the instance is started
    there only if it is marked as up.

    @raise errors.OpExecError: if a disk is degraded on the target node
        (and this is not to be ignored), if the instance or its disks
        cannot be shut down, or if the disks or the instance cannot be
        started on the target node

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* checking disk consistency between source and target")
      for (idx, dev) in enumerate(instance.disks):
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
                                     False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, idx, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % idx)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # A failed shutdown is only tolerated when consistency is to be
      # ignored or when the primary node is marked offline.
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    # From here on the target node is the primary node of the instance
    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        # Do not leave partially assembled disks behind
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        # The instance did not start, so its disks are shut down again
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
8964   def Exec(self, feedback_fn):
8965     """Perform the migration.
8966
8967     """
8968     self.feedback_fn = feedback_fn
8969     self.source_node = self.instance.primary_node
8970
8971     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8972     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8973       self.target_node = self.instance.secondary_nodes[0]
8974       # Otherwise self.target_node has been populated either
8975       # directly, or through an iallocator.
8976
8977     self.all_nodes = [self.source_node, self.target_node]
8978     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8979                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8980
8981     if self.failover:
8982       feedback_fn("Failover instance %s" % self.instance.name)
8983       self._ExecFailover()
8984     else:
8985       feedback_fn("Migrating instance %s" % self.instance.name)
8986
8987       if self.cleanup:
8988         return self._ExecCleanup()
8989       else:
8990         return self._ExecMigration()
8991
8992
def _CreateBlockDev(lu, node, instance, device, force_create, info,
                    force_open):
  """Annotate a root device and create its device tree on a node.

  This is a thin wrapper around L{_CreateBlockDevInner}: the only thing
  done here is annotating the given (root) device with the disk
  parameters; all other arguments are passed through unchanged.

  """
  annotated = _AnnotateDiskParams(instance, [device], lu.cfg)
  # Exactly one device was passed in, so exactly one must come back
  (root_disk,) = annotated
  return _CreateBlockDevInner(lu, node, instance, root_disk, force_create,
                              info, force_open)
9003
9004
def _CreateBlockDevInner(lu, node, instance, device, force_create,
                         info, force_open):
  """Recursively create a device and its children on a given node.

  The children are always visited before the device itself. A device is
  only created if creation is forced, which is the case either because
  the caller asked for it or because the device itself (or one of its
  ancestors) reports that it has to be created on secondaries.

  @attention: The device has to be annotated already.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      is switched to True for this device and all its children as soon
      as a device answers True to CreateOnSecondary()
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
    do_create = True
  else:
    do_create = force_create

  # Children first, inheriting the (possibly upgraded) creation flag
  for child in (device.children or []):
    _CreateBlockDevInner(lu, node, instance, child, do_create,
                         info, force_open)

  if do_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
9047
9048
9049 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
9050   """Create a single block device on a given node.
9051
9052   This will not recurse over children of the device, so they must be
9053   created in advance.
9054
9055   @param lu: the lu on whose behalf we execute
9056   @param node: the node on which to create the device
9057   @type instance: L{objects.Instance}
9058   @param instance: the instance which owns the device
9059   @type device: L{objects.Disk}
9060   @param device: the device to create
9061   @param info: the extra 'metadata' we should attach to the device
9062       (this will be represented as a LVM tag)
9063   @type force_open: boolean
9064   @param force_open: this parameter will be passes to the
9065       L{backend.BlockdevCreate} function where it specifies
9066       whether we run on primary or not, and it affects both
9067       the child assembly and the device own Open() execution
9068
9069   """
9070   lu.cfg.SetDiskID(device, node)
9071   result = lu.rpc.call_blockdev_create(node, device, device.size,
9072                                        instance.name, force_open, info)
9073   result.Raise("Can't create block device %s on"
9074                " node %s for instance %s" % (device, node, instance.name))
9075   if device.physical_id is None:
9076     device.physical_id = result.payload
9077
9078
9079 def _GenerateUniqueNames(lu, exts):
9080   """Generate a suitable LV name.
9081
9082   This will generate a logical volume name for the given instance.
9083
9084   """
9085   results = []
9086   for val in exts:
9087     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9088     results.append("%s%s" % (new_id, val))
9089   return results
9090
9091
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Build a DRBD8 disk object together with its two LV children.

  A network port and a shared secret are allocated for the new device.
  The first entries of C{vgnames} and C{names} describe the data volume
  (of the requested size), the second entries the metadata volume (of
  size C{constants.DRBD_META_SIZE}).

  @return: the DRBD8 disk object having the data and metadata LVs as
      children

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgnames[0], names[0]),
                         params={})
  meta_lv = objects.Disk(dev_type=constants.LD_LV,
                         size=constants.DRBD_META_SIZE,
                         logical_id=(vgnames[1], names[1]),
                         params={})

  drbd_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name, params={})
9115
9116
# Prefix put in front of the ".disk<N>" part of generated disk names, per
# disk template; no names are generated for templates not listed here
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  }


# Logical device type used for the disks of each (non-DRBD) disk template
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  }
9130
9131
def _GenerateDiskTemplate(
  lu, template_name, instance_name, primary_node, secondary_nodes,
  disk_info, file_storage_dir, file_driver, base_index,
  feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
  _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Build the list of disk objects for a given disk template.

  Diskless instances get no disks. For DRBD8 exactly one secondary node
  is required and every disk becomes a DRBD8 device on top of a data and
  a metadata logical volume. All other templates must not have secondary
  nodes and get one plain disk object per entry of C{disk_info}; feedback
  is only sent for these.

  @param disk_info: list of dictionaries describing the requested disks
  @param base_index: index of the first generated disk
  @return: list of disk objects
  @raise errors.ProgrammerError: if the number of secondary nodes does
      not fit the template or if the template is unknown

  """
  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  disks = []

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]

    # Two minors per disk: one on the primary, one on the secondary node
    minor_nodes = [primary_node, remote_node] * len(disk_info)
    minors = lu.cfg.AllocateDRBDMinor(minor_nodes, instance_name)

    ld_params = objects.Disk.ComputeLDParams(template_name, full_disk_params)
    (drbd_params, _, _) = ld_params
    default_meta_vg = drbd_params[constants.LDP_DEFAULT_METAVG]

    suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    lv_prefixes = _GenerateUniqueNames(lu, suffixes)

    for (idx, disk) in enumerate(disk_info):
      lv_prefix = lv_prefixes[idx]
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, default_meta_vg)
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      [lv_prefix + "_data",
                                       lv_prefix + "_meta"],
                                      "disk/%d" % (idx + base_index),
                                      minors[idx * 2], minors[idx * 2 + 1])
      drbd_dev.mode = disk[constants.IDISK_MODE]
      disks.append(drbd_dev)

    return disks

  # All remaining templates keep their data on a single node
  if secondary_nodes:
    raise errors.ProgrammerError("Wrong template configuration")

  if template_name == constants.DT_FILE:
    _req_file_storage()
  elif template_name == constants.DT_SHARED_FILE:
    _req_shr_file_storage()

  name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
  if name_prefix is not None:
    names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                      (name_prefix, base_index + i)
                                      for i in range(disk_count)])
  else:
    names = None

  # Each helper below receives (position in disk_info, absolute disk
  # index, disk specification) and returns the logical ID of the disk
  def _PlainLogicalId(idx, _, disk):
    return (disk.get(constants.IDISK_VG, vgname), names[idx])

  def _FileLogicalId(_, disk_index, _disk):
    return (file_driver, "%s/disk%d" % (file_storage_dir, disk_index))

  def _BlockLogicalId(_idx, _disk_index, disk):
    return (constants.BLOCKDEV_DRIVER_MANUAL, disk[constants.IDISK_ADOPT])

  def _RbdLogicalId(idx, _, _disk):
    return ("rbd", names[idx])

  if template_name == constants.DT_PLAIN:
    logical_id_fn = _PlainLogicalId
  elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
    logical_id_fn = _FileLogicalId
  elif template_name == constants.DT_BLOCK:
    logical_id_fn = _BlockLogicalId
  elif template_name == constants.DT_RBD:
    logical_id_fn = _RbdLogicalId
  else:
    raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

  dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

  for (idx, disk) in enumerate(disk_info):
    disk_index = idx + base_index
    size = disk[constants.IDISK_SIZE]
    feedback_fn("* disk %s, size %s" %
                (disk_index, utils.FormatUnit(size, "h")))
    logical_id = logical_id_fn(idx, disk_index, disk)
    disks.append(objects.Disk(dev_type=dev_type, size=size,
                              logical_id=logical_id,
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE],
                              params={}))

  return disks
9225
9226
9227 def _GetInstanceInfoText(instance):
9228   """Compute that text that should be added to the disk's metadata.
9229
9230   """
9231   return "originstname+%s" % instance.name
9232
9233
9234 def _CalcEta(time_taken, written, total_size):
9235   """Calculates the ETA based on size written and total size.
9236
9237   @param time_taken: The time taken so far
9238   @param written: amount written so far
9239   @param total_size: The total size of data to be written
9240   @return: The remaining time in seconds
9241
9242   """
9243   avg_time = time_taken / float(written)
9244   return (total_size - written) * avg_time
9245
9246
def _WipeDisks(lu, instance, disks=None):
  """Wipes instance disks.

  The synchronization of the disks is paused while wiping and resumed
  afterwards, even if the wipe fails. The wipe itself is done in chunks,
  with progress being reported at most once per minute.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @type disks: None or list of tuples
  @param disks: the disks to wipe, as tuples of (disk index, disk object,
      start offset); if None, all disks of the instance are wiped from
      offset 0

  """
  node = instance.primary_node

  if disks is None:
    disks = [(idx, disk, 0)
             for (idx, disk) in enumerate(instance.disks)]

  for (_, device, _) in disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pausing synchronization of disks of instance '%s'",
               instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (map(compat.snd, disks),
                                                   instance),
                                                  True)
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  # A failure to pause individual disks is only logged
  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("Pausing synchronization of disk %s of instance '%s'"
                   " failed", idx, instance.name)

  try:
    for (idx, device, offset) in disks:
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
      # NOTE(review): for very small disks the truncation could yield a
      # chunk size of 0, in which case the loop below would not advance;
      # depends on the values of the two constants -- TODO confirm
      wipe_chunk_size = \
        int(min(constants.MAX_WIPE_CHUNK,
                device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))

      size = device.size
      # Zero makes the first progress message appear right after the first
      # chunk has been wiped
      last_output = 0
      start_time = time.time()

      if offset == 0:
        info_text = ""
      else:
        info_text = (" (from %s to %s)" %
                     (utils.FormatUnit(offset, "h"),
                      utils.FormatUnit(size, "h")))

      lu.LogInfo("* Wiping disk %s%s", idx, info_text)

      logging.info("Wiping disk %d for instance %s on node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      while offset < size:
        # The last chunk may be smaller than the regular chunk size
        wipe_size = min(wipe_chunk_size, size - offset)

        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)

        result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
                                           wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))

        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          # NOTE(review): the ETA is based on the absolute offset, i.e. it
          # includes the part skipped when starting from a non-zero offset
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s",
                     offset / float(size) * 100, utils.FormatSeconds(eta))
          last_output = now
  finally:
    # Always try to resume the synchronization, also if wiping failed;
    # problems at this stage only lead to warnings
    logging.info("Resuming synchronization of disks for instance '%s'",
                 instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                    (map(compat.snd, disks),
                                                     instance),
                                                    False)

    if result.fail_msg:
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                    node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
                        " failed", idx, instance.name)
9338
9339
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based disk
  templates the storage directory is created first. Every disk is then
  created on all relevant nodes, with creation and opening being forced
  only on the primary (or overriding target) node.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
      and is the only node on which disks are created

  """
  info_text = _GetInstanceInfoText(instance)

  if target_node is not None:
    main_node = target_node
    creation_nodes = [main_node]
  else:
    main_node = instance.primary_node
    creation_nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(main_node, storage_dir)
    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (storage_dir, main_node))

  skipped = to_skip or ()

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for (idx, device) in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    #HARDCODE
    for node in creation_nodes:
      on_main_node = (node == main_node)
      _CreateBlockDev(lu, node, instance, device, on_main_node, info_text,
                      on_main_node)
9382
9383
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  A failed removal on an offline node which is not the primary node of
  the instance does not count as a failure.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @type ignore_failures: boolean
  @param ignore_failures: whether the DRBD ports should be returned to
      the pool even if removing some of the devices failed
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      if result.fail_msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, result.fail_msg)
        if not (result.offline and node != instance.primary_node):
          all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  # Ports are kept reserved if a removal failed, unless told otherwise
  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # Report the node the removal was actually attempted on, which is
      # not the primary node if target_node was given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
9442
9443
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  @param disk_template: the disk template for which the requirements
      are computed; must be one of the templates handled below
  @param disks: the disk definitions; each one is indexed with the
      L{constants.IDISK_VG} and L{constants.IDISK_SIZE} keys
  @rtype: dict
  @return: for the plain and drbd8 templates a mapping from volume group
      name to the total size needed in that volume group; an empty dict
      for the diskless and file-based templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  def _compute(disks, payload):
    """Sum the disk sizes, plus a per-disk payload, per volume group.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # The running total has to be read under the disk's own volume group
      # name; reading it under the IDISK_VG constant always yields 0, which
      # made every disk overwrite the previous ones of the same group
      vgs[vg_name] = (vgs.get(vg_name, 0) +
                      disk[constants.IDISK_SIZE] + payload)

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # DRBD_META_SIZE is added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
9474
9475
def _FilterVmNodes(lu, nodenames):
  """Drop the nodes which are not vm_capable from a list of nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit whose configuration is queried
  @type nodenames: list
  @param nodenames: the node names to filter
  @rtype: list
  @return: the names from C{nodenames}, in the same order, which are not
      reported as non-vm_capable by the configuration

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())

  kept = []
  for node in nodenames:
    if node in non_vm_nodes:
      continue
    kept.append(node)

  return kept
9489
9490
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Validate hypervisor parameters on a set of nodes.

  The given parameters are layered on top of the cluster-level
  parameters of the hypervisor and the result is sent for validation to
  the vm_capable nodes among C{nodenames}. Results from offline nodes
  are ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf the check is done
  @type nodenames: list
  @param nodenames: the nodes on which the parameters should be checked
  @type hvname: string
  @param hvname: the name of the hypervisor to validate for
  @type hvparams: dict
  @param hvparams: the parameters which need to be checked
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)

  cluster_hvparams = lu.cfg.GetClusterInfo().hvparams.get(hvname, {})
  filled = objects.FillDict(cluster_hvparams, hvparams)

  results = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname, filled)
  for node in vm_nodes:
    node_result = results[node]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node)
9519
9520
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """Validate OS parameters on a set of nodes.

  Only the vm_capable nodes among C{nodenames} are asked to validate.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf the check is done
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the nodes on which the parameters should be checked
  @type osname: string
  @param osname: the name of the OS whose parameters are validated
  @type osparams: dict
  @param osparams: the parameters which need to be checked
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  per_node = lu.rpc.call_os_validate(vm_nodes, required, osname, checks,
                                     osparams)

  for (node, node_result) in per_node.items():
    # offline nodes are not special-cased since this should be run only
    # against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" % node)
    if node_result.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node)
9549
9550
def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
  """Build the instance allocation request for the iallocator.

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics; each one is serialized via C{ToDict}
  @param beparams: The fully filled beparams
  @param node_whitelist: List of nodes which should appear as online to the
    allocator (unless the node is already marked offline)

  @returns: A filled L{iallocator.IAReqInstanceAlloc}

  """
  spindle_use = beparams[constants.BE_SPINDLE_USE]
  vcpus = beparams[constants.BE_VCPUS]
  memory = beparams[constants.BE_MAXMEM]
  nic_dicts = [nic.ToDict() for nic in nics]

  request = iallocator.IAReqInstanceAlloc(name=op.instance_name,
                                          disk_template=op.disk_template,
                                          tags=op.tags,
                                          os=op.os_type,
                                          vcpus=vcpus,
                                          memory=memory,
                                          spindle_use=spindle_use,
                                          disks=disks,
                                          nics=nic_dicts,
                                          hypervisor=op.hypervisor,
                                          node_whitelist=node_whitelist)
  return request
9576
9577
def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
  """Build the NIC objects described by the opcode.

  For every NIC definition the mode, network, link, IP and MAC address
  are validated; explicitly given MAC addresses are reserved in the
  configuration.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The IP to use for NICs whose IP is set to "auto"
  @param cfg: An instance of the configuration object
  @param ec_id: Execution context ID
  @raise errors.OpPrereqError: if a NIC definition is not valid or its
    MAC address is already in use

  @returns: The list of built L{objects.NIC}

  """
  result = []
  for nic in op.nics:
    requested_mode = nic.get(constants.INIC_MODE, None)
    if requested_mode is None or requested_mode == constants.VALUE_AUTO:
      mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
    else:
      mode = requested_mode

    network = nic.get(constants.INIC_NETWORK, None)
    link = nic.get(constants.NIC_LINK, None)
    ip = nic.get(constants.INIC_IP, None)

    if network is None or network.lower() == constants.VALUE_NONE:
      network = None
    elif requested_mode is not None or link is not None:
      raise errors.OpPrereqError("If network is given, no mode or link"
                                 " is allowed to be passed",
                                 errors.ECODE_INVAL)

    # ip validity checks
    if ip is None:
      ip_lower = None
    else:
      ip_lower = ip.lower()

    if ip is None or ip_lower == constants.VALUE_NONE:
      nic_ip = None
    elif ip_lower == constants.VALUE_AUTO:
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
                                   errors.ECODE_INVAL)
      nic_ip = default_ip
    elif ip_lower == constants.NIC_IP_POOL:
      # Pool operations are deferred until later, so that the iallocator
      # has filled in the instance's node(s)
      if network is None:
        raise errors.OpPrereqError("if ip=pool, parameter network"
                                   " must be passed too",
                                   errors.ECODE_INVAL)
      nic_ip = ip
    elif not netutils.IPAddress.IsValid(ip):
      raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                 errors.ECODE_INVAL)
    else:
      nic_ip = ip

    # TODO: check the ip address for uniqueness
    if mode == constants.NIC_MODE_ROUTED and not nic_ip:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                 errors.ECODE_INVAL)

    # MAC address verification; "auto" and "generate" are passed through
    mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)

      try:
        # TODO: We need to factor this out
        cfg.ReserveMAC(mac, ec_id)
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address %s already in use"
                                   " in cluster" % mac,
                                   errors.ECODE_NOTUNIQUE)

    # Only explicitly requested values are stored in the nic parameters
    nicparams = {}
    if requested_mode:
      nicparams[constants.NIC_MODE] = mode
    if link:
      nicparams[constants.NIC_LINK] = link

    # The syntax check is done on the parameters filled with the defaults
    objects.NIC.CheckParameterSyntax(cluster.SimpleFillNIC(nicparams))
    result.append(objects.NIC(mac=mac, ip=nic_ip,
                              network=network, nicparams=nicparams))

  return result
9664
9665
def _ComputeDisks(op, default_vg):
  """Compute the normalized disk definitions of an instance.

  @param op: The instance opcode
  @param default_vg: The volume group to use for disks which don't
    specify one
  @raise errors.OpPrereqError: if a disk has an invalid access mode or a
    missing or invalid size

  @return: The computed disks, as a list of dicts

  """
  optional_keys = (constants.IDISK_METAVG, constants.IDISK_ADOPT)

  result = []
  for disk in op.disks:
    mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
    if mode not in constants.DISK_ACCESS_SET:
      raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                 mode, errors.ECODE_INVAL)

    raw_size = disk.get(constants.IDISK_SIZE, None)
    if raw_size is None:
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
    try:
      size = int(raw_size)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid disk size '%s'" % raw_size,
                                 errors.ECODE_INVAL)

    new_disk = {
      constants.IDISK_SIZE: size,
      constants.IDISK_MODE: mode,
      constants.IDISK_VG: disk.get(constants.IDISK_VG, default_vg),
      }
    # The meta VG and the adoption source are copied only when given
    for key in optional_keys:
      if key in disk:
        new_disk[key] = disk[key]

    result.append(new_disk)

  return result
9703
9704
def _ComputeFullBeParams(op, cluster):
  """Compute the fully filled backend parameters.

  Note that C{op.beparams} is modified in place: values set to "auto"
  are replaced by the cluster defaults before the parameters are
  upgraded and type-checked.

  @param op: The instance opcode
  @param cluster: The cluster config object

  @return: The fully filled beparams

  """
  beparams = op.beparams
  defaults = cluster.beparams[constants.PP_DEFAULT]

  auto_names = [name for (name, value) in beparams.items()
                if value == constants.VALUE_AUTO]
  for name in auto_names:
    beparams[name] = defaults[name]

  objects.UpgradeBeParams(beparams)
  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  return cluster.SimpleFillBE(beparams)
9721
9722
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  The creation mode is taken from the opcode; C{CheckArguments} accepts
  L{constants.INSTANCE_CREATE}, L{constants.INSTANCE_IMPORT} and
  L{constants.INSTANCE_REMOTE_IMPORT} and rejects any other mode.

  """
  # Hooks path and hooks type of this LU; presumably consumed by the hooks
  # machinery of the LogicalUnit base class (TODO confirm)
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): presumably tells the base class that the "big" cluster
  # lock is not needed, as ExpandNames declares the locks itself -- confirm
  REQ_BGL = False
9730
  def CheckArguments(self):
    """Check and normalize the opcode arguments.

    The checks are done in this order: no-install/start consistency,
    instance name normalization, ip/name check consistency, types of the
    nic and disk parameters, disk adoption consistency, instance name
    resolution (only if C{name_check} is set), file storage settings,
    iallocator/node settings and finally the checks specific to the
    creation mode.

    Besides modifying C{self.op}, this sets C{self.adopt_disks},
    C{self.check_ip} and C{self._cds}; for remote imports it also sets
    C{self.source_x509_ca_pem}, C{self.source_x509_ca} and
    C{self.source_instance_name}.

    @raise errors.OpPrereqError: if any of the arguments is not valid

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and that the adopt/no-adopt strategy is
    # consistent, i.e. either all disks carry the adopt key or none does
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      # adoption needs a template supporting it, explicitly given nodes
      # (no iallocator) and is not possible when importing
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification; the resolved name replaces the one given
    # in the opcode
    if self.op.name_check:
      self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      # internally mirrored templates need a secondary node, for all the
      # other templates a given secondary node is dropped with a warning
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    # The cluster domain secret is read for all modes, but in this method
    # it is only used by the remote import checks below
    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      # any error code returned by the verification rejects the certificate
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      # the name as returned by the hostname lookup is the one stored
      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
9885
9886   def ExpandNames(self):
9887     """ExpandNames for CreateInstance.
9888
9889     Figure out the right locks for instance creation.
9890
9891     """
9892     self.needed_locks = {}
9893
9894     instance_name = self.op.instance_name
9895     # this is just a preventive check, but someone might still add this
9896     # instance in the meantime, and creation will fail at lock-add time
9897     if instance_name in self.cfg.GetInstanceList():
9898       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9899                                  instance_name, errors.ECODE_EXISTS)
9900
9901     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9902
9903     if self.op.iallocator:
9904       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9905       # specifying a group on instance creation and then selecting nodes from
9906       # that group
9907       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9908       self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9909
9910       if self.op.opportunistic_locking:
9911         self.opportunistic_locks[locking.LEVEL_NODE] = True
9912         self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
9913     else:
9914       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9915       nodelist = [self.op.pnode]
9916       if self.op.snode is not None:
9917         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9918         nodelist.append(self.op.snode)
9919       self.needed_locks[locking.LEVEL_NODE] = nodelist
9920
9921     # in case of import lock the source node too
9922     if self.op.mode == constants.INSTANCE_IMPORT:
9923       src_node = self.op.src_node
9924       src_path = self.op.src_path
9925
9926       if src_path is None:
9927         self.op.src_path = src_path = self.op.instance_name
9928
9929       if src_node is None:
9930         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9931         self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9932         self.op.src_node = None
9933         if os.path.isabs(src_path):
9934           raise errors.OpPrereqError("Importing an instance from a path"
9935                                      " requires a source node option",
9936                                      errors.ECODE_INVAL)
9937       else:
9938         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9939         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9940           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9941         if not os.path.isabs(src_path):
9942           self.op.src_path = src_path = \
9943             utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9944
9945     self.needed_locks[locking.LEVEL_NODE_RES] = \
9946       _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
9947
9948   def _RunAllocator(self):
9949     """Run the allocator based on input opcode.
9950
9951     """
9952     if self.op.opportunistic_locking:
9953       # Only consider nodes for which a lock is held
9954       node_whitelist = self.owned_locks(locking.LEVEL_NODE)
9955     else:
9956       node_whitelist = None
9957
9958     #TODO Export network to iallocator so that it chooses a pnode
9959     #     in a nodegroup that has the desired network connected to
9960     req = _CreateInstanceAllocRequest(self.op, self.disks,
9961                                       self.nics, self.be_full,
9962                                       node_whitelist)
9963     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9964
9965     ial.Run(self.op.iallocator)
9966
9967     if not ial.success:
9968       # When opportunistic locks are used only a temporary failure is generated
9969       if self.op.opportunistic_locking:
9970         ecode = errors.ECODE_TEMP_NORES
9971       else:
9972         ecode = errors.ECODE_NORES
9973
9974       raise errors.OpPrereqError("Can't compute nodes using"
9975                                  " iallocator '%s': %s" %
9976                                  (self.op.iallocator, ial.info),
9977                                  ecode)
9978
9979     self.op.pnode = ial.result[0]
9980     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9981                  self.op.instance_name, self.op.iallocator,
9982                  utils.CommaJoin(ial.result))
9983
9984     assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9985
9986     if req.RequiredNodes() == 2:
9987       self.op.snode = ial.result[1]
9988
9989   def BuildHooksEnv(self):
9990     """Build hooks env.
9991
9992     This runs on master, primary and secondary nodes of the instance.
9993
9994     """
9995     env = {
9996       "ADD_MODE": self.op.mode,
9997       }
9998     if self.op.mode == constants.INSTANCE_IMPORT:
9999       env["SRC_NODE"] = self.op.src_node
10000       env["SRC_PATH"] = self.op.src_path
10001       env["SRC_IMAGES"] = self.src_images
10002
10003     env.update(_BuildInstanceHookEnv(
10004       name=self.op.instance_name,
10005       primary_node=self.op.pnode,
10006       secondary_nodes=self.secondaries,
10007       status=self.op.start,
10008       os_type=self.op.os_type,
10009       minmem=self.be_full[constants.BE_MINMEM],
10010       maxmem=self.be_full[constants.BE_MAXMEM],
10011       vcpus=self.be_full[constants.BE_VCPUS],
10012       nics=_NICListToTuple(self, self.nics),
10013       disk_template=self.op.disk_template,
10014       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10015              for d in self.disks],
10016       bep=self.be_full,
10017       hvp=self.hv_full,
10018       hypervisor_name=self.op.hypervisor,
10019       tags=self.op.tags,
10020     ))
10021
10022     return env
10023
10024   def BuildHooksNodes(self):
10025     """Build hooks nodes.
10026
10027     """
10028     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10029     return nl, nl
10030
10031   def _ReadExportInfo(self):
10032     """Reads the export information from disk.
10033
10034     It will override the opcode source node and path with the actual
10035     information, if these two were not specified before.
10036
10037     @return: the export information
10038
10039     """
10040     assert self.op.mode == constants.INSTANCE_IMPORT
10041
10042     src_node = self.op.src_node
10043     src_path = self.op.src_path
10044
10045     if src_node is None:
10046       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10047       exp_list = self.rpc.call_export_list(locked_nodes)
10048       found = False
10049       for node in exp_list:
10050         if exp_list[node].fail_msg:
10051           continue
10052         if src_path in exp_list[node].payload:
10053           found = True
10054           self.op.src_node = src_node = node
10055           self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10056                                                        src_path)
10057           break
10058       if not found:
10059         raise errors.OpPrereqError("No export found for relative path %s" %
10060                                     src_path, errors.ECODE_INVAL)
10061
10062     _CheckNodeOnline(self, src_node)
10063     result = self.rpc.call_export_info(src_node, src_path)
10064     result.Raise("No export or invalid export found in dir %s" % src_path)
10065
10066     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10067     if not export_info.has_section(constants.INISECT_EXP):
10068       raise errors.ProgrammerError("Corrupted export config",
10069                                    errors.ECODE_ENVIRON)
10070
10071     ei_version = export_info.get(constants.INISECT_EXP, "version")
10072     if (int(ei_version) != constants.EXPORT_VERSION):
10073       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10074                                  (ei_version, constants.EXPORT_VERSION),
10075                                  errors.ECODE_ENVIRON)
10076     return export_info
10077
10078   def _ReadExportParams(self, einfo):
10079     """Use export parameters as defaults.
10080
10081     In case the opcode doesn't specify (as in override) some instance
10082     parameters, then try to use them from the export information, if
10083     that declares them.
10084
10085     """
10086     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10087
10088     if self.op.disk_template is None:
10089       if einfo.has_option(constants.INISECT_INS, "disk_template"):
10090         self.op.disk_template = einfo.get(constants.INISECT_INS,
10091                                           "disk_template")
10092         if self.op.disk_template not in constants.DISK_TEMPLATES:
10093           raise errors.OpPrereqError("Disk template specified in configuration"
10094                                      " file is not one of the allowed values:"
10095                                      " %s" %
10096                                      " ".join(constants.DISK_TEMPLATES),
10097                                      errors.ECODE_INVAL)
10098       else:
10099         raise errors.OpPrereqError("No disk template specified and the export"
10100                                    " is missing the disk_template information",
10101                                    errors.ECODE_INVAL)
10102
10103     if not self.op.disks:
10104       disks = []
10105       # TODO: import the disk iv_name too
10106       for idx in range(constants.MAX_DISKS):
10107         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10108           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10109           disks.append({constants.IDISK_SIZE: disk_sz})
10110       self.op.disks = disks
10111       if not disks and self.op.disk_template != constants.DT_DISKLESS:
10112         raise errors.OpPrereqError("No disk info specified and the export"
10113                                    " is missing the disk information",
10114                                    errors.ECODE_INVAL)
10115
10116     if not self.op.nics:
10117       nics = []
10118       for idx in range(constants.MAX_NICS):
10119         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10120           ndict = {}
10121           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10122             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10123             ndict[name] = v
10124           nics.append(ndict)
10125         else:
10126           break
10127       self.op.nics = nics
10128
10129     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10130       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10131
10132     if (self.op.hypervisor is None and
10133         einfo.has_option(constants.INISECT_INS, "hypervisor")):
10134       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10135
10136     if einfo.has_section(constants.INISECT_HYP):
10137       # use the export parameters but do not override the ones
10138       # specified by the user
10139       for name, value in einfo.items(constants.INISECT_HYP):
10140         if name not in self.op.hvparams:
10141           self.op.hvparams[name] = value
10142
10143     if einfo.has_section(constants.INISECT_BEP):
10144       # use the parameters, without overriding
10145       for name, value in einfo.items(constants.INISECT_BEP):
10146         if name not in self.op.beparams:
10147           self.op.beparams[name] = value
10148         # Compatibility for the old "memory" be param
10149         if name == constants.BE_MEMORY:
10150           if constants.BE_MAXMEM not in self.op.beparams:
10151             self.op.beparams[constants.BE_MAXMEM] = value
10152           if constants.BE_MINMEM not in self.op.beparams:
10153             self.op.beparams[constants.BE_MINMEM] = value
10154     else:
10155       # try to read the parameters old style, from the main section
10156       for name in constants.BES_PARAMETERS:
10157         if (name not in self.op.beparams and
10158             einfo.has_option(constants.INISECT_INS, name)):
10159           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10160
10161     if einfo.has_section(constants.INISECT_OSP):
10162       # use the parameters, without overriding
10163       for name, value in einfo.items(constants.INISECT_OSP):
10164         if name not in self.op.osparams:
10165           self.op.osparams[name] = value
10166
10167   def _RevertToDefaults(self, cluster):
10168     """Revert the instance parameters to the default values.
10169
10170     """
10171     # hvparams
10172     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10173     for name in self.op.hvparams.keys():
10174       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10175         del self.op.hvparams[name]
10176     # beparams
10177     be_defs = cluster.SimpleFillBE({})
10178     for name in self.op.beparams.keys():
10179       if name in be_defs and be_defs[name] == self.op.beparams[name]:
10180         del self.op.beparams[name]
10181     # nic params
10182     nic_defs = cluster.SimpleFillNIC({})
10183     for nic in self.op.nics:
10184       for name in constants.NICS_PARAMETERS:
10185         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10186           del nic[name]
10187     # osparams
10188     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10189     for name in self.op.osparams.keys():
10190       if name in os_defs and os_defs[name] == self.op.osparams[name]:
10191         del self.op.osparams[name]
10192
10193   def _CalculateFileStorageDir(self):
10194     """Calculate final instance file storage dir.
10195
10196     """
10197     # file storage dir calculation/check
10198     self.instance_file_storage_dir = None
10199     if self.op.disk_template in constants.DTS_FILEBASED:
10200       # build the full file storage dir path
10201       joinargs = []
10202
10203       if self.op.disk_template == constants.DT_SHARED_FILE:
10204         get_fsd_fn = self.cfg.GetSharedFileStorageDir
10205       else:
10206         get_fsd_fn = self.cfg.GetFileStorageDir
10207
10208       cfg_storagedir = get_fsd_fn()
10209       if not cfg_storagedir:
10210         raise errors.OpPrereqError("Cluster file storage dir not defined",
10211                                    errors.ECODE_STATE)
10212       joinargs.append(cfg_storagedir)
10213
10214       if self.op.file_storage_dir is not None:
10215         joinargs.append(self.op.file_storage_dir)
10216
10217       joinargs.append(self.op.instance_name)
10218
10219       # pylint: disable=W0142
10220       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10221
10222   def CheckPrereq(self): # pylint: disable=R0914
10223     """Check prerequisites.
10224
10225     """
10226     self._CalculateFileStorageDir()
10227
10228     if self.op.mode == constants.INSTANCE_IMPORT:
10229       export_info = self._ReadExportInfo()
10230       self._ReadExportParams(export_info)
10231       self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10232     else:
10233       self._old_instance_name = None
10234
10235     if (not self.cfg.GetVGName() and
10236         self.op.disk_template not in constants.DTS_NOT_LVM):
10237       raise errors.OpPrereqError("Cluster does not support lvm-based"
10238                                  " instances", errors.ECODE_STATE)
10239
10240     if (self.op.hypervisor is None or
10241         self.op.hypervisor == constants.VALUE_AUTO):
10242       self.op.hypervisor = self.cfg.GetHypervisorType()
10243
10244     cluster = self.cfg.GetClusterInfo()
10245     enabled_hvs = cluster.enabled_hypervisors
10246     if self.op.hypervisor not in enabled_hvs:
10247       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10248                                  " cluster (%s)" %
10249                                  (self.op.hypervisor, ",".join(enabled_hvs)),
10250                                  errors.ECODE_STATE)
10251
10252     # Check tag validity
10253     for tag in self.op.tags:
10254       objects.TaggableObject.ValidateTag(tag)
10255
10256     # check hypervisor parameter syntax (locally)
10257     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10258     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10259                                       self.op.hvparams)
10260     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10261     hv_type.CheckParameterSyntax(filled_hvp)
10262     self.hv_full = filled_hvp
10263     # check that we don't specify global parameters on an instance
10264     _CheckGlobalHvParams(self.op.hvparams)
10265
10266     # fill and remember the beparams dict
10267     self.be_full = _ComputeFullBeParams(self.op, cluster)
10268
10269     # build os parameters
10270     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10271
10272     # now that hvp/bep are in final format, let's reset to defaults,
10273     # if told to do so
10274     if self.op.identify_defaults:
10275       self._RevertToDefaults(cluster)
10276
10277     # NIC buildup
10278     self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10279                              self.proc.GetECId())
10280
10281     # disk checks/pre-build
10282     default_vg = self.cfg.GetVGName()
10283     self.disks = _ComputeDisks(self.op, default_vg)
10284
10285     if self.op.mode == constants.INSTANCE_IMPORT:
10286       disk_images = []
10287       for idx in range(len(self.disks)):
10288         option = "disk%d_dump" % idx
10289         if export_info.has_option(constants.INISECT_INS, option):
10290           # FIXME: are the old os-es, disk sizes, etc. useful?
10291           export_name = export_info.get(constants.INISECT_INS, option)
10292           image = utils.PathJoin(self.op.src_path, export_name)
10293           disk_images.append(image)
10294         else:
10295           disk_images.append(False)
10296
10297       self.src_images = disk_images
10298
10299       if self.op.instance_name == self._old_instance_name:
10300         for idx, nic in enumerate(self.nics):
10301           if nic.mac == constants.VALUE_AUTO:
10302             nic_mac_ini = "nic%d_mac" % idx
10303             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10304
10305     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10306
10307     # ip ping checks (we use the same ip that was resolved in ExpandNames)
10308     if self.op.ip_check:
10309       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10310         raise errors.OpPrereqError("IP %s of instance %s already in use" %
10311                                    (self.check_ip, self.op.instance_name),
10312                                    errors.ECODE_NOTUNIQUE)
10313
10314     #### mac address generation
10315     # By generating here the mac address both the allocator and the hooks get
10316     # the real final mac address rather than the 'auto' or 'generate' value.
10317     # There is a race condition between the generation and the instance object
10318     # creation, which means that we know the mac is valid now, but we're not
10319     # sure it will be when we actually add the instance. If things go bad
10320     # adding the instance will abort because of a duplicate mac, and the
10321     # creation job will fail.
10322     for nic in self.nics:
10323       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10324         nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10325
10326     #### allocator run
10327
10328     if self.op.iallocator is not None:
10329       self._RunAllocator()
10330
10331     # Release all unneeded node locks
10332     keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10333     _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10334     _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10335     _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10336
10337     assert (self.owned_locks(locking.LEVEL_NODE) ==
10338             self.owned_locks(locking.LEVEL_NODE_RES)), \
10339       "Node locks differ from node resource locks"
10340
10341     #### node related checks
10342
10343     # check primary node
10344     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10345     assert self.pnode is not None, \
10346       "Cannot retrieve locked node %s" % self.op.pnode
10347     if pnode.offline:
10348       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10349                                  pnode.name, errors.ECODE_STATE)
10350     if pnode.drained:
10351       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10352                                  pnode.name, errors.ECODE_STATE)
10353     if not pnode.vm_capable:
10354       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10355                                  " '%s'" % pnode.name, errors.ECODE_STATE)
10356
10357     self.secondaries = []
10358
10359     # Fill in any IPs from IP pools. This must happen here, because we need to
10360     # know the nic's primary node, as specified by the iallocator
10361     for idx, nic in enumerate(self.nics):
10362       net = nic.network
10363       if net is not None:
10364         netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10365         if netparams is None:
10366           raise errors.OpPrereqError("No netparams found for network"
10367                                      " %s. Propably not connected to"
10368                                      " node's %s nodegroup" %
10369                                      (net, self.pnode.name),
10370                                      errors.ECODE_INVAL)
10371         self.LogInfo("NIC/%d inherits netparams %s" %
10372                      (idx, netparams.values()))
10373         nic.nicparams = dict(netparams)
10374         if nic.ip is not None:
10375           if nic.ip.lower() == constants.NIC_IP_POOL:
10376             try:
10377               nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10378             except errors.ReservationError:
10379               raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10380                                          " from the address pool" % idx,
10381                                          errors.ECODE_STATE)
10382             self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10383           else:
10384             try:
10385               self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10386             except errors.ReservationError:
10387               raise errors.OpPrereqError("IP address %s already in use"
10388                                          " or does not belong to network %s" %
10389                                          (nic.ip, net),
10390                                          errors.ECODE_NOTUNIQUE)
10391       else:
10392         # net is None, ip None or given
10393         if self.op.conflicts_check:
10394           _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10395
10396     # mirror node verification
10397     if self.op.disk_template in constants.DTS_INT_MIRROR:
10398       if self.op.snode == pnode.name:
10399         raise errors.OpPrereqError("The secondary node cannot be the"
10400                                    " primary node", errors.ECODE_INVAL)
10401       _CheckNodeOnline(self, self.op.snode)
10402       _CheckNodeNotDrained(self, self.op.snode)
10403       _CheckNodeVmCapable(self, self.op.snode)
10404       self.secondaries.append(self.op.snode)
10405
10406       snode = self.cfg.GetNodeInfo(self.op.snode)
10407       if pnode.group != snode.group:
10408         self.LogWarning("The primary and secondary nodes are in two"
10409                         " different node groups; the disk parameters"
10410                         " from the first disk's node group will be"
10411                         " used")
10412
10413     nodenames = [pnode.name] + self.secondaries
10414
10415     # Verify instance specs
10416     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10417     ispec = {
10418       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10419       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10420       constants.ISPEC_DISK_COUNT: len(self.disks),
10421       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10422       constants.ISPEC_NIC_COUNT: len(self.nics),
10423       constants.ISPEC_SPINDLE_USE: spindle_use,
10424       }
10425
10426     group_info = self.cfg.GetNodeGroup(pnode.group)
10427     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10428     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10429     if not self.op.ignore_ipolicy and res:
10430       msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10431              (pnode.group, group_info.name, utils.CommaJoin(res)))
10432       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10433
10434     if not self.adopt_disks:
10435       if self.op.disk_template == constants.DT_RBD:
10436         # _CheckRADOSFreeSpace() is just a placeholder.
10437         # Any function that checks prerequisites can be placed here.
10438         # Check if there is enough space on the RADOS cluster.
10439         _CheckRADOSFreeSpace()
10440       else:
10441         # Check lv size requirements, if not adopting
10442         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10443         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10444
10445     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10446       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10447                                 disk[constants.IDISK_ADOPT])
10448                      for disk in self.disks])
10449       if len(all_lvs) != len(self.disks):
10450         raise errors.OpPrereqError("Duplicate volume names given for adoption",
10451                                    errors.ECODE_INVAL)
10452       for lv_name in all_lvs:
10453         try:
10454           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10455           # to ReserveLV uses the same syntax
10456           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10457         except errors.ReservationError:
10458           raise errors.OpPrereqError("LV named %s used by another instance" %
10459                                      lv_name, errors.ECODE_NOTUNIQUE)
10460
10461       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10462       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10463
10464       node_lvs = self.rpc.call_lv_list([pnode.name],
10465                                        vg_names.payload.keys())[pnode.name]
10466       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10467       node_lvs = node_lvs.payload
10468
10469       delta = all_lvs.difference(node_lvs.keys())
10470       if delta:
10471         raise errors.OpPrereqError("Missing logical volume(s): %s" %
10472                                    utils.CommaJoin(delta),
10473                                    errors.ECODE_INVAL)
10474       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10475       if online_lvs:
10476         raise errors.OpPrereqError("Online logical volumes found, cannot"
10477                                    " adopt: %s" % utils.CommaJoin(online_lvs),
10478                                    errors.ECODE_STATE)
10479       # update the size of disk based on what is found
10480       for dsk in self.disks:
10481         dsk[constants.IDISK_SIZE] = \
10482           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10483                                         dsk[constants.IDISK_ADOPT])][0]))
10484
10485     elif self.op.disk_template == constants.DT_BLOCK:
10486       # Normalize and de-duplicate device paths
10487       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10488                        for disk in self.disks])
10489       if len(all_disks) != len(self.disks):
10490         raise errors.OpPrereqError("Duplicate disk names given for adoption",
10491                                    errors.ECODE_INVAL)
10492       baddisks = [d for d in all_disks
10493                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10494       if baddisks:
10495         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10496                                    " cannot be adopted" %
10497                                    (utils.CommaJoin(baddisks),
10498                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
10499                                    errors.ECODE_INVAL)
10500
10501       node_disks = self.rpc.call_bdev_sizes([pnode.name],
10502                                             list(all_disks))[pnode.name]
10503       node_disks.Raise("Cannot get block device information from node %s" %
10504                        pnode.name)
10505       node_disks = node_disks.payload
10506       delta = all_disks.difference(node_disks.keys())
10507       if delta:
10508         raise errors.OpPrereqError("Missing block device(s): %s" %
10509                                    utils.CommaJoin(delta),
10510                                    errors.ECODE_INVAL)
10511       for dsk in self.disks:
10512         dsk[constants.IDISK_SIZE] = \
10513           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10514
10515     # Verify instance specs
10516     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10517     ispec = {
10518       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10519       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10520       constants.ISPEC_DISK_COUNT: len(self.disks),
10521       constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10522                                   for disk in self.disks],
10523       constants.ISPEC_NIC_COUNT: len(self.nics),
10524       constants.ISPEC_SPINDLE_USE: spindle_use,
10525       }
10526
10527     group_info = self.cfg.GetNodeGroup(pnode.group)
10528     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10529     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10530     if not self.op.ignore_ipolicy and res:
10531       raise errors.OpPrereqError(("Instance allocation to group %s violates"
10532                                   " policy: %s") % (pnode.group,
10533                                                     utils.CommaJoin(res)),
10534                                   errors.ECODE_INVAL)
10535
10536     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10537
10538     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10539     # check OS parameters (remotely)
10540     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10541
10542     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10543
10544     # memory check on primary node
10545     #TODO(dynmem): use MINMEM for checking
10546     if self.op.start:
10547       _CheckNodeFreeMemory(self, self.pnode.name,
10548                            "creating instance %s" % self.op.instance_name,
10549                            self.be_full[constants.BE_MAXMEM],
10550                            self.op.hypervisor)
10551
10552     self.dry_run_result = list(nodenames)
10553
10554   def Exec(self, feedback_fn):
10555     """Create and add the instance to the cluster.
10556
10557     """
10558     instance = self.op.instance_name
10559     pnode_name = self.pnode.name
10560
10561     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10562                 self.owned_locks(locking.LEVEL_NODE)), \
10563       "Node locks differ from node resource locks"
10564     assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10565
10566     ht_kind = self.op.hypervisor
10567     if ht_kind in constants.HTS_REQ_PORT:
10568       network_port = self.cfg.AllocatePort()
10569     else:
10570       network_port = None
10571
10572     # This is ugly but we got a chicken-egg problem here
10573     # We can only take the group disk parameters, as the instance
10574     # has no disks yet (we are generating them right here).
10575     node = self.cfg.GetNodeInfo(pnode_name)
10576     nodegroup = self.cfg.GetNodeGroup(node.group)
10577     disks = _GenerateDiskTemplate(self,
10578                                   self.op.disk_template,
10579                                   instance, pnode_name,
10580                                   self.secondaries,
10581                                   self.disks,
10582                                   self.instance_file_storage_dir,
10583                                   self.op.file_driver,
10584                                   0,
10585                                   feedback_fn,
10586                                   self.cfg.GetGroupDiskParams(nodegroup))
10587
10588     iobj = objects.Instance(name=instance, os=self.op.os_type,
10589                             primary_node=pnode_name,
10590                             nics=self.nics, disks=disks,
10591                             disk_template=self.op.disk_template,
10592                             admin_state=constants.ADMINST_DOWN,
10593                             network_port=network_port,
10594                             beparams=self.op.beparams,
10595                             hvparams=self.op.hvparams,
10596                             hypervisor=self.op.hypervisor,
10597                             osparams=self.op.osparams,
10598                             )
10599
10600     if self.op.tags:
10601       for tag in self.op.tags:
10602         iobj.AddTag(tag)
10603
10604     if self.adopt_disks:
10605       if self.op.disk_template == constants.DT_PLAIN:
10606         # rename LVs to the newly-generated names; we need to construct
10607         # 'fake' LV disks with the old data, plus the new unique_id
10608         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10609         rename_to = []
10610         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10611           rename_to.append(t_dsk.logical_id)
10612           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10613           self.cfg.SetDiskID(t_dsk, pnode_name)
10614         result = self.rpc.call_blockdev_rename(pnode_name,
10615                                                zip(tmp_disks, rename_to))
10616         result.Raise("Failed to rename adoped LVs")
10617     else:
10618       feedback_fn("* creating instance disks...")
10619       try:
10620         _CreateDisks(self, iobj)
10621       except errors.OpExecError:
10622         self.LogWarning("Device creation failed, reverting...")
10623         try:
10624           _RemoveDisks(self, iobj)
10625         finally:
10626           self.cfg.ReleaseDRBDMinors(instance)
10627           raise
10628
10629     feedback_fn("adding instance %s to cluster config" % instance)
10630
10631     self.cfg.AddInstance(iobj, self.proc.GetECId())
10632
10633     # Declare that we don't want to remove the instance lock anymore, as we've
10634     # added the instance to the config
10635     del self.remove_locks[locking.LEVEL_INSTANCE]
10636
10637     if self.op.mode == constants.INSTANCE_IMPORT:
10638       # Release unused nodes
10639       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10640     else:
10641       # Release all nodes
10642       _ReleaseLocks(self, locking.LEVEL_NODE)
10643
10644     disk_abort = False
10645     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10646       feedback_fn("* wiping instance disks...")
10647       try:
10648         _WipeDisks(self, iobj)
10649       except errors.OpExecError, err:
10650         logging.exception("Wiping disks failed")
10651         self.LogWarning("Wiping instance disks failed (%s)", err)
10652         disk_abort = True
10653
10654     if disk_abort:
10655       # Something is already wrong with the disks, don't do anything else
10656       pass
10657     elif self.op.wait_for_sync:
10658       disk_abort = not _WaitForSync(self, iobj)
10659     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10660       # make sure the disks are not degraded (still sync-ing is ok)
10661       feedback_fn("* checking mirrors status")
10662       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10663     else:
10664       disk_abort = False
10665
10666     if disk_abort:
10667       _RemoveDisks(self, iobj)
10668       self.cfg.RemoveInstance(iobj.name)
10669       # Make sure the instance lock gets removed
10670       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10671       raise errors.OpExecError("There are some degraded disks for"
10672                                " this instance")
10673
10674     # Release all node resource locks
10675     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10676
10677     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10678       # we need to set the disks ID to the primary node, since the
10679       # preceding code might or might have not done it, depending on
10680       # disk template and other options
10681       for disk in iobj.disks:
10682         self.cfg.SetDiskID(disk, pnode_name)
10683       if self.op.mode == constants.INSTANCE_CREATE:
10684         if not self.op.no_install:
10685           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10686                         not self.op.wait_for_sync)
10687           if pause_sync:
10688             feedback_fn("* pausing disk sync to install instance OS")
10689             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10690                                                               (iobj.disks,
10691                                                                iobj), True)
10692             for idx, success in enumerate(result.payload):
10693               if not success:
10694                 logging.warn("pause-sync of instance %s for disk %d failed",
10695                              instance, idx)
10696
10697           feedback_fn("* running the instance OS create scripts...")
10698           # FIXME: pass debug option from opcode to backend
10699           os_add_result = \
10700             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10701                                           self.op.debug_level)
10702           if pause_sync:
10703             feedback_fn("* resuming disk sync")
10704             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10705                                                               (iobj.disks,
10706                                                                iobj), False)
10707             for idx, success in enumerate(result.payload):
10708               if not success:
10709                 logging.warn("resume-sync of instance %s for disk %d failed",
10710                              instance, idx)
10711
10712           os_add_result.Raise("Could not add os for instance %s"
10713                               " on node %s" % (instance, pnode_name))
10714
10715       else:
10716         if self.op.mode == constants.INSTANCE_IMPORT:
10717           feedback_fn("* running the instance OS import scripts...")
10718
10719           transfers = []
10720
10721           for idx, image in enumerate(self.src_images):
10722             if not image:
10723               continue
10724
10725             # FIXME: pass debug option from opcode to backend
10726             dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10727                                                constants.IEIO_FILE, (image, ),
10728                                                constants.IEIO_SCRIPT,
10729                                                (iobj.disks[idx], idx),
10730                                                None)
10731             transfers.append(dt)
10732
10733           import_result = \
10734             masterd.instance.TransferInstanceData(self, feedback_fn,
10735                                                   self.op.src_node, pnode_name,
10736                                                   self.pnode.secondary_ip,
10737                                                   iobj, transfers)
10738           if not compat.all(import_result):
10739             self.LogWarning("Some disks for instance %s on node %s were not"
10740                             " imported successfully" % (instance, pnode_name))
10741
10742           rename_from = self._old_instance_name
10743
10744         elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10745           feedback_fn("* preparing remote import...")
10746           # The source cluster will stop the instance before attempting to make
10747           # a connection. In some cases stopping an instance can take a long
10748           # time, hence the shutdown timeout is added to the connection
10749           # timeout.
10750           connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10751                              self.op.source_shutdown_timeout)
10752           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10753
10754           assert iobj.primary_node == self.pnode.name
10755           disk_results = \
10756             masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10757                                           self.source_x509_ca,
10758                                           self._cds, timeouts)
10759           if not compat.all(disk_results):
10760             # TODO: Should the instance still be started, even if some disks
10761             # failed to import (valid for local imports, too)?
10762             self.LogWarning("Some disks for instance %s on node %s were not"
10763                             " imported successfully" % (instance, pnode_name))
10764
10765           rename_from = self.source_instance_name
10766
10767         else:
10768           # also checked in the prereq part
10769           raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10770                                        % self.op.mode)
10771
10772         # Run rename script on newly imported instance
10773         assert iobj.name == instance
10774         feedback_fn("Running rename script for %s" % instance)
10775         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10776                                                    rename_from,
10777                                                    self.op.debug_level)
10778         if result.fail_msg:
10779           self.LogWarning("Failed to run rename script for %s on node"
10780                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10781
10782     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10783
10784     if self.op.start:
10785       iobj.admin_state = constants.ADMINST_UP
10786       self.cfg.Update(iobj, feedback_fn)
10787       logging.info("Starting instance %s on node %s", instance, pnode_name)
10788       feedback_fn("* starting instance...")
10789       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10790                                             False)
10791       result.Raise("Could not start instance")
10792
10793     return list(iobj.all_nodes)
10794
10795
class LUInstanceMultiAlloc(NoHooksLU):
  """Allocates multiple instances at the same time.

  Node placement comes from one of two sources: an iallocator (given on
  the opcode or the cluster-wide default), or the pnode/snode values set
  on every instance opcode. The LU returns one job per allocatable
  instance opcode.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    The instance opcodes must not carry an iallocator of their own, must
    either all or none specify their nodes and must have unique names.
    The cluster-wide default iallocator is only consulted when neither
    nodes nor an iallocator were given.

    @raise errors.OpPrereqError: if one of the checks fails or no source
      of node placement is available

    """
    nodes = []
    for inst in self.op.instances:
      if inst.iallocator is not None:
        raise errors.OpPrereqError("iallocator are not allowed to be set on"
                                   " instance objects", errors.ECODE_INVAL)
      nodes.append(bool(inst.pnode))
      if inst.disk_template in constants.DTS_INT_MIRROR:
        nodes.append(bool(inst.snode))

    has_nodes = compat.any(nodes)
    if compat.all(nodes) ^ has_nodes:
      raise errors.OpPrereqError("There are instance objects providing"
                                 " pnode/snode while others do not",
                                 errors.ECODE_INVAL)

    # Explicitly given nodes are a valid placement on their own; the default
    # iallocator is a fallback only for the case where nothing was specified
    if not has_nodes and self.op.iallocator is None:
      default_iallocator = self.cfg.GetDefaultIAllocator()
      if default_iallocator:
        self.op.iallocator = default_iallocator
      else:
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
                                   " given and no cluster-wide default"
                                   " iallocator found; please specify either"
                                   " an iallocator or nodes on the instances"
                                   " or set a cluster-wide default iallocator",
                                   errors.ECODE_INVAL)

    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
    if dups:
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
                                 utils.CommaJoin(dups), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Calculate the locks.

    With an iallocator all nodes (and their resources) are locked, since
    any of them may be selected; otherwise only the nodes named on the
    instance opcodes are locked.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {
      # iallocator will select nodes and even if no iallocator is used,
      # collisions with LUInstanceCreate should be avoided
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET

      if self.op.opportunistic_locking:
        self.opportunistic_locks[locking.LEVEL_NODE] = True
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
    else:
      nodeslist = []
      for inst in self.op.instances:
        inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
        nodeslist.append(inst.pnode)
        if inst.snode is not None:
          inst.snode = _ExpandNodeName(self.cfg, inst.snode)
          nodeslist.append(inst.snode)

      self.needed_locks[locking.LEVEL_NODE] = nodeslist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)

  def CheckPrereq(self):
    """Check prerequisite.

    Runs the iallocator (if one is in use) and stores its result in
    C{self.ia_result}. For dry runs the partial result is prepared here.

    @raise errors.OpPrereqError: if the iallocator run is not successful

    """
    # Without an iallocator the nodes were given by the caller and there is
    # nothing to compute
    if self.op.iallocator:
      cluster = self.cfg.GetClusterInfo()
      default_vg = self.cfg.GetVGName()
      ec_id = self.proc.GetECId()

      if self.op.opportunistic_locking:
        # Only consider nodes for which a lock is held
        node_whitelist = self.owned_locks(locking.LEVEL_NODE)
      else:
        node_whitelist = None

      insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                           _ComputeNics(op, cluster, None,
                                                        self.cfg, ec_id),
                                           _ComputeFullBeParams(op, cluster),
                                           node_whitelist)
               for op in self.op.instances]

      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute nodes using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      self.ia_result = ial.result

    if self.op.dry_run:
      self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
        constants.JOB_IDS_KEY: [],
        })

  def _ConstructPartialResult(self):
    """Constructs the partial result.

    @rtype: dict
    @return: the names of the allocatable and of the failed instances,
      keyed by the result keys of L{opcodes.OpInstanceMultiAlloc}

    """
    if self.op.iallocator:
      (allocatable, failed_insts) = self.ia_result
      allocatable_insts = map(compat.fst, allocatable)
    else:
      # Nodes were given explicitly, hence every instance is submitted
      allocatable_insts = [op.instance_name for op in self.op.instances]
      failed_insts = []

    return {
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: allocatable_insts,
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed_insts,
      }

  def Exec(self, feedback_fn):
    """Executes the opcode.

    Builds one job per allocatable instance opcode; when an iallocator
    was used, the nodes it selected are filled into the opcodes first.

    @return: L{ResultWithJobs} carrying the jobs and the partial result

    """
    jobs = []

    if self.op.iallocator:
      op2inst = dict((op.instance_name, op) for op in self.op.instances)
      (allocatable, failed) = self.ia_result

      for (name, nodes) in allocatable:
        op = op2inst.pop(name)

        if len(nodes) > 1:
          (op.pnode, op.snode) = nodes
        else:
          (op.pnode,) = nodes

        jobs.append([op])

      # Every instance must be reported as either allocatable or failed
      missing = set(op2inst.keys()) - set(failed)
      assert not missing, \
        "Iallocator did return incomplete result: %s" % \
        utils.CommaJoin(missing)
    else:
      jobs.extend([op] for op in self.op.instances)

    return ResultWithJobs(jobs, **self._ConstructPartialResult())
10943
10944
10945 def _CheckRADOSFreeSpace():
10946   """Compute disk size requirements inside the RADOS cluster.
10947
10948   """
10949   # For the RADOS cluster we assume there is always enough space.
10950   pass
10951
10952
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  Unlike most LUs this one does not change anything; it hands back the
  console information (as returned by L{_GetInstanceConsole}) that is
  needed on the master node in order to connect to the console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; all locks are acquired in shared mode.

    """
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the locked instance and requires its primary node to be
    online.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    @return: console information as returned by L{_GetInstanceConsole}
    @raise errors.OpExecError: if the instance is not listed as running
      on its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    listing = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    listing.Raise("Can't get node information from %s" % pnode)

    if inst.name in listing.payload:
      logging.debug("Connecting to console of %s on %s", inst.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), inst)

    # Not running: derive the state to report from the configured admin state
    if inst.admin_state == constants.ADMINST_UP:
      reported = constants.INSTST_ERRORDOWN
    elif inst.admin_state == constants.ADMINST_DOWN:
      reported = constants.INSTST_ADMINDOWN
    else:
      reported = constants.INSTST_ADMINOFFLINE

    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (inst.name, reported))
11002
11003
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  The filled hypervisor and backend parameters are handed to the
  hypervisor separately instead of being written into the instance, so
  that cluster defaults do not end up saved in the instance itself.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict
  @return: the console object converted via its C{ToDict} method

  """
  hv_impl = hypervisor.GetHypervisor(instance.hypervisor)
  filled_hv = cluster.FillHV(instance)
  filled_be = cluster.FillBE(instance)

  cons = hv_impl.GetInstanceConsole(instance, filled_hv, filled_be)

  # Sanity checks on what the hypervisor returned
  assert cons.instance == instance.name
  assert cons.Validate()

  return cons.ToDict()
11023
11024
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The LU itself only validates the options and takes care of locking;
  the replacement is carried out by a L{TLReplaceDisks} tasklet.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    A new node or an iallocator is required when changing the secondary
    node and not allowed in any other mode.

    """
    new_node = self.op.remote_node
    allocator = self.op.iallocator
    target_given = not (new_node is None and allocator is None)

    if self.op.mode != constants.REPLACE_DISK_CHG:
      # Not replacing the secondary
      if target_given:
        raise errors.OpPrereqError("The iallocator and new node options can"
                                   " only be used when changing the"
                                   " secondary node", errors.ECODE_INVAL)
      return

    if not target_given:
      raise errors.OpPrereqError("When changing the secondary either an"
                                 " iallocator script must be used or the"
                                 " new node given", errors.ECODE_INVAL)

    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand names, declare the initial locks and set up the tasklet.

    """
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is None:
      # Node locks are computed later, in DeclareLocks
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, self.op.early_release,
                                   self.op.ignore_ipolicy)
    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declare the locks needed at the given locking level.

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      self.needed_locks[locking.LEVEL_NODEGROUP] = instance_groups

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is None:
        assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

        self._LockInstancesNodes()
      else:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)

        # Lock member nodes of all locked groups
        member_nodes = []
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          for node_name in self.cfg.GetNodeGroup(group_uuid).members:
            member_nodes.append(node_name)
        self.needed_locks[locking.LEVEL_NODE] = member_nodes

    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    inst = self.replacer.instance
    hook_env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": inst.secondary_nodes[0],
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, inst))
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same node list twice: the master node, the instance's
      primary node and, if given, the new secondary node

    """
    inst = self.replacer.instance
    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if self.op.remote_node is not None:
      hook_nodes.append(self.op.remote_node)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Re-verifies the optimistically acquired node group locks before
    running the generic prerequisite check.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    locked_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if locked_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, locked_groups)

    return LogicalUnit.CheckPrereq(self)
11164
11165
11166 class TLReplaceDisks(Tasklet):
11167   """Replaces disks for an instance.
11168
11169   Note: Locking is not within the scope of this class.
11170
11171   """
11172   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11173                disks, early_release, ignore_ipolicy):
11174     """Initializes this class.
11175
11176     """
11177     Tasklet.__init__(self, lu)
11178
11179     # Parameters
11180     self.instance_name = instance_name
11181     self.mode = mode
11182     self.iallocator_name = iallocator_name
11183     self.remote_node = remote_node
11184     self.disks = disks
11185     self.early_release = early_release
11186     self.ignore_ipolicy = ignore_ipolicy
11187
11188     # Runtime data
11189     self.instance = None
11190     self.new_node = None
11191     self.target_node = None
11192     self.other_node = None
11193     self.remote_node_info = None
11194     self.node_secondary_ip = None
11195
11196   @staticmethod
11197   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11198     """Compute a new secondary node using an IAllocator.
11199
11200     """
11201     req = iallocator.IAReqRelocate(name=instance_name,
11202                                    relocate_from=list(relocate_from))
11203     ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11204
11205     ial.Run(iallocator_name)
11206
11207     if not ial.success:
11208       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11209                                  " %s" % (iallocator_name, ial.info),
11210                                  errors.ECODE_NORES)
11211
11212     remote_node_name = ial.result[0]
11213
11214     lu.LogInfo("Selected new secondary for instance '%s': %s",
11215                instance_name, remote_node_name)
11216
11217     return remote_node_name
11218
11219   def _FindFaultyDisks(self, node_name):
11220     """Wrapper for L{_FindFaultyInstanceDisks}.
11221
11222     """
11223     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11224                                     node_name, True)
11225
11226   def _CheckDisksActivated(self, instance):
11227     """Checks if the instance disks are activated.
11228
11229     @param instance: The instance to check disks
11230     @return: True if they are activated, False otherwise
11231
11232     """
11233     nodes = instance.all_nodes
11234
11235     for idx, dev in enumerate(instance.disks):
11236       for node in nodes:
11237         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11238         self.cfg.SetDiskID(dev, node)
11239
11240         result = _BlockdevFind(self, node, dev, instance)
11241
11242         if result.offline:
11243           continue
11244         elif result.fail_msg or not result.payload:
11245           return False
11246
11247     return True
11248
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, is DRBD8-based and
    has exactly one secondary node. It then determines the nodes taking
    part in the operation (C{target_node}, C{other_node} and, when the
    secondary is changed, C{new_node}) and the disks to replace, checks
    the involved nodes, releases the locks which are no longer needed and
    collects the secondary IP addresses of the touched nodes.

    @raise errors.OpPrereqError: if one of the checks fails
    @raise errors.ProgrammerError: if the replace mode is not known

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either the node given by the caller (possibly
    # None) or whatever the iallocator selects
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    # The new secondary must differ from both current nodes of the instance
    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    # An explicit disk list is rejected in the automatic and
    # change-secondary modes
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Automatic mode: the disks to replace are the faulty ones, which may
      # be located on one node only
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty; Exec reports that no disks need replacement
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # The old secondary is deliberately not in this list, so it is not
        # required to be online
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between internal
    # submitted opcode and external one. We should fix that.
    if self.remote_node_info:
      # We change the node, lets verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      cluster = self.cfg.GetClusterInfo()
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              new_group_info)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Nodes taking part in the operation; entries which were never set are
    # still None and left out
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    # Release any owned node group
    _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
11401
11402   def Exec(self, feedback_fn):
11403     """Execute disk replacement.
11404
11405     This dispatches the disk replacement to the appropriate handler.
11406
11407     """
11408     if __debug__:
11409       # Verify owned locks before starting operation
11410       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11411       assert set(owned_nodes) == set(self.node_secondary_ip), \
11412           ("Incorrect node locks, owning %s, expected %s" %
11413            (owned_nodes, self.node_secondary_ip.keys()))
11414       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11415               self.lu.owned_locks(locking.LEVEL_NODE_RES))
11416       assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11417
11418       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11419       assert list(owned_instances) == [self.instance_name], \
11420           "Instance '%s' not locked" % self.instance_name
11421
11422       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11423           "Should not own any node group lock at this point"
11424
11425     if not self.disks:
11426       feedback_fn("No disks need replacement for instance '%s'" %
11427                   self.instance.name)
11428       return
11429
11430     feedback_fn("Replacing disk(s) %s for instance '%s'" %
11431                 (utils.CommaJoin(self.disks), self.instance.name))
11432     feedback_fn("Current primary node: %s" % self.instance.primary_node)
11433     feedback_fn("Current seconary node: %s" %
11434                 utils.CommaJoin(self.instance.secondary_nodes))
11435
11436     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11437
11438     # Activate the instance disks if we're replacing them on a down instance
11439     if activate_disks:
11440       _StartInstanceDisks(self.lu, self.instance, True)
11441
11442     try:
11443       # Should we replace the secondary node?
11444       if self.new_node is not None:
11445         fn = self._ExecDrbd8Secondary
11446       else:
11447         fn = self._ExecDrbd8DiskOnly
11448
11449       result = fn(feedback_fn)
11450     finally:
11451       # Deactivate the instance disks if we're replacing them on a
11452       # down instance
11453       if activate_disks:
11454         _SafeShutdownInstanceDisks(self.lu, self.instance)
11455
11456     assert not self.lu.owned_locks(locking.LEVEL_NODE)
11457
11458     if __debug__:
11459       # Verify owned locks
11460       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11461       nodes = frozenset(self.node_secondary_ip)
11462       assert ((self.early_release and not owned_nodes) or
11463               (not self.early_release and not (set(owned_nodes) - nodes))), \
11464         ("Not owning the correct locks, early_release=%s, owned=%r,"
11465          " nodes=%r" % (self.early_release, owned_nodes, nodes))
11466
11467     return result
11468
11469   def _CheckVolumeGroup(self, nodes):
11470     self.lu.LogInfo("Checking volume groups")
11471
11472     vgname = self.cfg.GetVGName()
11473
11474     # Make sure volume group exists on all involved nodes
11475     results = self.rpc.call_vg_list(nodes)
11476     if not results:
11477       raise errors.OpExecError("Can't list volume groups on the nodes")
11478
11479     for node in nodes:
11480       res = results[node]
11481       res.Raise("Error checking node %s" % node)
11482       if vgname not in res.payload:
11483         raise errors.OpExecError("Volume group '%s' not found on node %s" %
11484                                  (vgname, node))
11485
11486   def _CheckDisksExistence(self, nodes):
11487     # Check disk existence
11488     for idx, dev in enumerate(self.instance.disks):
11489       if idx not in self.disks:
11490         continue
11491
11492       for node in nodes:
11493         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11494         self.cfg.SetDiskID(dev, node)
11495
11496         result = _BlockdevFind(self, node, dev, self.instance)
11497
11498         msg = result.fail_msg
11499         if msg or not result.payload:
11500           if not msg:
11501             msg = "disk not found"
11502           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11503                                    (idx, node, msg))
11504
11505   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11506     for idx, dev in enumerate(self.instance.disks):
11507       if idx not in self.disks:
11508         continue
11509
11510       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11511                       (idx, node_name))
11512
11513       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11514                                    on_primary, ldisk=ldisk):
11515         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11516                                  " replace disks for instance %s" %
11517                                  (node_name, self.instance.name))
11518
11519   def _CreateNewStorage(self, node_name):
11520     """Create new storage on the primary or secondary node.
11521
11522     This is only used for same-node replaces, not for changing the
11523     secondary node, hence we don't want to modify the existing disk.
11524
11525     """
11526     iv_names = {}
11527
11528     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11529     for idx, dev in enumerate(disks):
11530       if idx not in self.disks:
11531         continue
11532
11533       self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11534
11535       self.cfg.SetDiskID(dev, node_name)
11536
11537       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11538       names = _GenerateUniqueNames(self.lu, lv_names)
11539
11540       (data_disk, meta_disk) = dev.children
11541       vg_data = data_disk.logical_id[0]
11542       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11543                              logical_id=(vg_data, names[0]),
11544                              params=data_disk.params)
11545       vg_meta = meta_disk.logical_id[0]
11546       lv_meta = objects.Disk(dev_type=constants.LD_LV,
11547                              size=constants.DRBD_META_SIZE,
11548                              logical_id=(vg_meta, names[1]),
11549                              params=meta_disk.params)
11550
11551       new_lvs = [lv_data, lv_meta]
11552       old_lvs = [child.Copy() for child in dev.children]
11553       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11554
11555       # we pass force_create=True to force the LVM creation
11556       for new_lv in new_lvs:
11557         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11558                              _GetInstanceInfoText(self.instance), False)
11559
11560     return iv_names
11561
11562   def _CheckDevices(self, node_name, iv_names):
11563     for name, (dev, _, _) in iv_names.iteritems():
11564       self.cfg.SetDiskID(dev, node_name)
11565
11566       result = _BlockdevFind(self, node_name, dev, self.instance)
11567
11568       msg = result.fail_msg
11569       if msg or not result.payload:
11570         if not msg:
11571           msg = "disk not found"
11572         raise errors.OpExecError("Can't find DRBD device %s: %s" %
11573                                  (name, msg))
11574
11575       if result.payload.is_degraded:
11576         raise errors.OpExecError("DRBD device %s is degraded!" % name)
11577
11578   def _RemoveOldStorage(self, node_name, iv_names):
11579     for name, (_, old_lvs, _) in iv_names.iteritems():
11580       self.lu.LogInfo("Remove logical volumes for %s", name)
11581
11582       for lv in old_lvs:
11583         self.cfg.SetDiskID(lv, node_name)
11584
11585         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11586         if msg:
11587           self.lu.LogWarning("Can't remove old LV: %s", msg,
11588                              hint="remove unused LVs manually")
11589
11590   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11591     """Replace a disk on the primary or secondary for DRBD 8.
11592
11593     The algorithm for replace is quite complicated:
11594
11595       1. for each disk to be replaced:
11596
11597         1. create new LVs on the target node with unique names
11598         1. detach old LVs from the drbd device
11599         1. rename old LVs to name_replaced.<time_t>
11600         1. rename new LVs to old LVs
11601         1. attach the new LVs (with the old names now) to the drbd device
11602
11603       1. wait for sync across all devices
11604
11605       1. for each modified disk:
11606
11607         1. remove old LVs (which have the name name_replaces.<time_t>)
11608
11609     Failures are not very well handled.
11610
11611     """
11612     steps_total = 6
11613
11614     # Step: check device activation
11615     self.lu.LogStep(1, steps_total, "Check device existence")
11616     self._CheckDisksExistence([self.other_node, self.target_node])
11617     self._CheckVolumeGroup([self.target_node, self.other_node])
11618
11619     # Step: check other node consistency
11620     self.lu.LogStep(2, steps_total, "Check peer consistency")
11621     self._CheckDisksConsistency(self.other_node,
11622                                 self.other_node == self.instance.primary_node,
11623                                 False)
11624
11625     # Step: create new storage
11626     self.lu.LogStep(3, steps_total, "Allocate new storage")
11627     iv_names = self._CreateNewStorage(self.target_node)
11628
11629     # Step: for each lv, detach+rename*2+attach
11630     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11631     for dev, old_lvs, new_lvs in iv_names.itervalues():
11632       self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11633
11634       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11635                                                      old_lvs)
11636       result.Raise("Can't detach drbd from local storage on node"
11637                    " %s for device %s" % (self.target_node, dev.iv_name))
11638       #dev.children = []
11639       #cfg.Update(instance)
11640
11641       # ok, we created the new LVs, so now we know we have the needed
11642       # storage; as such, we proceed on the target node to rename
11643       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11644       # using the assumption that logical_id == physical_id (which in
11645       # turn is the unique_id on that node)
11646
11647       # FIXME(iustin): use a better name for the replaced LVs
11648       temp_suffix = int(time.time())
11649       ren_fn = lambda d, suff: (d.physical_id[0],
11650                                 d.physical_id[1] + "_replaced-%s" % suff)
11651
11652       # Build the rename list based on what LVs exist on the node
11653       rename_old_to_new = []
11654       for to_ren in old_lvs:
11655         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11656         if not result.fail_msg and result.payload:
11657           # device exists
11658           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11659
11660       self.lu.LogInfo("Renaming the old LVs on the target node")
11661       result = self.rpc.call_blockdev_rename(self.target_node,
11662                                              rename_old_to_new)
11663       result.Raise("Can't rename old LVs on node %s" % self.target_node)
11664
11665       # Now we rename the new LVs to the old LVs
11666       self.lu.LogInfo("Renaming the new LVs on the target node")
11667       rename_new_to_old = [(new, old.physical_id)
11668                            for old, new in zip(old_lvs, new_lvs)]
11669       result = self.rpc.call_blockdev_rename(self.target_node,
11670                                              rename_new_to_old)
11671       result.Raise("Can't rename new LVs on node %s" % self.target_node)
11672
11673       # Intermediate steps of in memory modifications
11674       for old, new in zip(old_lvs, new_lvs):
11675         new.logical_id = old.logical_id
11676         self.cfg.SetDiskID(new, self.target_node)
11677
11678       # We need to modify old_lvs so that removal later removes the
11679       # right LVs, not the newly added ones; note that old_lvs is a
11680       # copy here
11681       for disk in old_lvs:
11682         disk.logical_id = ren_fn(disk, temp_suffix)
11683         self.cfg.SetDiskID(disk, self.target_node)
11684
11685       # Now that the new lvs have the old name, we can add them to the device
11686       self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11687       result = self.rpc.call_blockdev_addchildren(self.target_node,
11688                                                   (dev, self.instance), new_lvs)
11689       msg = result.fail_msg
11690       if msg:
11691         for new_lv in new_lvs:
11692           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11693                                                new_lv).fail_msg
11694           if msg2:
11695             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11696                                hint=("cleanup manually the unused logical"
11697                                      "volumes"))
11698         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11699
11700     cstep = itertools.count(5)
11701
11702     if self.early_release:
11703       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11704       self._RemoveOldStorage(self.target_node, iv_names)
11705       # TODO: Check if releasing locks early still makes sense
11706       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11707     else:
11708       # Release all resource locks except those used by the instance
11709       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11710                     keep=self.node_secondary_ip.keys())
11711
11712     # Release all node locks while waiting for sync
11713     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11714
11715     # TODO: Can the instance lock be downgraded here? Take the optional disk
11716     # shutdown in the caller into consideration.
11717
11718     # Wait for sync
11719     # This can fail as the old devices are degraded and _WaitForSync
11720     # does a combined result over all disks, so we don't check its return value
11721     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11722     _WaitForSync(self.lu, self.instance)
11723
11724     # Check all devices manually
11725     self._CheckDevices(self.instance.primary_node, iv_names)
11726
11727     # Step: remove old storage
11728     if not self.early_release:
11729       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11730       self._RemoveOldStorage(self.target_node, iv_names)
11731
11732   def _ExecDrbd8Secondary(self, feedback_fn):
11733     """Replace the secondary node for DRBD 8.
11734
11735     The algorithm for replace is quite complicated:
11736       - for all disks of the instance:
11737         - create new LVs on the new node with same names
11738         - shutdown the drbd device on the old secondary
11739         - disconnect the drbd network on the primary
11740         - create the drbd device on the new secondary
11741         - network attach the drbd on the primary, using an artifice:
11742           the drbd code for Attach() will connect to the network if it
11743           finds a device which is connected to the good local disks but
11744           not network enabled
11745       - wait for sync across all devices
11746       - remove all disks from the old secondary
11747
11748     Failures are not very well handled.
11749
11750     """
11751     steps_total = 6
11752
11753     pnode = self.instance.primary_node
11754
11755     # Step: check device activation
11756     self.lu.LogStep(1, steps_total, "Check device existence")
11757     self._CheckDisksExistence([self.instance.primary_node])
11758     self._CheckVolumeGroup([self.instance.primary_node])
11759
11760     # Step: check other node consistency
11761     self.lu.LogStep(2, steps_total, "Check peer consistency")
11762     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11763
11764     # Step: create new storage
11765     self.lu.LogStep(3, steps_total, "Allocate new storage")
11766     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11767     for idx, dev in enumerate(disks):
11768       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11769                       (self.new_node, idx))
11770       # we pass force_create=True to force LVM creation
11771       for new_lv in dev.children:
11772         _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11773                              True, _GetInstanceInfoText(self.instance), False)
11774
11775     # Step 4: dbrd minors and drbd setups changes
11776     # after this, we must manually remove the drbd minors on both the
11777     # error and the success paths
11778     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11779     minors = self.cfg.AllocateDRBDMinor([self.new_node
11780                                          for dev in self.instance.disks],
11781                                         self.instance.name)
11782     logging.debug("Allocated minors %r", minors)
11783
11784     iv_names = {}
11785     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11786       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11787                       (self.new_node, idx))
11788       # create new devices on new_node; note that we create two IDs:
11789       # one without port, so the drbd will be activated without
11790       # networking information on the new node at this stage, and one
11791       # with network, for the latter activation in step 4
11792       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11793       if self.instance.primary_node == o_node1:
11794         p_minor = o_minor1
11795       else:
11796         assert self.instance.primary_node == o_node2, "Three-node instance?"
11797         p_minor = o_minor2
11798
11799       new_alone_id = (self.instance.primary_node, self.new_node, None,
11800                       p_minor, new_minor, o_secret)
11801       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11802                     p_minor, new_minor, o_secret)
11803
11804       iv_names[idx] = (dev, dev.children, new_net_id)
11805       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11806                     new_net_id)
11807       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11808                               logical_id=new_alone_id,
11809                               children=dev.children,
11810                               size=dev.size,
11811                               params={})
11812       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11813                                              self.cfg)
11814       try:
11815         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11816                               anno_new_drbd,
11817                               _GetInstanceInfoText(self.instance), False)
11818       except errors.GenericError:
11819         self.cfg.ReleaseDRBDMinors(self.instance.name)
11820         raise
11821
11822     # We have new devices, shutdown the drbd on the old secondary
11823     for idx, dev in enumerate(self.instance.disks):
11824       self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
11825       self.cfg.SetDiskID(dev, self.target_node)
11826       msg = self.rpc.call_blockdev_shutdown(self.target_node,
11827                                             (dev, self.instance)).fail_msg
11828       if msg:
11829         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11830                            "node: %s" % (idx, msg),
11831                            hint=("Please cleanup this device manually as"
11832                                  " soon as possible"))
11833
11834     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11835     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11836                                                self.instance.disks)[pnode]
11837
11838     msg = result.fail_msg
11839     if msg:
11840       # detaches didn't succeed (unlikely)
11841       self.cfg.ReleaseDRBDMinors(self.instance.name)
11842       raise errors.OpExecError("Can't detach the disks from the network on"
11843                                " old node: %s" % (msg,))
11844
11845     # if we managed to detach at least one, we update all the disks of
11846     # the instance to point to the new secondary
11847     self.lu.LogInfo("Updating instance configuration")
11848     for dev, _, new_logical_id in iv_names.itervalues():
11849       dev.logical_id = new_logical_id
11850       self.cfg.SetDiskID(dev, self.instance.primary_node)
11851
11852     self.cfg.Update(self.instance, feedback_fn)
11853
11854     # Release all node locks (the configuration has been updated)
11855     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11856
11857     # and now perform the drbd attach
11858     self.lu.LogInfo("Attaching primary drbds to new secondary"
11859                     " (standalone => connected)")
11860     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11861                                             self.new_node],
11862                                            self.node_secondary_ip,
11863                                            (self.instance.disks, self.instance),
11864                                            self.instance.name,
11865                                            False)
11866     for to_node, to_result in result.items():
11867       msg = to_result.fail_msg
11868       if msg:
11869         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11870                            to_node, msg,
11871                            hint=("please do a gnt-instance info to see the"
11872                                  " status of disks"))
11873
11874     cstep = itertools.count(5)
11875
11876     if self.early_release:
11877       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11878       self._RemoveOldStorage(self.target_node, iv_names)
11879       # TODO: Check if releasing locks early still makes sense
11880       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11881     else:
11882       # Release all resource locks except those used by the instance
11883       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11884                     keep=self.node_secondary_ip.keys())
11885
11886     # TODO: Can the instance lock be downgraded here? Take the optional disk
11887     # shutdown in the caller into consideration.
11888
11889     # Wait for sync
11890     # This can fail as the old devices are degraded and _WaitForSync
11891     # does a combined result over all disks, so we don't check its return value
11892     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11893     _WaitForSync(self.lu, self.instance)
11894
11895     # Check all devices manually
11896     self._CheckDevices(self.instance.primary_node, iv_names)
11897
11898     # Step: remove old storage
11899     if not self.early_release:
11900       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11901       self._RemoveOldStorage(self.target_node, iv_names)
11902
11903
11904 class LURepairNodeStorage(NoHooksLU):
11905   """Repairs the volume group on a node.
11906
11907   """
11908   REQ_BGL = False
11909
11910   def CheckArguments(self):
11911     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11912
11913     storage_type = self.op.storage_type
11914
11915     if (constants.SO_FIX_CONSISTENCY not in
11916         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11917       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11918                                  " repaired" % storage_type,
11919                                  errors.ECODE_INVAL)
11920
11921   def ExpandNames(self):
11922     self.needed_locks = {
11923       locking.LEVEL_NODE: [self.op.node_name],
11924       }
11925
11926   def _CheckFaultyDisks(self, instance, node_name):
11927     """Ensure faulty disks abort the opcode or at least warn."""
11928     try:
11929       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11930                                   node_name, True):
11931         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11932                                    " node '%s'" % (instance.name, node_name),
11933                                    errors.ECODE_STATE)
11934     except errors.OpPrereqError, err:
11935       if self.op.ignore_consistency:
11936         self.LogWarning(str(err.args[0]))
11937       else:
11938         raise
11939
11940   def CheckPrereq(self):
11941     """Check prerequisites.
11942
11943     """
11944     # Check whether any instance on this node has faulty disks
11945     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11946       if inst.admin_state != constants.ADMINST_UP:
11947         continue
11948       check_nodes = set(inst.all_nodes)
11949       check_nodes.discard(self.op.node_name)
11950       for inst_node_name in check_nodes:
11951         self._CheckFaultyDisks(inst, inst_node_name)
11952
11953   def Exec(self, feedback_fn):
11954     feedback_fn("Repairing storage unit '%s' on %s ..." %
11955                 (self.op.name, self.op.node_name))
11956
11957     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11958     result = self.rpc.call_storage_execute(self.op.node_name,
11959                                            self.op.storage_type, st_args,
11960                                            self.op.name,
11961                                            constants.SO_FIX_CONSISTENCY)
11962     result.Raise("Failed to repair storage unit '%s' on %s" %
11963                  (self.op.name, self.op.node_name))
11964
11965
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  # Translates the opcode's evacuation mode into the iallocator's mode
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    """Checks the C{iallocator} and C{remote_node} opcode parameters.

    The actual check is delegated to L{_CheckIAllocatorOrNode}.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expands node names and declares the (shared) locks.

    @raise errors.OpPrereqError: If the remote node is the evacuated node
        itself, or if a remote node is given for a mode other than
        L{constants.NODE_EVAC_SEC}

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    @return: Set containing the evacuated node and either the given remote
        node or, without a remote node, the members of the evacuated node's
        group(s)

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    @raise errors.OpPrereqError: If the mode is L{constants.NODE_EVAC_ALL},
        which is currently rejected

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    """Fills in the locks needed at the given level.

    @param level: Locking level for which locks are being declared

    """
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    """Verifies the optimistically acquired locks and the remote node.

    @raise errors.OpPrereqError: If the needed nodes are no longer covered
        by the owned node locks, or if the remote node is the primary node
        of one of the instances
    @raise errors.OpExecError: If the node groups or the affected instances
        differ from the owned locks

    """
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Computes the jobs evacuating the instances.

    @param feedback_fn: Feedback function; not used by this method
    @return: L{ResultWithJobs} wrapping the list of jobs, which is empty if
        there are no instances to evacuate
    @raise errors.OpPrereqError: If the iallocator run is not successful

    """
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      evac_mode = self._MODE2IALLOCATOR[self.op.mode]
      req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
                                     instances=list(self.instance_names))
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # One single-opcode job per instance
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
12163
12164
12165 def _SetOpEarlyRelease(early_release, op):
12166   """Sets C{early_release} flag on opcodes if available.
12167
12168   """
12169   try:
12170     op.early_release = early_release
12171   except AttributeError:
12172     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12173
12174   return op
12175
12176
12177 def _NodeEvacDest(use_nodes, group, nodes):
12178   """Returns group or nodes depending on caller's choice.
12179
12180   """
12181   if use_nodes:
12182     return utils.CommaJoin(nodes)
12183   else:
12184     return group
12185
12186
12187 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12188   """Unpacks the result of change-group and node-evacuate iallocator requests.
12189
12190   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12191   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12192
12193   @type lu: L{LogicalUnit}
12194   @param lu: Logical unit instance
12195   @type alloc_result: tuple/list
12196   @param alloc_result: Result from iallocator
12197   @type early_release: bool
12198   @param early_release: Whether to release locks early if possible
12199   @type use_nodes: bool
12200   @param use_nodes: Whether to display node names instead of groups
12201
12202   """
12203   (moved, failed, jobs) = alloc_result
12204
12205   if failed:
12206     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12207                                  for (name, reason) in failed)
12208     lu.LogWarning("Unable to evacuate instances %s", failreason)
12209     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12210
12211   if moved:
12212     lu.LogInfo("Instances to be moved: %s",
12213                utils.CommaJoin("%s (to %s)" %
12214                                (name, _NodeEvacDest(use_nodes, group, nodes))
12215                                for (name, group, nodes) in moved))
12216
12217   return [map(compat.partial(_SetOpEarlyRelease, early_release),
12218               map(opcodes.OpCode.LoadOpCode, ops))
12219           for ops in jobs]
12220
12221
12222 def _DiskSizeInBytesToMebibytes(lu, size):
12223   """Converts a disk size in bytes to mebibytes.
12224
12225   Warns and rounds up if the size isn't an even multiple of 1 MiB.
12226
12227   """
12228   (mib, remainder) = divmod(size, 1024 * 1024)
12229
12230   if remainder != 0:
12231     lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12232                   " to not overwrite existing data (%s bytes will not be"
12233                   " wiped)", (1024 * 1024) - remainder)
12234     mib += 1
12235
12236   return mib
12237
12238
12239 class LUInstanceGrowDisk(LogicalUnit):
12240   """Grow a disk of an instance.
12241
12242   """
12243   HPATH = "disk-grow"
12244   HTYPE = constants.HTYPE_INSTANCE
12245   REQ_BGL = False
12246
12247   def ExpandNames(self):
12248     self._ExpandAndLockInstance()
12249     self.needed_locks[locking.LEVEL_NODE] = []
12250     self.needed_locks[locking.LEVEL_NODE_RES] = []
12251     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12252     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12253
12254   def DeclareLocks(self, level):
12255     if level == locking.LEVEL_NODE:
12256       self._LockInstancesNodes()
12257     elif level == locking.LEVEL_NODE_RES:
12258       # Copy node locks
12259       self.needed_locks[locking.LEVEL_NODE_RES] = \
12260         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12261
12262   def BuildHooksEnv(self):
12263     """Build hooks env.
12264
12265     This runs on the master, the primary and all the secondaries.
12266
12267     """
12268     env = {
12269       "DISK": self.op.disk,
12270       "AMOUNT": self.op.amount,
12271       "ABSOLUTE": self.op.absolute,
12272       }
12273     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12274     return env
12275
12276   def BuildHooksNodes(self):
12277     """Build hooks nodes.
12278
12279     """
12280     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12281     return (nl, nl)
12282
12283   def CheckPrereq(self):
12284     """Check prerequisites.
12285
12286     This checks that the instance is in the cluster.
12287
12288     """
12289     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12290     assert instance is not None, \
12291       "Cannot retrieve locked instance %s" % self.op.instance_name
12292     nodenames = list(instance.all_nodes)
12293     for node in nodenames:
12294       _CheckNodeOnline(self, node)
12295
12296     self.instance = instance
12297
12298     if instance.disk_template not in constants.DTS_GROWABLE:
12299       raise errors.OpPrereqError("Instance's disk layout does not support"
12300                                  " growing", errors.ECODE_INVAL)
12301
12302     self.disk = instance.FindDisk(self.op.disk)
12303
12304     if self.op.absolute:
12305       self.target = self.op.amount
12306       self.delta = self.target - self.disk.size
12307       if self.delta < 0:
12308         raise errors.OpPrereqError("Requested size (%s) is smaller than "
12309                                    "current disk size (%s)" %
12310                                    (utils.FormatUnit(self.target, "h"),
12311                                     utils.FormatUnit(self.disk.size, "h")),
12312                                    errors.ECODE_STATE)
12313     else:
12314       self.delta = self.op.amount
12315       self.target = self.disk.size + self.delta
12316       if self.delta < 0:
12317         raise errors.OpPrereqError("Requested increment (%s) is negative" %
12318                                    utils.FormatUnit(self.delta, "h"),
12319                                    errors.ECODE_INVAL)
12320
12321     if instance.disk_template not in (constants.DT_FILE,
12322                                       constants.DT_SHARED_FILE,
12323                                       constants.DT_RBD):
12324       # TODO: check the free disk space for file, when that feature will be
12325       # supported
12326       _CheckNodesFreeDiskPerVG(self, nodenames,
12327                                self.disk.ComputeGrowth(self.delta))
12328
  def Exec(self, feedback_fn):
    """Execute disk grow.

    The steps are, in order: activate the disk, send a grow request in
    dry-run mode to every node of the instance, repeat the request for
    real on every node, send one more request to the primary node only and
    record the new size in the configuration. After that the node locks
    are released, the instance lock is downgraded, the added space is
    wiped if the cluster has C{prealloc_wipe_disks} set and, if the opcode
    requests it, the disk sync is awaited.

    @type feedback_fn: callable
    @param feedback_fn: receives a progress message and is also passed on
      to C{self.cfg.Update}
    @raise errors.OpExecError: if the disk cannot be activated or, with
      wiping enabled, the primary node does not report the disk size

    """
    instance = self.instance
    disk = self.disk

    # Sanity checks on the held locks: exactly this instance, and the same
    # set of names at the node and node resource levels
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    # Cluster-wide setting deciding whether the added space is wiped below
    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    # Only the disk being grown is assembled
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    # NOTE(review): going by the comments in this method the two trailing
    # booleans are (dry-run, backing storage) -- confirm against the RPC
    # definition
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)

    if wipe_disks:
      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      # One disk was queried, hence exactly one size is expected back
      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      # The size reported by the node must not be below the configured one
      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
    else:
      old_disk_size = None

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)

    # Only after all grow requests succeeded is the new size written to the
    # configuration
    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    # old_disk_size was set above exactly when wiping is enabled
    assert wipe_disks ^ (old_disk_size is None)

    if wipe_disks:
      assert instance.disks[self.op.disk] == disk

      # Wipe newly added disk space
      # (the pre-grow size is handed over together with the disk index and
      # object; presumably it is the offset at which wiping starts -- see
      # _WipeDisks)
      _WipeDisks(self, instance,
                 disks=[(self.op.disk, disk, old_disk_size)])

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.LogWarning("Disk syncing has not returned a good status; check"
                        " the instance")
      # The disk was assembled at the start of this method; shut it down
      # again unless the instance is configured to be up
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.LogWarning("Not shutting down the disk even if the instance is"
                      " not supposed to be running because no wait for"
                      " sync mode was requested")

    # The node resource and instance locks must still be held at this point
    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12425
12426
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  L{Exec} returns a dictionary keyed by instance name; each value is a
  dictionary with the instance's configuration and, unless static data
  was requested or the primary node is offline, its run state.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Determine the instances to query and the locks to request.

    Locking is switched on automatically when non-static data is wanted.

    """
    self.needed_locks = {}

    # Non-static information can only be gathered with locks held
    if not self.op.static and not self.op.use_locking:
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if not self.op.instances and self.op.use_locking:
      # No explicit list given; the names are taken from the acquired
      # locks in CheckPrereq
      self.wanted_names = None
    else:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)

    if not self.op.use_locking:
      return

    self.share_locks = _ShareAll()

    if self.wanted_names is None:
      instance_locks = locking.ALL_SET
    else:
      instance_locks = self.wanted_names

    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks
    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in node group and node locks; a no-op without locking.

    @param level: the locking level currently being declared

    """
    if not self.op.use_locking:
      return

    if level == locking.LEVEL_NODEGROUP:
      # Lock all groups used by instances optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      group_uuids = set()
      for inst_name in self.owned_locks(locking.LEVEL_INSTANCE):
        group_uuids.update(self.cfg.GetInstanceNodeGroups(inst_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = frozenset(group_uuids)

    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.
    The instance objects to report on are stored in
    C{self.wanted_instances}.

    """
    lock_levels = (locking.LEVEL_INSTANCE, locking.LEVEL_NODEGROUP,
                   locking.LEVEL_NODE)
    (held_instances, held_groups, held_nodes) = \
      [frozenset(self.owned_locks(lvl)) for lvl in lock_levels]

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = held_instances

    inst_by_name = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if not self.op.use_locking:
      # Without locking nothing may be held at any of the levels
      assert not (held_instances or held_groups or held_nodes)
    else:
      _CheckInstancesNodeGroups(self.cfg, inst_by_name, held_groups,
                                held_nodes, None)

    self.wanted_instances = inst_by_name.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    @param node: name of the node to ask; a false value yields C{None}
    @param instance: the instance the device belongs to (its name is used
      in the error message)
    @param dev: the block device to look up
    @return: C{None} for static queries, for a missing or offline node and
      when the node returns no status; otherwise the tuple (dev_path,
      major, minor, sync_percent, estimated_time, is_degraded,
      ldisk_status)

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance.name)

    bdev_status = find_result.payload
    if bdev_status is not None:
      return (bdev_status.dev_path, bdev_status.major, bdev_status.minor,
              bdev_status.sync_percent, bdev_status.estimated_time,
              bdev_status.is_degraded, bdev_status.ldisk_status)

    return None

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    The device is passed through C{_AnnotateDiskParams} before the actual
    work is delegated to L{_ComputeDiskStatusInner}.

    """
    annotated = _AnnotateDiskParams(instance, [dev], self.cfg)
    (anno_dev, ) = annotated

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.
    @param instance: the instance owning the device
    @param snode: secondary node name; replaced for DRBD devices by the
      node from the device's logical ID which is not the primary node
    @param dev: the (annotated) device; its children are handled
      recursively
    @rtype: dict

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance, dev)

    child_data = [self._ComputeDiskStatusInner(instance, snode, child)
                  for child in (dev.children or [])]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": child_data,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data

    @rtype: dict
    @return: dictionary mapping each instance name to a dictionary
      describing that instance

    """
    cluster = self.cfg.GetClusterInfo()

    all_node_names = \
      itertools.chain(*[inst.all_nodes for inst in self.wanted_instances])
    nodes = dict(self.cfg.GetMultiNodeInfo(all_node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(nodeobj.group
                                                 for nodeobj in
                                                   nodes.values()))

    def _GroupName(group_uuid):
      return groups[group_uuid].name

    data = {}

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      # Stays None for static queries and for offline primary nodes
      run_state = None

      if pnode.offline:
        self.LogWarning("Primary node %s is marked offline, returning static"
                        " information only for instance %s" %
                        (pnode.name, instance.name))
      elif not self.op.static:
        info_result = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        info_result.Raise("Error checking node %s" % instance.primary_node)
        hv_info = info_result.payload

        if hv_info and "state" in hv_info:
          run_state = "up"
        elif instance.admin_state == constants.ADMINST_UP:
          run_state = "down"
        else:
          run_state = instance.admin_state

      disk_data = [self._ComputeDiskStatus(instance, None, disk)
                   for disk in instance.disks]

      snode_groups = [nodes[snode_name].group
                      for snode_name in instance.secondary_nodes]

      data[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": run_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": _GroupName(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snode_groups,
        "snodes_group_names": [_GroupName(guuid) for guuid in snode_groups],
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disk_data,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return data
12640
12641
def PrepareContainerMods(mods, private_fn):
  """Extends each container modification with a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable invoked without arguments, once per
    modification, to construct its private data field; with C{None} the
    field is set to C{None}
  @rtype: list
  @return: list of (operation, index, parameters, private data) tuples, in
    the order of C{mods}

  """
  prepared = []

  for (op, idx, params) in mods:
    if private_fn is None:
      private = None
    else:
      private = private_fn()

    prepared.append((op, idx, params, private))

  return prepared
12659
12660
#: Type description for the changes returned by the callbacks of
#: L{ApplyContainerMods}; built from a length-two check combined with an
#: item check of (non-empty string, anything), wrapped in
#: C{ht.TMaybeListOf}
_TApplyContModsCbChanges = ht.TMaybeListOf(
  ht.TAnd(
    ht.TIsLength(2),
    ht.TItems([ht.TNonEmptyString, ht.TAny]),
    ))
12668
12669
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  The modifications are applied one after the other, directly to
  C{container}. An index of -1 stands for the end of the container:
  additions are appended, while removals and modifications act on the last
  item.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}
  @raise IndexError: for negative indices other than -1 and for indices
    not usable with the container's current length
  @raise errors.ProgrammerError: for an operation other than add, modify
    or remove

  """
  for (op, idx, params, private) in mods:
    at_end = (idx == -1)

    if at_end:
      # Position of the last item
      absidx = len(container) - 1
    else:
      if idx < 0:
        raise IndexError("Not accepting negative indices other than -1")

      if idx > len(container):
        raise IndexError("Got %s index %s, but there are only %s" %
                         (kind, idx, len(container)))

      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if at_end:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is not None:
        (item, changes) = create_fn(addidx, params, private)
      else:
        # Without a callback the parameters themselves become the item
        item = params

      if at_end:
        container.append(item)
      else:
        assert idx >= 0
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)

    else:
      # Every other operation works on an item which must already exist
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)

      elif op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]

      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if chgdesc is not None and changes is not None:
      chgdesc.extend(changes)
12757
12758
def _UpdateIvNames(base_index, disks):
  """Updates the C{iv_name} attribute of disks.

  @type base_index: int
  @param base_index: Number used in the name of the first disk
  @type disks: list of L{objects.Disk}
  @param disks: Disks to rename in place; the disk at position C{n}
    (counting from zero) is named C{disk/<base_index + n>}

  """
  for (offset, dsk) in enumerate(disks):
    number = base_index + offset
    dsk.iv_name = "disk/%s" % (number, )
12767
12768
class _InstNicModPrivate:
  """Private data holder for a single network interface modification.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    # Both attributes start out unset; they are presumably filled in while
    # the modification is prepared (verify against the users of this class)
    self.filled = None
    self.params = None
12778
12779
12780 class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.
12782
12783   """
12784   HPATH = "instance-modify"
12785   HTYPE = constants.HTYPE_INSTANCE
12786   REQ_BGL = False
12787
12788   @staticmethod
12789   def _UpgradeDiskNicMods(kind, mods, verify_fn):
12790     assert ht.TList(mods)
12791     assert not mods or len(mods[0]) in (2, 3)
12792
12793     if mods and len(mods[0]) == 2:
12794       result = []
12795
12796       addremove = 0
12797       for op, params in mods:
12798         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12799           result.append((op, -1, params))
12800           addremove += 1
12801
12802           if addremove > 1:
12803             raise errors.OpPrereqError("Only one %s add or remove operation is"
12804                                        " supported at a time" % kind,
12805                                        errors.ECODE_INVAL)
12806         else:
12807           result.append((constants.DDM_MODIFY, op, params))
12808
12809       assert verify_fn(result)
12810     else:
12811       result = mods
12812
12813     return result
12814
12815   @staticmethod
12816   def _CheckMods(kind, mods, key_types, item_fn):
12817     """Ensures requested disk/NIC modifications are valid.
12818
12819     """
12820     for (op, _, params) in mods:
12821       assert ht.TDict(params)
12822
12823       utils.ForceDictType(params, key_types)
12824
12825       if op == constants.DDM_REMOVE:
12826         if params:
12827           raise errors.OpPrereqError("No settings should be passed when"
12828                                      " removing a %s" % kind,
12829                                      errors.ECODE_INVAL)
12830       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12831         item_fn(op, params)
12832       else:
12833         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12834
12835   @staticmethod
12836   def _VerifyDiskModification(op, params):
12837     """Verifies a disk modification.
12838
12839     """
12840     if op == constants.DDM_ADD:
12841       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12842       if mode not in constants.DISK_ACCESS_SET:
12843         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12844                                    errors.ECODE_INVAL)
12845
12846       size = params.get(constants.IDISK_SIZE, None)
12847       if size is None:
12848         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12849                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12850
12851       try:
12852         size = int(size)
12853       except (TypeError, ValueError), err:
12854         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12855                                    errors.ECODE_INVAL)
12856
12857       params[constants.IDISK_SIZE] = size
12858
12859     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12860       raise errors.OpPrereqError("Disk size change not possible, use"
12861                                  " grow-disk", errors.ECODE_INVAL)
12862
12863   @staticmethod
12864   def _VerifyNicModification(op, params):
12865     """Verifies a network interface modification.
12866
12867     """
12868     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12869       ip = params.get(constants.INIC_IP, None)
12870       req_net = params.get(constants.INIC_NETWORK, None)
12871       link = params.get(constants.NIC_LINK, None)
12872       mode = params.get(constants.NIC_MODE, None)
12873       if req_net is not None:
12874         if req_net.lower() == constants.VALUE_NONE:
12875           params[constants.INIC_NETWORK] = None
12876           req_net = None
12877         elif link is not None or mode is not None:
12878           raise errors.OpPrereqError("If network is given"
12879                                      " mode or link should not",
12880                                      errors.ECODE_INVAL)
12881
12882       if op == constants.DDM_ADD:
12883         macaddr = params.get(constants.INIC_MAC, None)
12884         if macaddr is None:
12885           params[constants.INIC_MAC] = constants.VALUE_AUTO
12886
12887       if ip is not None:
12888         if ip.lower() == constants.VALUE_NONE:
12889           params[constants.INIC_IP] = None
12890         else:
12891           if ip.lower() == constants.NIC_IP_POOL:
12892             if op == constants.DDM_ADD and req_net is None:
12893               raise errors.OpPrereqError("If ip=pool, parameter network"
12894                                          " cannot be none",
12895                                          errors.ECODE_INVAL)
12896           else:
12897             if not netutils.IPAddress.IsValid(ip):
12898               raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12899                                          errors.ECODE_INVAL)
12900
12901       if constants.INIC_MAC in params:
12902         macaddr = params[constants.INIC_MAC]
12903         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12904           macaddr = utils.NormalizeAndValidateMac(macaddr)
12905
12906         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12907           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12908                                      " modifying an existing NIC",
12909                                      errors.ECODE_INVAL)
12910
12911   def CheckArguments(self):
12912     if not (self.op.nics or self.op.disks or self.op.disk_template or
12913             self.op.hvparams or self.op.beparams or self.op.os_name or
12914             self.op.offline is not None or self.op.runtime_mem):
12915       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12916
12917     if self.op.hvparams:
12918       _CheckGlobalHvParams(self.op.hvparams)
12919
12920     self.op.disks = self._UpgradeDiskNicMods(
12921       "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12922     self.op.nics = self._UpgradeDiskNicMods(
12923       "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12924
12925     # Check disk modifications
12926     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12927                     self._VerifyDiskModification)
12928
12929     if self.op.disks and self.op.disk_template is not None:
12930       raise errors.OpPrereqError("Disk template conversion and other disk"
12931                                  " changes not supported at the same time",
12932                                  errors.ECODE_INVAL)
12933
12934     if (self.op.disk_template and
12935         self.op.disk_template in constants.DTS_INT_MIRROR and
12936         self.op.remote_node is None):
12937       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12938                                  " one requires specifying a secondary node",
12939                                  errors.ECODE_INVAL)
12940
12941     # Check NIC modifications
12942     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12943                     self._VerifyNicModification)
12944
12945   def ExpandNames(self):
12946     self._ExpandAndLockInstance()
12947     self.needed_locks[locking.LEVEL_NODEGROUP] = []
12948     # Can't even acquire node locks in shared mode as upcoming changes in
12949     # Ganeti 2.6 will start to modify the node object on disk conversion
12950     self.needed_locks[locking.LEVEL_NODE] = []
12951     self.needed_locks[locking.LEVEL_NODE_RES] = []
12952     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12953     # Look node group to look up the ipolicy
12954     self.share_locks[locking.LEVEL_NODEGROUP] = 1
12955
12956   def DeclareLocks(self, level):
12957     if level == locking.LEVEL_NODEGROUP:
12958       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12959       # Acquire locks for the instance's nodegroups optimistically. Needs
12960       # to be verified in CheckPrereq
12961       self.needed_locks[locking.LEVEL_NODEGROUP] = \
12962         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12963     elif level == locking.LEVEL_NODE:
12964       self._LockInstancesNodes()
12965       if self.op.disk_template and self.op.remote_node:
12966         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12967         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12968     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12969       # Copy node locks
12970       self.needed_locks[locking.LEVEL_NODE_RES] = \
12971         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12972
12973   def BuildHooksEnv(self):
12974     """Build hooks env.
12975
12976     This runs on the master, primary and secondaries.
12977
12978     """
12979     args = {}
12980     if constants.BE_MINMEM in self.be_new:
12981       args["minmem"] = self.be_new[constants.BE_MINMEM]
12982     if constants.BE_MAXMEM in self.be_new:
12983       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12984     if constants.BE_VCPUS in self.be_new:
12985       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12986     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12987     # information at all.
12988
12989     if self._new_nics is not None:
12990       nics = []
12991
12992       for nic in self._new_nics:
12993         n = copy.deepcopy(nic)
12994         nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12995         n.nicparams = nicparams
12996         nics.append(_NICToTuple(self, n))
12997
12998       args["nics"] = nics
12999
13000     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13001     if self.op.disk_template:
13002       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13003     if self.op.runtime_mem:
13004       env["RUNTIME_MEMORY"] = self.op.runtime_mem
13005
13006     return env
13007
13008   def BuildHooksNodes(self):
13009     """Build hooks nodes.
13010
13011     """
13012     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13013     return (nl, nl)
13014
13015   def _PrepareNicModification(self, params, private, old_ip, old_net,
13016                               old_params, cluster, pnode):
13017
13018     update_params_dict = dict([(key, params[key])
13019                                for key in constants.NICS_PARAMETERS
13020                                if key in params])
13021
13022     req_link = update_params_dict.get(constants.NIC_LINK, None)
13023     req_mode = update_params_dict.get(constants.NIC_MODE, None)
13024
13025     new_net = params.get(constants.INIC_NETWORK, old_net)
13026     if new_net is not None:
13027       netparams = self.cfg.GetGroupNetParams(new_net, pnode)
13028       if netparams is None:
13029         raise errors.OpPrereqError("No netparams found for the network"
13030                                    " %s, probably not connected" % new_net,
13031                                    errors.ECODE_INVAL)
13032       new_params = dict(netparams)
13033     else:
13034       new_params = _GetUpdatedParams(old_params, update_params_dict)
13035
13036     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13037
13038     new_filled_params = cluster.SimpleFillNIC(new_params)
13039     objects.NIC.CheckParameterSyntax(new_filled_params)
13040
13041     new_mode = new_filled_params[constants.NIC_MODE]
13042     if new_mode == constants.NIC_MODE_BRIDGED:
13043       bridge = new_filled_params[constants.NIC_LINK]
13044       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13045       if msg:
13046         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13047         if self.op.force:
13048           self.warn.append(msg)
13049         else:
13050           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13051
13052     elif new_mode == constants.NIC_MODE_ROUTED:
13053       ip = params.get(constants.INIC_IP, old_ip)
13054       if ip is None:
13055         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13056                                    " on a routed NIC", errors.ECODE_INVAL)
13057
13058     elif new_mode == constants.NIC_MODE_OVS:
13059       # TODO: check OVS link
13060       self.LogInfo("OVS links are currently not checked for correctness")
13061
13062     if constants.INIC_MAC in params:
13063       mac = params[constants.INIC_MAC]
13064       if mac is None:
13065         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13066                                    errors.ECODE_INVAL)
13067       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13068         # otherwise generate the MAC address
13069         params[constants.INIC_MAC] = \
13070           self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13071       else:
13072         # or validate/reserve the current one
13073         try:
13074           self.cfg.ReserveMAC(mac, self.proc.GetECId())
13075         except errors.ReservationError:
13076           raise errors.OpPrereqError("MAC address '%s' already in use"
13077                                      " in cluster" % mac,
13078                                      errors.ECODE_NOTUNIQUE)
13079     elif new_net != old_net:
13080
13081       def get_net_prefix(net):
13082         if net:
13083           uuid = self.cfg.LookupNetwork(net)
13084           if uuid:
13085             nobj = self.cfg.GetNetwork(uuid)
13086             return nobj.mac_prefix
13087         return None
13088
13089       new_prefix = get_net_prefix(new_net)
13090       old_prefix = get_net_prefix(old_net)
13091       if old_prefix != new_prefix:
13092         params[constants.INIC_MAC] = \
13093           self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13094
13095     #if there is a change in nic-network configuration
13096     new_ip = params.get(constants.INIC_IP, old_ip)
13097     if (new_ip, new_net) != (old_ip, old_net):
13098       if new_ip:
13099         if new_net:
13100           if new_ip.lower() == constants.NIC_IP_POOL:
13101             try:
13102               new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
13103             except errors.ReservationError:
13104               raise errors.OpPrereqError("Unable to get a free IP"
13105                                          " from the address pool",
13106                                          errors.ECODE_STATE)
13107             self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
13108             params[constants.INIC_IP] = new_ip
13109           elif new_ip != old_ip or new_net != old_net:
13110             try:
13111               self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
13112               self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
13113             except errors.ReservationError:
13114               raise errors.OpPrereqError("IP %s not available in network %s" %
13115                                          (new_ip, new_net),
13116                                          errors.ECODE_NOTUNIQUE)
13117         elif new_ip.lower() == constants.NIC_IP_POOL:
13118           raise errors.OpPrereqError("ip=pool, but no network found",
13119                                      errors.ECODE_INVAL)
13120         else:
13121           # new net is None
13122           if self.op.conflicts_check:
13123             _CheckForConflictingIp(self, new_ip, pnode)
13124
13125       if old_ip:
13126         if old_net:
13127           try:
13128             self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13129           except errors.AddressPoolError:
13130             logging.warning("Release IP %s not contained in network %s",
13131                             old_ip, old_net)
13132
13133     # there are no changes in (net, ip) tuple
13134     elif (old_net is not None and
13135           (req_link is not None or req_mode is not None)):
13136       raise errors.OpPrereqError("Not allowed to change link or mode of"
13137                                  " a NIC that is connected to a network",
13138                                  errors.ECODE_INVAL)
13139
13140     private.params = new_params
13141     private.filled = new_filled_params
13142
  def CheckPrereq(self):
    """Check prerequisites.

    Validates the requested instance modification and pre-computes the
    data later consumed when the change is applied. The checks are done
    in this order:

      - the OS is looked up on the primary node (unless forced)
      - a disk template conversion, if requested, is checked for being
        supported, for the instance state and for the new secondary node
      - hypervisor, backend and OS parameters are merged and verified
      - the CPU mask is compared against the number of vCPUs
      - free memory is checked when the maximum memory is increased
      - a runtime memory change is checked against the running instance
      - NIC and disk modifications are applied to copies of the
        instance's lists to verify them
      - the resulting instance specification is checked against the
        instance policy of the primary node's group

    Attributes set on the LU: C{instance}, C{cluster}, C{diskparams},
    C{diskmod}, C{nicmod}, C{hv_proposed}, C{hv_new}, C{hv_inst},
    C{be_proposed}, C{be_new}, C{be_inst}, C{os_inst}, C{warn},
    C{_nic_chgdesc} and C{_new_nics}.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = instance.primary_node
    assert pnode in self.owned_locks(locking.LEVEL_NODE)
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
    assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
    group_info = self.cfg.GetNodeGroup(pnode_info.group)

    # dictionary with instance information after the modification
    ispec = {}

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # OS change
    # Note: when the change is forced the OS is not looked up on the node
    # and the OS parameter checks below still use the current OS name
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    # Disk template conversion
    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      # Converting to an internally mirrored template needs a usable new
      # secondary node with enough free disk space
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
        snode_group = self.cfg.GetNodeGroup(snode_info.group)
        # NOTE(review): this needs the name "ganeti" to be bound at module
        # level (e.g. through "import ganeti.masterd.instance"); confirm
        ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                                snode_group)
        _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                                ignore=self.op.ignore_ipolicy)
        if pnode_info.group != snode_info.group:
          self.LogWarning("The primary and secondary nodes are in two"
                          " different node groups; the disk parameters"
                          " from the first disk's node group will be"
                          " used")

    # hvparams processing
    # hv_proposed always holds the filled hypervisor parameters (changed or
    # not), while hv_new/hv_inst are only non-empty if a change is requested
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    # same scheme as for the hypervisor parameters above
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    # the currently effective backend parameters, used for comparison below
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    # Warnings collected during the checks; this has to be initialized
    # before the NIC checks below, which may append to it
    self.warn = []

    # Memory check when the maximum memory grows; "be_new" is always bound
    # here, as BE_MAXMEM being in self.op.beparams implies that the
    # beparams branch above was taken
    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor])
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      # Problems with the primary node's data only generate warnings,
      # while actually missing memory is an error
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0
          #TODO(dynmem): do the appropriate check involving MINMEM
          # memory already used by the instance counts as available to it
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)

      # With auto-balancing the secondary nodes must be able to hold the
      # instance too; unlike for the primary node, failures are fatal here
      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # Runtime memory change: the instance must be running and, unless
    # forced, the new value has to lie between the proposed minimum and
    # maximum memory
    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" %
                                   instance.name, errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
           (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
            self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      # only growing the instance requires free memory on the node
      delta = self.op.runtime_mem - current_memory
      if delta > 0:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name, delta, instance.hypervisor)

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    # Callbacks passed to ApplyContainerMods for verifying NIC changes
    def _PrepareNicCreate(_, params, private):
      # a new NIC has no previous IP, network or parameters
      self._PrepareNicModification(params, private, None, None,
                                   {}, cluster, pnode)
      return (None, None)

    def _PrepareNicMod(_, nic, params, private):
      self._PrepareNicModification(params, private, nic.ip, nic.network,
                                   nic.nicparams, cluster, pnode)
      return None

    def _PrepareNicRemove(_, params, __):
      # "params" is used as the NIC object here (its "ip" and "network"
      # attributes are read); the IP is released if both are set
      ip = params.ip
      net = params.network
      if net is not None and ip is not None:
        self.cfg.ReleaseIp(net, ip, self.proc.GetECId())

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    # Verify disk changes (operating on a copy)
    disks = instance.disks[:]
    ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)
    # The disk sizes for the policy check are the current sizes plus the
    # sizes of all disks being added
    disk_sizes = [disk.size for disk in instance.disks]
    disk_sizes.extend(params["size"] for (op, idx, params, private) in
                      self.diskmod if op == constants.DDM_ADD)
    ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
    ispec[constants.ISPEC_DISK_SIZE] = disk_sizes

    if self.op.offline is not None:
      if self.op.offline:
        msg = "can't change to offline"
      else:
        msg = "can't change to online"
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
      ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
    else:
      # None (as opposed to an empty list) means the NICs are unchanged
      self._new_nics = None
      ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)

    # Instance policy check against the primary node's group
    if not self.op.ignore_ipolicy:
      # NOTE(review): see the remark above about the name "ganeti"
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)

      # Fill ispec with backend parameters
      # (values not being changed are passed as None)
      ispec[constants.ISPEC_SPINDLE_USE] = \
        self.be_new.get(constants.BE_SPINDLE_USE, None)
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
                                                         None)

      # Copy ispec to verify parameters with min/max values separately
      ispec_max = ispec.copy()
      ispec_max[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MAXMEM, None)
      res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
      ispec_min = ispec.copy()
      ispec_min[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MINMEM, None)
      res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)

      if (res_max or res_min):
        # FIXME: Improve error message by including information about whether
        # the upper or lower limit of the parameter fails the ipolicy.
        # NOTE(review): the first placeholder is filled with the group
        # object itself rather than with an identifier; confirm this is
        # the intended output
        msg = ("Instance allocation to group %s (%s) violates policy: %s" %
               (group_info, group_info.name,
                utils.CommaJoin(set(res_max + res_min))))
        raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13469
13470   def _ConvertPlainToDrbd(self, feedback_fn):
13471     """Converts an instance from plain to drbd.
13472
13473     """
13474     feedback_fn("Converting template to drbd")
13475     instance = self.instance
13476     pnode = instance.primary_node
13477     snode = self.op.remote_node
13478
13479     assert instance.disk_template == constants.DT_PLAIN
13480
13481     # create a fake disk info for _GenerateDiskTemplate
13482     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13483                   constants.IDISK_VG: d.logical_id[0]}
13484                  for d in instance.disks]
13485     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13486                                       instance.name, pnode, [snode],
13487                                       disk_info, None, None, 0, feedback_fn,
13488                                       self.diskparams)
13489     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13490                                         self.diskparams)
13491     info = _GetInstanceInfoText(instance)
13492     feedback_fn("Creating additional volumes...")
13493     # first, create the missing data and meta devices
13494     for disk in anno_disks:
13495       # unfortunately this is... not too nice
13496       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13497                             info, True)
13498       for child in disk.children:
13499         _CreateSingleBlockDev(self, snode, instance, child, info, True)
13500     # at this stage, all new LVs have been created, we can rename the
13501     # old ones
13502     feedback_fn("Renaming original volumes...")
13503     rename_list = [(o, n.children[0].logical_id)
13504                    for (o, n) in zip(instance.disks, new_disks)]
13505     result = self.rpc.call_blockdev_rename(pnode, rename_list)
13506     result.Raise("Failed to rename original LVs")
13507
13508     feedback_fn("Initializing DRBD devices...")
13509     # all child devices are in place, we can now create the DRBD devices
13510     for disk in anno_disks:
13511       for node in [pnode, snode]:
13512         f_create = node == pnode
13513         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13514
13515     # at this point, the instance has been modified
13516     instance.disk_template = constants.DT_DRBD8
13517     instance.disks = new_disks
13518     self.cfg.Update(instance, feedback_fn)
13519
13520     # Release node locks while waiting for sync
13521     _ReleaseLocks(self, locking.LEVEL_NODE)
13522
13523     # disks are created, waiting for sync
13524     disk_abort = not _WaitForSync(self, instance,
13525                                   oneshot=not self.op.wait_for_sync)
13526     if disk_abort:
13527       raise errors.OpExecError("There are some degraded disks for"
13528                                " this instance, please cleanup manually")
13529
13530     # Node resource locks will be released by caller
13531
13532   def _ConvertDrbdToPlain(self, feedback_fn):
13533     """Converts an instance from drbd to plain.
13534
13535     """
13536     instance = self.instance
13537
13538     assert len(instance.secondary_nodes) == 1
13539     assert instance.disk_template == constants.DT_DRBD8
13540
13541     pnode = instance.primary_node
13542     snode = instance.secondary_nodes[0]
13543     feedback_fn("Converting template to plain")
13544
13545     old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13546     new_disks = [d.children[0] for d in instance.disks]
13547
13548     # copy over size and mode
13549     for parent, child in zip(old_disks, new_disks):
13550       child.size = parent.size
13551       child.mode = parent.mode
13552
13553     # this is a DRBD disk, return its port to the pool
13554     # NOTE: this must be done right before the call to cfg.Update!
13555     for disk in old_disks:
13556       tcp_port = disk.logical_id[2]
13557       self.cfg.AddTcpUdpPort(tcp_port)
13558
13559     # update instance structure
13560     instance.disks = new_disks
13561     instance.disk_template = constants.DT_PLAIN
13562     self.cfg.Update(instance, feedback_fn)
13563
13564     # Release locks in case removing disks takes a while
13565     _ReleaseLocks(self, locking.LEVEL_NODE)
13566
13567     feedback_fn("Removing volumes on the secondary node...")
13568     for disk in old_disks:
13569       self.cfg.SetDiskID(disk, snode)
13570       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13571       if msg:
13572         self.LogWarning("Could not remove block device %s on node %s,"
13573                         " continuing anyway: %s", disk.iv_name, snode, msg)
13574
13575     feedback_fn("Removing unneeded volumes on the primary node...")
13576     for idx, disk in enumerate(old_disks):
13577       meta = disk.children[1]
13578       self.cfg.SetDiskID(meta, pnode)
13579       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13580       if msg:
13581         self.LogWarning("Could not remove metadata for disk %d on node %s,"
13582                         " continuing anyway: %s", idx, pnode, msg)
13583
13584   def _CreateNewDisk(self, idx, params, _):
13585     """Creates a new disk.
13586
13587     """
13588     instance = self.instance
13589
13590     # add a new disk
13591     if instance.disk_template in constants.DTS_FILEBASED:
13592       (file_driver, file_path) = instance.disks[0].logical_id
13593       file_path = os.path.dirname(file_path)
13594     else:
13595       file_driver = file_path = None
13596
13597     disk = \
13598       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13599                             instance.primary_node, instance.secondary_nodes,
13600                             [params], file_path, file_driver, idx,
13601                             self.Log, self.diskparams)[0]
13602
13603     info = _GetInstanceInfoText(instance)
13604
13605     logging.info("Creating volume %s for instance %s",
13606                  disk.iv_name, instance.name)
13607     # Note: this needs to be kept in sync with _CreateDisks
13608     #HARDCODE
13609     for node in instance.all_nodes:
13610       f_create = (node == instance.primary_node)
13611       try:
13612         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13613       except errors.OpExecError, err:
13614         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13615                         disk.iv_name, disk, node, err)
13616
13617     return (disk, [
13618       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13619       ])
13620
13621   @staticmethod
13622   def _ModifyDisk(idx, disk, params, _):
13623     """Modifies a disk.
13624
13625     """
13626     disk.mode = params[constants.IDISK_MODE]
13627
13628     return [
13629       ("disk.mode/%d" % idx, disk.mode),
13630       ]
13631
13632   def _RemoveDisk(self, idx, root, _):
13633     """Removes a disk.
13634
13635     """
13636     (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13637     for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13638       self.cfg.SetDiskID(disk, node)
13639       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13640       if msg:
13641         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13642                         " continuing anyway", idx, node, msg)
13643
13644     # if this is a DRBD disk, return its port to the pool
13645     if root.dev_type in constants.LDS_DRBD:
13646       self.cfg.AddTcpUdpPort(root.logical_id[2])
13647
13648   @staticmethod
13649   def _CreateNewNic(idx, params, private):
13650     """Creates data structure for a new network interface.
13651
13652     """
13653     mac = params[constants.INIC_MAC]
13654     ip = params.get(constants.INIC_IP, None)
13655     net = params.get(constants.INIC_NETWORK, None)
13656     #TODO: not private.filled?? can a nic have no nicparams??
13657     nicparams = private.filled
13658
13659     return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13660       ("nic.%d" % idx,
13661        "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13662        (mac, ip, private.filled[constants.NIC_MODE],
13663        private.filled[constants.NIC_LINK],
13664        net)),
13665       ])
13666
13667   @staticmethod
13668   def _ApplyNicMods(idx, nic, params, private):
13669     """Modifies a network interface.
13670
13671     """
13672     changes = []
13673
13674     for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13675       if key in params:
13676         changes.append(("nic.%s/%d" % (key, idx), params[key]))
13677         setattr(nic, key, params[key])
13678
13679     if private.filled:
13680       nic.nicparams = private.filled
13681
13682       for (key, val) in nic.nicparams.items():
13683         changes.append(("nic.%s/%d" % (key, idx), val))
13684
13685     return changes
13686
13687   def Exec(self, feedback_fn):
13688     """Modifies an instance.
13689
13690     All parameters take effect only at the next restart of the instance.
13691
13692     """
13693     # Process here the warnings from CheckPrereq, as we don't have a
13694     # feedback_fn there.
13695     # TODO: Replace with self.LogWarning
13696     for warn in self.warn:
13697       feedback_fn("WARNING: %s" % warn)
13698
13699     assert ((self.op.disk_template is None) ^
13700             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13701       "Not owning any node resource locks"
13702
13703     result = []
13704     instance = self.instance
13705
13706     # runtime memory
13707     if self.op.runtime_mem:
13708       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13709                                                      instance,
13710                                                      self.op.runtime_mem)
13711       rpcres.Raise("Cannot modify instance runtime memory")
13712       result.append(("runtime_memory", self.op.runtime_mem))
13713
13714     # Apply disk changes
13715     ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13716                        self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13717     _UpdateIvNames(0, instance.disks)
13718
13719     if self.op.disk_template:
13720       if __debug__:
13721         check_nodes = set(instance.all_nodes)
13722         if self.op.remote_node:
13723           check_nodes.add(self.op.remote_node)
13724         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13725           owned = self.owned_locks(level)
13726           assert not (check_nodes - owned), \
13727             ("Not owning the correct locks, owning %r, expected at least %r" %
13728              (owned, check_nodes))
13729
13730       r_shut = _ShutdownInstanceDisks(self, instance)
13731       if not r_shut:
13732         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13733                                  " proceed with disk template conversion")
13734       mode = (instance.disk_template, self.op.disk_template)
13735       try:
13736         self._DISK_CONVERSIONS[mode](self, feedback_fn)
13737       except:
13738         self.cfg.ReleaseDRBDMinors(instance.name)
13739         raise
13740       result.append(("disk_template", self.op.disk_template))
13741
13742       assert instance.disk_template == self.op.disk_template, \
13743         ("Expected disk template '%s', found '%s'" %
13744          (self.op.disk_template, instance.disk_template))
13745
13746     # Release node and resource locks if there are any (they might already have
13747     # been released during disk conversion)
13748     _ReleaseLocks(self, locking.LEVEL_NODE)
13749     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13750
13751     # Apply NIC changes
13752     if self._new_nics is not None:
13753       instance.nics = self._new_nics
13754       result.extend(self._nic_chgdesc)
13755
13756     # hvparams changes
13757     if self.op.hvparams:
13758       instance.hvparams = self.hv_inst
13759       for key, val in self.op.hvparams.iteritems():
13760         result.append(("hv/%s" % key, val))
13761
13762     # beparams changes
13763     if self.op.beparams:
13764       instance.beparams = self.be_inst
13765       for key, val in self.op.beparams.iteritems():
13766         result.append(("be/%s" % key, val))
13767
13768     # OS change
13769     if self.op.os_name:
13770       instance.os = self.op.os_name
13771
13772     # osparams changes
13773     if self.op.osparams:
13774       instance.osparams = self.os_inst
13775       for key, val in self.op.osparams.iteritems():
13776         result.append(("os/%s" % key, val))
13777
13778     if self.op.offline is None:
13779       # Ignore
13780       pass
13781     elif self.op.offline:
13782       # Mark instance as offline
13783       self.cfg.MarkInstanceOffline(instance.name)
13784       result.append(("admin_state", constants.ADMINST_OFFLINE))
13785     else:
13786       # Mark instance as online, but stopped
13787       self.cfg.MarkInstanceDown(instance.name)
13788       result.append(("admin_state", constants.ADMINST_DOWN))
13789
13790     self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13791
13792     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13793                 self.owned_locks(locking.LEVEL_NODE)), \
13794       "All node locks should have been released by now"
13795
13796     return result
13797
  # Dispatch table used by Exec: maps a (current disk template, requested
  # disk template) pair to the conversion routine, which Exec invokes as
  # fn(self, feedback_fn)
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
13802
13803
class LUInstanceChangeGroup(LogicalUnit):
  """Logical unit for changing the node group of an instance.

  L{Exec} asks an iallocator for a solution and returns the resulting jobs.

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declares the initial locks and resolves the requested target groups.

    """
    self.share_locks = _ShareAll()

    # The node group and node entries are filled in by DeclareLocks
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      # No explicit targets requested
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    """Computes the node group and node locks.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically acquired locks and computes the set of
    target groups (C{self.target_uuids}).

    @raise errors.OpPrereqError: if a target group is already used by the
        instance or if no target group is left

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      # Sorted, as the iteration order of a set is arbitrary and the message
      # should be reproducible
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(sorted(conflicting_groups)),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dict with the instance hook environment plus C{TARGET_GROUPS},
        the space-separated target group UUIDs

    """
    assert self.target_uuids

    env = {
      # target_uuids is a frozenset; sort it so that hooks always see the
      # groups in the same order
      "TARGET_GROUPS": " ".join(sorted(self.target_uuids)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two node lists, each containing only the master node

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Computes the jobs needed for changing the instance's group.

    @param feedback_fn: function used to send feedback back to the caller
        (not used by this method)
    @return: L{ResultWithJobs} wrapping the jobs derived from the
        iallocator's answer
    @raise errors.OpPrereqError: if the iallocator can't compute a solution

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=list(self.target_uuids))
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info), errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
13945
13946
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the export query restricted to the requested nodes.

    """
    node_filter = qlang.MakeSimpleFilter("node", self.op.nodes)
    self.expq = _ExportQuery(node_filter, ["node", "export"],
                             self.op.use_locking)

  def ExpandNames(self):
    """Delegates to the export query.

    """
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates to the export query.

    """
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Collects the exports per node.

    @return: dict keyed by node name; the value is C{False} for a node
        reported with an export name of C{None}, otherwise the list of
        export names found on that node

    """
    exports = {}

    for (node_name, export_name) in self.expq.OldStyleQuery(self):
      if export_name is not None:
        exports.setdefault(node_name, []).append(export_name)
        continue

      # _ExportQuery reports None for nodes whose export list call failed
      exports[node_name] = False

    return exports
13973
13974
class _ExportQuery(_QueryBase):
  """Query implementation for instance exports.

  """
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    """Computes the wanted nodes and, if locking is used, the needed locks.

    """
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

    if not self.do_locking:
      return

    lu.share_locks = _ShareAll()

    node_locks = {
      locking.LEVEL_NODE: self.wanted,
      }
    if not self.names:
      # All nodes are queried, hence the node allocation lock is taken too
      node_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    lu.needed_locks = node_locks

  def DeclareLocks(self, lu, level):
    """Does nothing, all locks are declared in L{ExpandNames}.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    @return: list of C{(node name, export name)} tuples; a node whose
        export list call failed is included once with C{None} as export name

    """
    # Locking is not used
    # TODO
    assert (not compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) and
            not self.do_locking and not self.use_locking)

    node_names = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
    export_lists = lu.rpc.call_export_list(node_names)

    data = []

    for (node_name, node_result) in export_lists.items():
      if node_result.fail_msg:
        data.append((node_name, None))
        continue

      data.extend([(node_name, export_name)
                   for export_name in node_result.payload])

    return data
14026
14027
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance's primary node must be online.

    """
    name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @param feedback_fn: function used to send feedback back to the caller
    @return: C{None} unless the export mode is remote; for remote exports a
        dict with the keys C{handshake}, C{x509_key_name} and C{x509_ca}

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    primary = self.instance.primary_node
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % primary)
    cert_result = self.rpc.call_x509_cert_create(primary,
                                                 constants.RIE_CERT_VALIDITY)
    cert_result.Raise("Can't create X509 key and certificate on %s" %
                      cert_result.node)

    (key_name, cert_pem) = cert_result.payload

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # Key name and certificate are passed through Sha1Hmac and
    # SignX509Certificate using the cluster domain secret and the same salt
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
14077
14078
14079 class LUBackupExport(LogicalUnit):
14080   """Export an instance to an image in the cluster.
14081
14082   """
14083   HPATH = "instance-export"
14084   HTYPE = constants.HTYPE_INSTANCE
14085   REQ_BGL = False
14086
14087   def CheckArguments(self):
14088     """Check the arguments.
14089
14090     """
14091     self.x509_key_name = self.op.x509_key_name
14092     self.dest_x509_ca_pem = self.op.destination_x509_ca
14093
14094     if self.op.mode == constants.EXPORT_MODE_REMOTE:
14095       if not self.x509_key_name:
14096         raise errors.OpPrereqError("Missing X509 key name for encryption",
14097                                    errors.ECODE_INVAL)
14098
14099       if not self.dest_x509_ca_pem:
14100         raise errors.OpPrereqError("Missing destination X509 CA",
14101                                    errors.ECODE_INVAL)
14102
14103   def ExpandNames(self):
14104     self._ExpandAndLockInstance()
14105
14106     # Lock all nodes for local exports
14107     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14108       # FIXME: lock only instance primary and destination node
14109       #
14110       # Sad but true, for now we have do lock all nodes, as we don't know where
14111       # the previous export might be, and in this LU we search for it and
14112       # remove it from its current node. In the future we could fix this by:
14113       #  - making a tasklet to search (share-lock all), then create the
14114       #    new one, then one to remove, after
14115       #  - removing the removal operation altogether
14116       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14117
14118       # Allocations should be stopped while this LU runs with node locks, but
14119       # it doesn't have to be exclusive
14120       self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14121       self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14122
14123   def DeclareLocks(self, level):
14124     """Last minute lock declaration."""
14125     # All nodes are locked anyway, so nothing to do here.
14126
14127   def BuildHooksEnv(self):
14128     """Build hooks env.
14129
14130     This will run on the master, primary node and target node.
14131
14132     """
14133     env = {
14134       "EXPORT_MODE": self.op.mode,
14135       "EXPORT_NODE": self.op.target_node,
14136       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14137       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14138       # TODO: Generic function for boolean env variables
14139       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14140       }
14141
14142     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14143
14144     return env
14145
14146   def BuildHooksNodes(self):
14147     """Build hooks nodes.
14148
14149     """
14150     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14151
14152     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14153       nl.append(self.op.target_node)
14154
14155     return (nl, nl)
14156
14157   def CheckPrereq(self):
14158     """Check prerequisites.
14159
14160     This checks that the instance and node names are valid.
14161
14162     """
14163     instance_name = self.op.instance_name
14164
14165     self.instance = self.cfg.GetInstanceInfo(instance_name)
14166     assert self.instance is not None, \
14167           "Cannot retrieve locked instance %s" % self.op.instance_name
14168     _CheckNodeOnline(self, self.instance.primary_node)
14169
14170     if (self.op.remove_instance and
14171         self.instance.admin_state == constants.ADMINST_UP and
14172         not self.op.shutdown):
14173       raise errors.OpPrereqError("Can not remove instance without shutting it"
14174                                  " down before", errors.ECODE_STATE)
14175
14176     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14177       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14178       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14179       assert self.dst_node is not None
14180
14181       _CheckNodeOnline(self, self.dst_node.name)
14182       _CheckNodeNotDrained(self, self.dst_node.name)
14183
14184       self._cds = None
14185       self.dest_disk_info = None
14186       self.dest_x509_ca = None
14187
14188     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14189       self.dst_node = None
14190
14191       if len(self.op.target_node) != len(self.instance.disks):
14192         raise errors.OpPrereqError(("Received destination information for %s"
14193                                     " disks, but instance %s has %s disks") %
14194                                    (len(self.op.target_node), instance_name,
14195                                     len(self.instance.disks)),
14196                                    errors.ECODE_INVAL)
14197
14198       cds = _GetClusterDomainSecret()
14199
14200       # Check X509 key name
14201       try:
14202         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14203       except (TypeError, ValueError), err:
14204         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14205                                    errors.ECODE_INVAL)
14206
14207       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14208         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14209                                    errors.ECODE_INVAL)
14210
14211       # Load and verify CA
14212       try:
14213         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14214       except OpenSSL.crypto.Error, err:
14215         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14216                                    (err, ), errors.ECODE_INVAL)
14217
14218       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14219       if errcode is not None:
14220         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14221                                    (msg, ), errors.ECODE_INVAL)
14222
14223       self.dest_x509_ca = cert
14224
14225       # Verify target information
14226       disk_info = []
14227       for idx, disk_data in enumerate(self.op.target_node):
14228         try:
14229           (host, port, magic) = \
14230             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14231         except errors.GenericError, err:
14232           raise errors.OpPrereqError("Target info for disk %s: %s" %
14233                                      (idx, err), errors.ECODE_INVAL)
14234
14235         disk_info.append((host, port, magic))
14236
14237       assert len(disk_info) == len(self.op.target_node)
14238       self.dest_disk_info = disk_info
14239
14240     else:
14241       raise errors.ProgrammerError("Unhandled export mode %r" %
14242                                    self.op.mode)
14243
14244     # instance disk type verification
14245     # TODO: Implement export support for file-based disks
14246     for disk in self.instance.disks:
14247       if disk.dev_type == constants.LD_FILE:
14248         raise errors.OpPrereqError("Export not supported for instances with"
14249                                    " file-based disks", errors.ECODE_INVAL)
14250
14251   def _CleanupExports(self, feedback_fn):
14252     """Removes exports of current instance from all other nodes.
14253
14254     If an instance in a cluster with nodes A..D was exported to node C, its
14255     exports will be removed from the nodes A, B and D.
14256
14257     """
14258     assert self.op.mode != constants.EXPORT_MODE_REMOTE
14259
14260     nodelist = self.cfg.GetNodeList()
14261     nodelist.remove(self.dst_node.name)
14262
14263     # on one-node clusters nodelist will be empty after the removal
14264     # if we proceed the backup would be removed because OpBackupQuery
14265     # substitutes an empty list with the full cluster node list.
14266     iname = self.instance.name
14267     if nodelist:
14268       feedback_fn("Removing old exports for instance %s" % iname)
14269       exportlist = self.rpc.call_export_list(nodelist)
14270       for node in exportlist:
14271         if exportlist[node].fail_msg:
14272           continue
14273         if iname in exportlist[node].payload:
14274           msg = self.rpc.call_export_remove(node, iname).fail_msg
14275           if msg:
14276             self.LogWarning("Could not remove older export for instance %s"
14277                             " on node %s: %s", iname, node, msg)
14278
14279   def Exec(self, feedback_fn):
14280     """Export an instance to an image in the cluster.
14281
14282     """
14283     assert self.op.mode in constants.EXPORT_MODES
14284
14285     instance = self.instance
14286     src_node = instance.primary_node
14287
14288     if self.op.shutdown:
14289       # shutdown the instance, but not the disks
14290       feedback_fn("Shutting down instance %s" % instance.name)
14291       result = self.rpc.call_instance_shutdown(src_node, instance,
14292                                                self.op.shutdown_timeout)
14293       # TODO: Maybe ignore failures if ignore_remove_failures is set
14294       result.Raise("Could not shutdown instance %s on"
14295                    " node %s" % (instance.name, src_node))
14296
14297     # set the disks ID correctly since call_instance_start needs the
14298     # correct drbd minor to create the symlinks
14299     for disk in instance.disks:
14300       self.cfg.SetDiskID(disk, src_node)
14301
14302     activate_disks = (instance.admin_state != constants.ADMINST_UP)
14303
14304     if activate_disks:
14305       # Activate the instance disks if we'exporting a stopped instance
14306       feedback_fn("Activating disks for %s" % instance.name)
14307       _StartInstanceDisks(self, instance, None)
14308
14309     try:
14310       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14311                                                      instance)
14312
14313       helper.CreateSnapshots()
14314       try:
14315         if (self.op.shutdown and
14316             instance.admin_state == constants.ADMINST_UP and
14317             not self.op.remove_instance):
14318           assert not activate_disks
14319           feedback_fn("Starting instance %s" % instance.name)
14320           result = self.rpc.call_instance_start(src_node,
14321                                                 (instance, None, None), False)
14322           msg = result.fail_msg
14323           if msg:
14324             feedback_fn("Failed to start instance: %s" % msg)
14325             _ShutdownInstanceDisks(self, instance)
14326             raise errors.OpExecError("Could not start instance: %s" % msg)
14327
14328         if self.op.mode == constants.EXPORT_MODE_LOCAL:
14329           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14330         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14331           connect_timeout = constants.RIE_CONNECT_TIMEOUT
14332           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14333
14334           (key_name, _, _) = self.x509_key_name
14335
14336           dest_ca_pem = \
14337             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14338                                             self.dest_x509_ca)
14339
14340           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14341                                                      key_name, dest_ca_pem,
14342                                                      timeouts)
14343       finally:
14344         helper.Cleanup()
14345
14346       # Check for backwards compatibility
14347       assert len(dresults) == len(instance.disks)
14348       assert compat.all(isinstance(i, bool) for i in dresults), \
14349              "Not all results are boolean: %r" % dresults
14350
14351     finally:
14352       if activate_disks:
14353         feedback_fn("Deactivating disks for %s" % instance.name)
14354         _ShutdownInstanceDisks(self, instance)
14355
14356     if not (compat.all(dresults) and fin_resu):
14357       failures = []
14358       if not fin_resu:
14359         failures.append("export finalization")
14360       if not compat.all(dresults):
14361         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14362                                if not dsk)
14363         failures.append("disk export: disk(s) %s" % fdsk)
14364
14365       raise errors.OpExecError("Export failed, errors in %s" %
14366                                utils.CommaJoin(failures))
14367
14368     # At this point, the export was successful, we can cleanup/finish
14369
14370     # Remove instance if requested
14371     if self.op.remove_instance:
14372       feedback_fn("Removing instance %s" % instance.name)
14373       _RemoveInstance(self, feedback_fn, instance,
14374                       self.op.ignore_remove_failures)
14375
14376     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14377       self._CleanupExports(feedback_fn)
14378
14379     return fin_resu, dresults
14380
14381
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares locks on all nodes.

    """
    self.needed_locks = {
      # We need all nodes to be locked in order for RemoveExport to work, but
      # we don't need to lock the instance itself, as nothing will happen to it
      # (and we can remove exports also for a removed instance)
      locking.LEVEL_NODE: locking.ALL_SET,

      # Removing backups is quick, so blocking allocations is justified
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    # Allocations should be stopped while this LU runs with node locks, but it
    # doesn't have to be exclusive
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1

  def Exec(self, feedback_fn):
    """Remove any export.

    Every locked node is asked for its exports and the instance's export is
    removed wherever it is found. Failures to query or to clean up a node
    are reported as warnings and don't abort the operation.

    @param feedback_fn: function used to send feedback back to the caller

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = not instance_name
    if fqdn_warn:
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          # Reported via LogWarning, like the query failure above, instead of
          # only being written to the log where the user wouldn't see it
          self.LogWarning("Could not remove export for instance %s"
                          " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
14435
14436
14437 class LUGroupAdd(LogicalUnit):
14438   """Logical unit for creating node groups.
14439
14440   """
14441   HPATH = "group-add"
14442   HTYPE = constants.HTYPE_GROUP
14443   REQ_BGL = False
14444
14445   def ExpandNames(self):
14446     # We need the new group's UUID here so that we can create and acquire the
14447     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14448     # that it should not check whether the UUID exists in the configuration.
14449     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14450     self.needed_locks = {}
14451     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14452
14453   def CheckPrereq(self):
14454     """Check prerequisites.
14455
14456     This checks that the given group name is not an existing node group
14457     already.
14458
14459     """
14460     try:
14461       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14462     except errors.OpPrereqError:
14463       pass
14464     else:
14465       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14466                                  " node group (UUID: %s)" %
14467                                  (self.op.group_name, existing_uuid),
14468                                  errors.ECODE_EXISTS)
14469
14470     if self.op.ndparams:
14471       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14472
14473     if self.op.hv_state:
14474       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14475     else:
14476       self.new_hv_state = None
14477
14478     if self.op.disk_state:
14479       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14480     else:
14481       self.new_disk_state = None
14482
14483     if self.op.diskparams:
14484       for templ in constants.DISK_TEMPLATES:
14485         if templ in self.op.diskparams:
14486           utils.ForceDictType(self.op.diskparams[templ],
14487                               constants.DISK_DT_TYPES)
14488       self.new_diskparams = self.op.diskparams
14489       try:
14490         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14491       except errors.OpPrereqError, err:
14492         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14493                                    errors.ECODE_INVAL)
14494     else:
14495       self.new_diskparams = {}
14496
14497     if self.op.ipolicy:
14498       cluster = self.cfg.GetClusterInfo()
14499       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14500       try:
14501         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14502       except errors.ConfigurationError, err:
14503         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14504                                    errors.ECODE_INVAL)
14505
14506   def BuildHooksEnv(self):
14507     """Build hooks env.
14508
14509     """
14510     return {
14511       "GROUP_NAME": self.op.group_name,
14512       }
14513
14514   def BuildHooksNodes(self):
14515     """Build hooks nodes.
14516
14517     """
14518     mn = self.cfg.GetMasterNode()
14519     return ([mn], [mn])
14520
14521   def Exec(self, feedback_fn):
14522     """Add the node group to the cluster.
14523
14524     """
14525     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14526                                   uuid=self.group_uuid,
14527                                   alloc_policy=self.op.alloc_policy,
14528                                   ndparams=self.op.ndparams,
14529                                   diskparams=self.new_diskparams,
14530                                   ipolicy=self.op.ipolicy,
14531                                   hv_state_static=self.new_hv_state,
14532                                   disk_state_static=self.new_disk_state)
14533
14534     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14535     del self.remove_locks[locking.LEVEL_NODEGROUP]
14536
14537
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the destination group and the nodes, then declare locks.

    """
    # Both lookups raise errors.OpPrereqError on their own.
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # Every affected node and group has to be locked. The nodes and the
    # *destination* group are known at this point; the "source" groups can
    # only be determined from node information, which is fetched later on.
    group_locks = set([self.group_uuid])
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.nodes,
      locking.LEVEL_NODEGROUP: group_locks,
      }

  def DeclareLocks(self, level):
    """Add the nodes' current groups to the node group locks.

    Nothing is done for levels other than the node group level.

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    group_locks = self.needed_locks[locking.LEVEL_NODEGROUP]
    assert len(group_locks) == 1

    # The groups are read while neither the group nor the node locks are
    # held, hence the result has to be verified later in the code flow.
    source_groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

    group_locks.update(source_groups)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the owned node group locks still match the groups of the
    nodes, loads node and group data, and checks whether the assignment
    would split instances across groups.

    @raise errors.OpExecError: if nodes changed groups after the locks were
      declared, if the destination group can not be retrieved, or if
      instances would be newly split and C{force} was not given

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    current_groups = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    locked_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if locked_groups != current_groups:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(current_groups),
                                utils.CommaJoin(locked_groups)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node, self.group_uuid) for node in self.op.nodes]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            instance_data)

    if not new_splits:
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    @param feedback_fn: feedback function; not used by this method

    """
    self.cfg.AssignGroupNodes([(node_name, self.group_uuid)
                               for node_name in self.op.nodes])

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    The given node assignments are treated as one atomic operation. An
    instance counts as split when its primary and secondary nodes are not
    all in the same group. The method reports which instances become split
    through the changes and which ones were split before and stay split.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR
    are considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.

    """
    # Only assignments which actually move a node to another group matter
    moved_nodes = {}
    for (node_name, new_group) in changes:
      if node_data[node_name].group != new_group:
        moved_nodes[node_name] = new_group

    split_before = set()
    split_after = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      inst_nodes = [inst.primary_node]
      inst_nodes.extend(inst.secondary_nodes)

      groups_before = set()
      groups_after = set()
      for node_name in inst_nodes:
        current_group = node_data[node_name].group
        groups_before.add(current_group)
        groups_after.add(moved_nodes.get(node_name, current_group))

      if len(groups_before) > 1:
        split_before.add(inst.name)

      if len(groups_after) > 1:
        split_after.add(inst.name)

    newly_split = split_after - split_before
    still_split = split_before & split_after

    return (list(newly_split), list(still_split))
14669
14670
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Determine the UUIDs of the groups to be queried.

    No locks are requested. If no names were given, all groups are selected,
    ordered by C{utils.NiceSort} of their names; otherwise each given name
    may be either a group name or a group UUID.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if a given name matches neither a group
      UUID nor a group name

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()

    name_to_uuid = {}
    for group in self._all_groups.values():
      name_to_uuid[group.name] = group.uuid

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
      return

    # Each requested identifier is accepted as either a UUID or a name
    known_uuids = frozenset(self._all_groups.keys())
    unknown = []
    self.wanted = []

    for ident in self.names:
      if ident in known_uuids:
        uuid = ident
      elif ident in name_to_uuid:
        uuid = name_to_uuid[ident]
      else:
        unknown.append(ident)
        continue

      self.wanted.append(uuid)

    if unknown:
      raise errors.OpPrereqError("Some groups do not exist: %s" %
                                 utils.CommaJoin(unknown),
                                 errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Declare locks; nothing is done here.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @return: a C{query.GroupQueryData} object for the wanted groups

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    nodes_by_group = None
    instances_by_group = None

    # GQ_NODE needs a group->[nodes] mapping, GQ_INST a group->[instances]
    # one. Instances are mapped to groups through their primary node, so the
    # nodes have to be processed even if only instance data was requested.
    if want_nodes or want_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      nodes_by_group = dict((uuid, []) for uuid in self.wanted)
      group_of_node = {}

      for node in all_nodes.values():
        if node.group in nodes_by_group:
          nodes_by_group[node.group].append(node.name)
          group_of_node[node.name] = node.group

      if want_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        instances_by_group = dict((uuid, []) for uuid in self.wanted)

        for inst in all_instances.values():
          pnode = inst.primary_node
          if pnode in group_of_node:
            instances_by_group[group_of_node[pnode]].append(inst.name)

      if not want_nodes:
        # The node mapping was only a helper, do not pass it on
        nodes_by_group = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]
    want_diskparams = query.GQ_DISKPARAMS in self.requested_data

    return query.GroupQueryData(self._cluster, wanted_groups,
                                nodes_by_group, instances_by_group,
                                want_diskparams)
14749
14750
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  All the work is delegated to a L{_GroupQuery} object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the group query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare locks for the given level.

    """
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query.

    @param feedback_fn: feedback function; not used by this method
    @return: the result of the query object's C{OldStyleQuery}

    """
    query_result = self.gq.OldStyleQuery(self)
    return query_result
14769
14770
14771 class LUGroupSetParams(LogicalUnit):
14772   """Modifies the parameters of a node group.
14773
14774   """
14775   HPATH = "group-modify"
14776   HTYPE = constants.HTYPE_GROUP
14777   REQ_BGL = False
14778
14779   def CheckArguments(self):
14780     all_changes = [
14781       self.op.ndparams,
14782       self.op.diskparams,
14783       self.op.alloc_policy,
14784       self.op.hv_state,
14785       self.op.disk_state,
14786       self.op.ipolicy,
14787       ]
14788
14789     if all_changes.count(None) == len(all_changes):
14790       raise errors.OpPrereqError("Please pass at least one modification",
14791                                  errors.ECODE_INVAL)
14792
14793   def ExpandNames(self):
14794     # This raises errors.OpPrereqError on its own:
14795     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14796
14797     self.needed_locks = {
14798       locking.LEVEL_INSTANCE: [],
14799       locking.LEVEL_NODEGROUP: [self.group_uuid],
14800       }
14801
14802     self.share_locks[locking.LEVEL_INSTANCE] = 1
14803
14804   def DeclareLocks(self, level):
14805     if level == locking.LEVEL_INSTANCE:
14806       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14807
14808       # Lock instances optimistically, needs verification once group lock has
14809       # been acquired
14810       self.needed_locks[locking.LEVEL_INSTANCE] = \
14811           self.cfg.GetNodeGroupInstances(self.group_uuid)
14812
14813   @staticmethod
14814   def _UpdateAndVerifyDiskParams(old, new):
14815     """Updates and verifies disk parameters.
14816
14817     """
14818     new_params = _GetUpdatedParams(old, new)
14819     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14820     return new_params
14821
14822   def CheckPrereq(self):
14823     """Check prerequisites.
14824
14825     """
14826     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14827
14828     # Check if locked instances are still correct
14829     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14830
14831     self.group = self.cfg.GetNodeGroup(self.group_uuid)
14832     cluster = self.cfg.GetClusterInfo()
14833
14834     if self.group is None:
14835       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14836                                (self.op.group_name, self.group_uuid))
14837
14838     if self.op.ndparams:
14839       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14840       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14841       self.new_ndparams = new_ndparams
14842
14843     if self.op.diskparams:
14844       diskparams = self.group.diskparams
14845       uavdp = self._UpdateAndVerifyDiskParams
14846       # For each disktemplate subdict update and verify the values
14847       new_diskparams = dict((dt,
14848                              uavdp(diskparams.get(dt, {}),
14849                                    self.op.diskparams[dt]))
14850                             for dt in constants.DISK_TEMPLATES
14851                             if dt in self.op.diskparams)
14852       # As we've all subdicts of diskparams ready, lets merge the actual
14853       # dict with all updated subdicts
14854       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14855       try:
14856         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14857       except errors.OpPrereqError, err:
14858         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14859                                    errors.ECODE_INVAL)
14860
14861     if self.op.hv_state:
14862       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14863                                                  self.group.hv_state_static)
14864
14865     if self.op.disk_state:
14866       self.new_disk_state = \
14867         _MergeAndVerifyDiskState(self.op.disk_state,
14868                                  self.group.disk_state_static)
14869
14870     if self.op.ipolicy:
14871       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14872                                             self.op.ipolicy,
14873                                             group_policy=True)
14874
14875       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14876       inst_filter = lambda inst: inst.name in owned_instances
14877       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14878       gmi = ganeti.masterd.instance
14879       violations = \
14880           _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14881                                                                   self.group),
14882                                         new_ipolicy, instances)
14883
14884       if violations:
14885         self.LogWarning("After the ipolicy change the following instances"
14886                         " violate them: %s",
14887                         utils.CommaJoin(violations))
14888
14889   def BuildHooksEnv(self):
14890     """Build hooks env.
14891
14892     """
14893     return {
14894       "GROUP_NAME": self.op.group_name,
14895       "NEW_ALLOC_POLICY": self.op.alloc_policy,
14896       }
14897
14898   def BuildHooksNodes(self):
14899     """Build hooks nodes.
14900
14901     """
14902     mn = self.cfg.GetMasterNode()
14903     return ([mn], [mn])
14904
14905   def Exec(self, feedback_fn):
14906     """Modifies the node group.
14907
14908     """
14909     result = []
14910
14911     if self.op.ndparams:
14912       self.group.ndparams = self.new_ndparams
14913       result.append(("ndparams", str(self.group.ndparams)))
14914
14915     if self.op.diskparams:
14916       self.group.diskparams = self.new_diskparams
14917       result.append(("diskparams", str(self.group.diskparams)))
14918
14919     if self.op.alloc_policy:
14920       self.group.alloc_policy = self.op.alloc_policy
14921
14922     if self.op.hv_state:
14923       self.group.hv_state_static = self.new_hv_state
14924
14925     if self.op.disk_state:
14926       self.group.disk_state_static = self.new_disk_state
14927
14928     if self.op.ipolicy:
14929       self.group.ipolicy = self.new_ipolicy
14930
14931     self.cfg.Update(self.group, feedback_fn)
14932     return result
14933
14934
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing a node group.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group and request its node group lock.

    """
    # The lookup raises errors.OpPrereqError on its own.
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    locks = {}
    locks[locking.LEVEL_NODEGROUP] = [self.group_uuid]
    self.needed_locks = locks

  def CheckPrereq(self):
    """Check prerequisites.

    The group must be empty (i.e. no node may belong to it) and it must not
    be the only node group of the cluster.

    @raise errors.OpPrereqError: if one of the conditions does not hold

    """
    # Collect the names of all nodes which are still in the group
    member_names = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        member_names.append(node.name)

    if member_names:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(member_names))),
                                 errors.ECODE_STATE)

    # The cluster must not be left without any group
    num_groups = len(self.cfg.GetNodeGroupList())
    if num_groups == 1:
      raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
                                 " removed" % self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: a dictionary holding the group name under C{GROUP_NAME}

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair of lists, each containing only the master node

    """
    master_node = self.cfg.GetMasterNode()
    return ([master_node], [master_node])

  def Exec(self, feedback_fn):
    """Remove the node group.

    @param feedback_fn: feedback function; not used by this method
    @raise errors.OpExecError: if the configuration reports an error while
      removing the group

    """
    uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    # NOTE(review): presumably makes the lock of the removed group get
    # dropped as well -- confirm against the remove_locks handling
    self.remove_locks[locking.LEVEL_NODEGROUP] = uuid
14999
15000
class LUGroupRename(LogicalUnit):
  """Logical unit for renaming a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group and request its node group lock.

    """
    # The lookup raises errors.OpPrereqError on its own.
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    locks = {}
    locks[locking.LEVEL_NODEGROUP] = [self.group_uuid]
    self.needed_locks = locks

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    @raise errors.OpPrereqError: if a node group with the new name exists

    """
    try:
      clashing_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The lookup failed, hence the name is still free
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, clashing_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the current and the new group name, both taken from the opcode

    """
    env = {}
    env["OLD_NAME"] = self.op.group_name
    env["NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list twice; it starts with the master node, followed
      by the names of all other nodes belonging to the group

    """
    master_node = self.cfg.GetMasterNode()

    # The master node is added explicitly, so drop it from the candidates
    other_nodes = self.cfg.GetAllNodesInfo()
    other_nodes.pop(master_node, None)

    run_nodes = [master_node]
    for node in other_nodes.values():
      if node.group == self.group_uuid:
        run_nodes.append(node.name)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @param feedback_fn: passed on to the configuration update
    @return: the new name of the group
    @raise errors.OpExecError: if the group can not be retrieved

    """
    new_name = self.op.new_name
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = new_name
    self.cfg.Update(group, feedback_fn)

    return new_name
15068
15069
class LUGroupEvacuate(LogicalUnit):
  """Logical unit for evacuating a node group.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the groups and prepare the lock declarations.

    @raise errors.OpPrereqError: if the group to be evacuated is also
      listed as a target group

    """
    # The lookups raise errors.OpPrereqError on their own.
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = [self.cfg.LookupNodeGroup(name)
                               for name in self.op.target_groups]
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()

    # All levels are filled in by DeclareLocks
    locks = {}
    for level in [locking.LEVEL_INSTANCE, locking.LEVEL_NODEGROUP,
                  locking.LEVEL_NODE]:
      locks[level] = []
    self.needed_locks = locks

  def DeclareLocks(self, level):
    """Compute the locks needed at the given level.

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Optimistic locking: the instance list is verified again once the
      # node and group locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if not self.req_target_uuids:
        # Without explicit target groups all groups have to be locked
        lock_groups = locking.ALL_SET
      else:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Also lock, optimistically, every group used by the instances;
        # this goes via the nodes before they are locked and therefore
        # requires verification later on
        for inst_name in self.owned_locks(locking.LEVEL_INSTANCE):
          lock_groups.update(self.cfg.GetInstanceNodeGroups(inst_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # Nodes of the instances; the locks computed here are appended to the
      # list instead of replacing it
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # In addition, all member nodes of every owned group
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups

      member_nodes = []
      for group_uuid in owned_groups:
        member_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)

      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically acquired locks, loads the instance data and
    determines the target groups, stored in C{self.target_uuids}.

    @raise errors.OpPrereqError: if no target groups were requested and no
      owned group other than the evacuated one is left

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # The instance locks were computed before the group lock was held
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Same for the node groups of the locked instances
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # Explicitly requested target groups are used as they are
      self.target_uuids = self.req_target_uuids
      return

    # Otherwise every owned group but the evacuated one is a candidate
    candidates = []
    for group_uuid in owned_groups:
      if group_uuid != self.group_uuid:
        candidates.append(group_uuid)
    self.target_uuids = candidates

    if not candidates:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the group name from the opcode and the target group UUIDs,
      joined with single spaces

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    env["TARGET_GROUPS"] = " ".join(self.target_uuids)
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list twice; it holds the master node followed by the
      members of the group to be evacuated

    """
    master_node = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    members = self.cfg.GetNodeGroup(self.group_uuid).members
    run_nodes = [master_node] + members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Compute the evacuation using the iallocator and return the jobs.

    @param feedback_fn: feedback function; not used by this method
    @return: a C{ResultWithJobs} object wrapping the jobs derived from the
      iallocator result
    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    inst_names = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    request = iallocator.IAReqGroupChange(instances=inst_names,
                                          target_groups=self.target_uuids)
    ial = iallocator.IAllocator(self.cfg, self.rpc, request)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
15217
15218
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    """Resolve the name of the tagged object and declare its lock.

    Depending on the kind of object given in the opcode, the name is
    expanded (nodes and instances) or looked up to get the UUID (node groups
    and networks). For any other kind no lock is declared.

    A lock is only declared if the opcode either has no C{use_locking}
    attribute or the attribute is true.

    """
    # Both UUID attributes are pre-set so they exist for every tag kind
    self.group_uuid = None
    self.network_uuid = None
    self.needed_locks = {}

    kind = self.op.kind
    lock_level = None
    lock_name = None

    if kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_NODE
      lock_name = self.op.name
    elif kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_INSTANCE
      lock_name = self.op.name
    elif kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid
    elif kind == constants.TAG_NETWORK:
      self.network_uuid = self.cfg.LookupNetwork(self.op.name)
      lock_level = locking.LEVEL_NETWORK
      lock_name = self.network_uuid

    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the object the tags belong to into C{self.target}.

    @raise errors.OpPrereqError: if the kind of object given in the opcode
      is not one of the handled tag kinds

    """
    kind = self.op.kind

    if kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    elif kind == constants.TAG_NETWORK:
      self.target = self.cfg.GetNetwork(self.network_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
15272
15273
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks like the parent class, then share all of them.

    """
    TagsLU.ExpandNames(self)

    # Tags are only read here, hence the locks are shared
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @param feedback_fn: feedback function; not used by this method
    @rtype: list
    @return: the tags of the target object

    """
    tags = self.target.GetTags()
    return list(tags)
15291
15292
15293 class LUTagsSearch(NoHooksLU):
15294   """Searches the tags for a given pattern.
15295
15296   """
15297   REQ_BGL = False
15298
15299   def ExpandNames(self):
15300     self.needed_locks = {}
15301
15302   def CheckPrereq(self):
15303     """Check prerequisites.
15304
15305     This checks the pattern passed for validity by compiling it.
15306
15307     """
15308     try:
15309       self.re = re.compile(self.op.pattern)
15310     except re.error, err:
15311       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15312                                  (self.op.pattern, err), errors.ECODE_INVAL)
15313
15314   def Exec(self, feedback_fn):
15315     """Returns the tag list.
15316
15317     """
15318     cfg = self.cfg
15319     tgts = [("/cluster", cfg.GetClusterInfo())]
15320     ilist = cfg.GetAllInstancesInfo().values()
15321     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15322     nlist = cfg.GetAllNodesInfo().values()
15323     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15324     tgts.extend(("/nodegroup/%s" % n.name, n)
15325                 for n in cfg.GetAllNodeGroupsInfo().values())
15326     results = []
15327     for path, target in tgts:
15328       for tag in target.GetTags():
15329         if self.re.search(tag):
15330           results.append((path, tag))
15331     return results
15332
15333
15334 class LUTagsSet(TagsLU):
15335   """Sets a tag on a given object.
15336
15337   """
15338   REQ_BGL = False
15339
15340   def CheckPrereq(self):
15341     """Check prerequisites.
15342
15343     This checks the type and length of the tag name and value.
15344
15345     """
15346     TagsLU.CheckPrereq(self)
15347     for tag in self.op.tags:
15348       objects.TaggableObject.ValidateTag(tag)
15349
15350   def Exec(self, feedback_fn):
15351     """Sets the tag.
15352
15353     """
15354     try:
15355       for tag in self.op.tags:
15356         self.target.AddTag(tag)
15357     except errors.TagError, err:
15358       raise errors.OpExecError("Error while setting tag: %s" % str(err))
15359     self.cfg.Update(self.target, feedback_fn)
15360
15361
class LUTagsDel(TagsLU):
  """Logical unit removing a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    All tags are validated and must currently be set on the target; tags
    which are not found are reported via L{errors.OpPrereqError}.

    """
    TagsLU.CheckPrereq(self)

    for old_tag in self.op.tags:
      objects.TaggableObject.ValidateTag(old_tag)

    # Requested tags which the target doesn't have
    missing = frozenset(self.op.tags) - self.target.GetTags()
    if not missing:
      return

    quoted = ["'%s'" % name for name in sorted(missing)]
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the requested tags from the target and save it.

    """
    target = self.target

    for old_tag in self.op.tags:
      target.RemoveTag(old_tag)

    self.cfg.Update(target, feedback_fn)
15394
15395
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  Depending on the opcode, the sleep takes place on the master, on a list
  of nodes, or on both.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    If a node list was given, it is expanded and the nodes are locked.

    """
    self.needed_locks = {}

    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep, once.

    A failed master delay raises L{errors.OpExecError}; failures reported by
    nodes are raised through the RPC result's C{Raise} method.

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")

    if not self.op.on_nodes:
      return

    rpc_results = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
    for (node_name, nres) in rpc_results.items():
      nres.Raise("Failure during rpc call to node %s" % node_name)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero runs the delay exactly once, without logging the
    iteration.

    """
    repeat = self.op.repeat

    if repeat == 0:
      self._TestDelay()
      return

    last_idx = repeat - 1
    for idx in range(repeat):
      self.LogInfo("Test delay iteration %d/%d", idx, last_idx)
      self._TestDelay()
15442
15443
class LURestrictedCommand(NoHooksLU):
  """Logical unit for executing restricted commands.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node list and declare the node locks.

    The node locks are shared unless the opcode asks for locking.

    """
    if self.op.nodes:
      self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    self.needed_locks = {locking.LEVEL_NODE: self.op.nodes}
    self.share_locks = {locking.LEVEL_NODE: not self.op.use_locking}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has nothing to check.

    """

  def Exec(self, feedback_fn):
    """Execute restricted command and return output.

    @rtype: list
    @return: one C{(success, data)} tuple per requested node, in the order
      of the node list; C{data} is the RPC payload on success and an error
      message otherwise

    """
    nodes = self.op.nodes
    command = self.op.command

    # Check if correct locks are held
    held_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
    assert set(nodes).issubset(held_nodes)

    rpc_results = self.rpc.call_restricted_command(nodes, command)

    output = []
    for node_name in nodes:
      nres = rpc_results[node_name]
      if not nres.fail_msg:
        output.append((True, nres.payload))
        continue

      errmsg = ("Command '%s' on node '%s' failed: %s" %
                (command, node_name, nres.fail_msg))
      output.append((False, errmsg))

    return output
15489
15490
15491 class LUTestJqueue(NoHooksLU):
15492   """Utility LU to test some aspects of the job queue.
15493
15494   """
15495   REQ_BGL = False
15496
15497   # Must be lower than default timeout for WaitForJobChange to see whether it
15498   # notices changed jobs
15499   _CLIENT_CONNECT_TIMEOUT = 20.0
15500   _CLIENT_CONFIRM_TIMEOUT = 60.0
15501
15502   @classmethod
15503   def _NotifyUsingSocket(cls, cb, errcls):
15504     """Opens a Unix socket and waits for another program to connect.
15505
15506     @type cb: callable
15507     @param cb: Callback to send socket name to client
15508     @type errcls: class
15509     @param errcls: Exception class to use for errors
15510
15511     """
15512     # Using a temporary directory as there's no easy way to create temporary
15513     # sockets without writing a custom loop around tempfile.mktemp and
15514     # socket.bind
15515     tmpdir = tempfile.mkdtemp()
15516     try:
15517       tmpsock = utils.PathJoin(tmpdir, "sock")
15518
15519       logging.debug("Creating temporary socket at %s", tmpsock)
15520       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15521       try:
15522         sock.bind(tmpsock)
15523         sock.listen(1)
15524
15525         # Send details to client
15526         cb(tmpsock)
15527
15528         # Wait for client to connect before continuing
15529         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15530         try:
15531           (conn, _) = sock.accept()
15532         except socket.error, err:
15533           raise errcls("Client didn't connect in time (%s)" % err)
15534       finally:
15535         sock.close()
15536     finally:
15537       # Remove as soon as client is connected
15538       shutil.rmtree(tmpdir)
15539
15540     # Wait for client to close
15541     try:
15542       try:
15543         # pylint: disable=E1101
15544         # Instance of '_socketobject' has no ... member
15545         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15546         conn.recv(1)
15547       except socket.error, err:
15548         raise errcls("Client failed to confirm notification (%s)" % err)
15549     finally:
15550       conn.close()
15551
15552   def _SendNotification(self, test, arg, sockname):
15553     """Sends a notification to the client.
15554
15555     @type test: string
15556     @param test: Test name
15557     @param arg: Test argument (depends on test)
15558     @type sockname: string
15559     @param sockname: Socket path
15560
15561     """
15562     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15563
15564   def _Notify(self, prereq, test, arg):
15565     """Notifies the client of a test.
15566
15567     @type prereq: bool
15568     @param prereq: Whether this is a prereq-phase test
15569     @type test: string
15570     @param test: Test name
15571     @param arg: Test argument (depends on test)
15572
15573     """
15574     if prereq:
15575       errcls = errors.OpPrereqError
15576     else:
15577       errcls = errors.OpExecError
15578
15579     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15580                                                   test, arg),
15581                                    errcls)
15582
15583   def CheckArguments(self):
15584     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15585     self.expandnames_calls = 0
15586
15587   def ExpandNames(self):
15588     checkargs_calls = getattr(self, "checkargs_calls", 0)
15589     if checkargs_calls < 1:
15590       raise errors.ProgrammerError("CheckArguments was not called")
15591
15592     self.expandnames_calls += 1
15593
15594     if self.op.notify_waitlock:
15595       self._Notify(True, constants.JQT_EXPANDNAMES, None)
15596
15597     self.LogInfo("Expanding names")
15598
15599     # Get lock on master node (just to get a lock, not for a particular reason)
15600     self.needed_locks = {
15601       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15602       }
15603
15604   def Exec(self, feedback_fn):
15605     if self.expandnames_calls < 1:
15606       raise errors.ProgrammerError("ExpandNames was not called")
15607
15608     if self.op.notify_exec:
15609       self._Notify(False, constants.JQT_EXEC, None)
15610
15611     self.LogInfo("Executing")
15612
15613     if self.op.log_messages:
15614       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15615       for idx, msg in enumerate(self.op.log_messages):
15616         self.LogInfo("Sending log message %s", idx + 1)
15617         feedback_fn(constants.JQT_MSGPREFIX + msg)
15618         # Report how many test messages have been sent
15619         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15620
15621     if self.op.fail:
15622       raise errors.OpExecError("Opcode failure was requested")
15623
15624     return True
15625
15626
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU builds an iallocator request from the opcode parameters and
  returns either the generated input text or the output of the named
  allocator.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # The name given must not belong to an existing instance
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.iallocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def _BuildInstanceAllocRequest(self, name):
    """Build an instance allocation request from the opcode parameters.

    @type name: string
    @param name: Instance name to use in the request
    @return: the new L{iallocator.IAReqInstanceAlloc} object

    """
    return iallocator.IAReqInstanceAlloc(name=name,
                                         memory=self.op.memory,
                                         disks=self.op.disks,
                                         disk_template=self.op.disk_template,
                                         os=self.op.os,
                                         tags=self.op.tags,
                                         nics=self.op.nics,
                                         vcpus=self.op.vcpus,
                                         spindle_use=self.op.spindle_use,
                                         hypervisor=self.op.hypervisor)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @return: the allocator input text for direction "in", otherwise the
      output text of the allocator named in the opcode
    @raise errors.ProgrammerError: if the mode is not handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      req = self._BuildInstanceAllocRequest(self.op.name)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      # One request per instance, named after the base name plus an index
      insts = [self._BuildInstanceAllocRequest("%s%s" % (self.op.name, idx))
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      # The mode has to be interpolated here; exceptions don't format their
      # arguments on their own
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.iallocator, validate=False)
      result = ial.out_text
    return result
15737
15738
15739 class LUNetworkAdd(LogicalUnit):
15740   """Logical unit for creating networks.
15741
15742   """
15743   HPATH = "network-add"
15744   HTYPE = constants.HTYPE_NETWORK
15745   REQ_BGL = False
15746
15747   def BuildHooksNodes(self):
15748     """Build hooks nodes.
15749
15750     """
15751     mn = self.cfg.GetMasterNode()
15752     return ([mn], [mn])
15753
15754   def CheckArguments(self):
15755     if self.op.mac_prefix:
15756       self.op.mac_prefix = \
15757         utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15758
15759   def ExpandNames(self):
15760     self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15761
15762     if self.op.conflicts_check:
15763       self.share_locks[locking.LEVEL_NODE] = 1
15764       self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
15765       self.needed_locks = {
15766         locking.LEVEL_NODE: locking.ALL_SET,
15767         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
15768         }
15769     else:
15770       self.needed_locks = {}
15771
15772     self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15773
15774   def CheckPrereq(self):
15775     if self.op.network is None:
15776       raise errors.OpPrereqError("Network must be given",
15777                                  errors.ECODE_INVAL)
15778
15779     uuid = self.cfg.LookupNetwork(self.op.network_name)
15780
15781     if uuid:
15782       raise errors.OpPrereqError("Network '%s' already defined" %
15783                                  self.op.network, errors.ECODE_EXISTS)
15784
15785     # Check tag validity
15786     for tag in self.op.tags:
15787       objects.TaggableObject.ValidateTag(tag)
15788
15789   def BuildHooksEnv(self):
15790     """Build hooks env.
15791
15792     """
15793     args = {
15794       "name": self.op.network_name,
15795       "subnet": self.op.network,
15796       "gateway": self.op.gateway,
15797       "network6": self.op.network6,
15798       "gateway6": self.op.gateway6,
15799       "mac_prefix": self.op.mac_prefix,
15800       "network_type": self.op.network_type,
15801       "tags": self.op.tags,
15802       }
15803     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15804
15805   def Exec(self, feedback_fn):
15806     """Add the ip pool to the cluster.
15807
15808     """
15809     nobj = objects.Network(name=self.op.network_name,
15810                            network=self.op.network,
15811                            gateway=self.op.gateway,
15812                            network6=self.op.network6,
15813                            gateway6=self.op.gateway6,
15814                            mac_prefix=self.op.mac_prefix,
15815                            network_type=self.op.network_type,
15816                            uuid=self.network_uuid,
15817                            family=constants.IP4_VERSION)
15818     # Initialize the associated address pool
15819     try:
15820       pool = network.AddressPool.InitializeNetwork(nobj)
15821     except errors.AddressPoolError, e:
15822       raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15823
15824     # Check if we need to reserve the nodes and the cluster master IP
15825     # These may not be allocated to any instances in routed mode, as
15826     # they wouldn't function anyway.
15827     if self.op.conflicts_check:
15828       for node in self.cfg.GetAllNodesInfo().values():
15829         for ip in [node.primary_ip, node.secondary_ip]:
15830           try:
15831             if pool.Contains(ip):
15832               pool.Reserve(ip)
15833               self.LogInfo("Reserved IP address of node '%s' (%s)",
15834                            node.name, ip)
15835           except errors.AddressPoolError:
15836             self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
15837                             node.name, ip)
15838
15839       master_ip = self.cfg.GetClusterInfo().master_ip
15840       try:
15841         if pool.Contains(master_ip):
15842           pool.Reserve(master_ip)
15843           self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
15844       except errors.AddressPoolError:
15845         self.LogWarning("Cannot reserve cluster master IP address (%s)",
15846                         master_ip)
15847
15848     if self.op.add_reserved_ips:
15849       for ip in self.op.add_reserved_ips:
15850         try:
15851           pool.Reserve(ip, external=True)
15852         except errors.AddressPoolError, e:
15853           raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15854
15855     if self.op.tags:
15856       for tag in self.op.tags:
15857         nobj.AddTag(tag)
15858
15859     self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15860     del self.remove_locks[locking.LEVEL_NETWORK]
15861
15862
class LUNetworkRemove(LogicalUnit):
  """Logical unit for removing networks.

  """
  HPATH = "network-remove"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the network and declare the locks.

    The network itself is locked together with all node groups, the latter
    in shared mode.

    """
    net_uuid = self.cfg.LookupNetwork(self.op.network_name)
    self.network_uuid = net_uuid

    if not net_uuid:
      raise errors.OpPrereqError(("Network '%s' not found" %
                                  self.op.network_name),
                                 errors.ECODE_INVAL)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [net_uuid],
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODEGROUP] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the network is not connected to any node group; the
    groups still using it are logged as a warning before the error is
    raised.

    """
    connected = []
    for group in self.cfg.GetAllNodeGroupsInfo().values():
      if self.network_uuid in group.networks:
        connected.append(group.name)

    if not connected:
      return

    self.LogWarning("Network '%s' is connected to the following"
                    " node groups: %s" %
                    (self.op.network_name,
                     utils.CommaJoin(utils.NiceSort(connected))))
    raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "NETWORK_NAME": self.op.network_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node only, both before and after the operation.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the network.

    """
    net_uuid = self.network_uuid
    try:
      self.cfg.RemoveNetwork(net_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
                               (self.op.network_name, net_uuid))
15926
15927
15928 class LUNetworkSetParams(LogicalUnit):
15929   """Modifies the parameters of a network.
15930
15931   """
15932   HPATH = "network-modify"
15933   HTYPE = constants.HTYPE_NETWORK
15934   REQ_BGL = False
15935
15936   def CheckArguments(self):
15937     if (self.op.gateway and
15938         (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15939       raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15940                                  " at once", errors.ECODE_INVAL)
15941
15942   def ExpandNames(self):
15943     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15944     if self.network_uuid is None:
15945       raise errors.OpPrereqError(("Network '%s' not found" %
15946                                   self.op.network_name),
15947                                  errors.ECODE_INVAL)
15948
15949     self.needed_locks = {
15950       locking.LEVEL_NETWORK: [self.network_uuid],
15951       }
15952
15953   def CheckPrereq(self):
15954     """Check prerequisites.
15955
15956     """
15957     self.network = self.cfg.GetNetwork(self.network_uuid)
15958     self.gateway = self.network.gateway
15959     self.network_type = self.network.network_type
15960     self.mac_prefix = self.network.mac_prefix
15961     self.network6 = self.network.network6
15962     self.gateway6 = self.network.gateway6
15963     self.tags = self.network.tags
15964
15965     self.pool = network.AddressPool(self.network)
15966
15967     if self.op.gateway:
15968       if self.op.gateway == constants.VALUE_NONE:
15969         self.gateway = None
15970       else:
15971         self.gateway = self.op.gateway
15972         if self.pool.IsReserved(self.gateway):
15973           raise errors.OpPrereqError("%s is already reserved" %
15974                                      self.gateway, errors.ECODE_INVAL)
15975
15976     if self.op.network_type:
15977       if self.op.network_type == constants.VALUE_NONE:
15978         self.network_type = None
15979       else:
15980         self.network_type = self.op.network_type
15981
15982     if self.op.mac_prefix:
15983       if self.op.mac_prefix == constants.VALUE_NONE:
15984         self.mac_prefix = None
15985       else:
15986         self.mac_prefix = \
15987           utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15988
15989     if self.op.gateway6:
15990       if self.op.gateway6 == constants.VALUE_NONE:
15991         self.gateway6 = None
15992       else:
15993         self.gateway6 = self.op.gateway6
15994
15995     if self.op.network6:
15996       if self.op.network6 == constants.VALUE_NONE:
15997         self.network6 = None
15998       else:
15999         self.network6 = self.op.network6
16000
16001   def BuildHooksEnv(self):
16002     """Build hooks env.
16003
16004     """
16005     args = {
16006       "name": self.op.network_name,
16007       "subnet": self.network.network,
16008       "gateway": self.gateway,
16009       "network6": self.network6,
16010       "gateway6": self.gateway6,
16011       "mac_prefix": self.mac_prefix,
16012       "network_type": self.network_type,
16013       "tags": self.tags,
16014       }
16015     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16016
16017   def BuildHooksNodes(self):
16018     """Build hooks nodes.
16019
16020     """
16021     mn = self.cfg.GetMasterNode()
16022     return ([mn], [mn])
16023
16024   def Exec(self, feedback_fn):
16025     """Modifies the network.
16026
16027     """
16028     #TODO: reserve/release via temporary reservation manager
16029     #      extend cfg.ReserveIp/ReleaseIp with the external flag
16030     if self.op.gateway:
16031       if self.gateway == self.network.gateway:
16032         self.LogWarning("Gateway is already %s", self.gateway)
16033       else:
16034         if self.gateway:
16035           self.pool.Reserve(self.gateway, external=True)
16036         if self.network.gateway:
16037           self.pool.Release(self.network.gateway, external=True)
16038         self.network.gateway = self.gateway
16039
16040     if self.op.add_reserved_ips:
16041       for ip in self.op.add_reserved_ips:
16042         try:
16043           if self.pool.IsReserved(ip):
16044             self.LogWarning("IP address %s is already reserved", ip)
16045           else:
16046             self.pool.Reserve(ip, external=True)
16047         except errors.AddressPoolError, err:
16048           self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16049
16050     if self.op.remove_reserved_ips:
16051       for ip in self.op.remove_reserved_ips:
16052         if ip == self.network.gateway:
16053           self.LogWarning("Cannot unreserve Gateway's IP")
16054           continue
16055         try:
16056           if not self.pool.IsReserved(ip):
16057             self.LogWarning("IP address %s is already unreserved", ip)
16058           else:
16059             self.pool.Release(ip, external=True)
16060         except errors.AddressPoolError, err:
16061           self.LogWarning("Cannot release IP address %s: %s", ip, err)
16062
16063     if self.op.mac_prefix:
16064       self.network.mac_prefix = self.mac_prefix
16065
16066     if self.op.network6:
16067       self.network.network6 = self.network6
16068
16069     if self.op.gateway6:
16070       self.network.gateway6 = self.gateway6
16071
16072     if self.op.network_type:
16073       self.network.network_type = self.network_type
16074
16075     self.pool.Validate()
16076
16077     self.cfg.Update(self.network, feedback_fn)
16078
16079
class _NetworkQuery(_QueryBase):
  """Query implementation for networks.

  """
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    """Compute the UUIDs of the wanted networks.

    Without requested names all networks are wanted, sorted by name.
    Otherwise every requested item may be either a UUID or a name; unknown
    items are reported via L{errors.OpPrereqError}. No locks are needed.

    """
    lu.needed_locks = {}

    all_networks = lu.cfg.GetAllNetworksInfo()
    self._all_networks = all_networks
    name_to_uuid = dict((net.name, net.uuid) for net in all_networks.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
      return

    # Accept names to be either names or UUIDs.
    wanted = []
    unknown = []
    for name in self.names:
      if name in all_networks:
        wanted.append(name)
      elif name in name_to_uuid:
        wanted.append(name_to_uuid[name])
      else:
        unknown.append(name)
    self.wanted = wanted

    if unknown:
      raise errors.OpPrereqError("Some networks do not exist: %s" % unknown,
                                 errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Do nothing, as no locks are used by this query.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    The group and instance mappings and the pool statistics are only
    computed if the requested data asks for them and are C{None} otherwise.

    """
    do_instances = query.NETQ_INST in self.requested_data
    do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
    do_stats = query.NETQ_STATS in self.requested_data

    network_to_groups = None
    network_to_instances = None
    stats = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in self.wanted)

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        all_nodes = lu.cfg.GetAllNodesInfo()
        network_to_instances = dict((uuid, []) for uuid in self.wanted)

      for group in all_groups.values():
        if do_instances:
          # Instances whose primary node is a member of this group
          group_nodes = [node.name for node in all_nodes.values()
                         if node.group == group.uuid]
          group_instances = [inst for inst in all_instances.values()
                             if inst.primary_node in group_nodes]

        for (net_uuid, netparams) in group.networks.items():
          if net_uuid not in network_to_groups:
            continue

          mode = netparams[constants.NIC_MODE]
          link = netparams[constants.NIC_LINK]
          network_to_groups[net_uuid].append(group.name + "(" + mode +
                                             ", " + link + ")")

          if not do_instances:
            continue

          for inst in group_instances:
            for nic in inst.nics:
              if nic.network == self._all_networks[net_uuid].name:
                # One NIC in the network is enough to list the instance
                network_to_instances[net_uuid].append(inst.name)
                break

    if do_stats:
      stats = {}
      for (uuid, net) in self._all_networks.items():
        if uuid not in self.wanted:
          continue

        pool = network.AddressPool(net)
        stats[uuid] = {
          "free_count": pool.GetFreeCount(),
          "reserved_count": pool.GetReservedCount(),
          "map": pool.GetMap(),
          "external_reservations":
            utils.CommaJoin(pool.GetExternalReservations()),
          }

    wanted_networks = [self._all_networks[uuid] for uuid in self.wanted]
    return query.NetworkQueryData(wanted_networks,
                                  network_to_groups,
                                  network_to_instances,
                                  stats)
16175
16176
class LUNetworkQuery(NoHooksLU):
  """Logical unit answering old-style network queries.

  All work is delegated to a L{_NetworkQuery} helper which is built from the
  opcode's C{names} and C{output_fields}.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Builds the query helper from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NetworkQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Delegates name expansion to the query helper.

    """
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query.

    @param feedback_fn: unused here
    @return: whatever the helper's C{OldStyleQuery} returns

    """
    return self.nq.OldStyleQuery(self)
16192
16193
class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup

  On success the group's C{networks} mapping gains an entry, keyed by the
  network UUID, holding the NIC mode and link taken from the opcode. If the
  network is already mapped to the group a warning is logged and the group
  is left untouched.

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the network and group names and declares the needed locks.

    @raise errors.OpPrereqError: if the network or the group is unknown

    """
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network %s does not exist" %
                                 self.network_name, errors.ECODE_INVAL)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group %s does not exist" %
                                 self.group_name, errors.ECODE_INVAL)

    # The instance list is filled in later by DeclareLocks
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    # The network itself is only locked (shared) when its address pool is
    # going to be inspected by the conflict check
    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    """Declares the instance locks needed by the conflict check.

    @param level: locking level currently being processed

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    """Builds the hooks environment.

    @rtype: dict
    @return: group name plus the requested NIC mode and link

    """
    return {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }

  def BuildHooksNodes(self):
    """Builds the hooks node lists.

    @rtype: tuple
    @return: the group's members, used for both elements of the tuple

    """
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    """Validates the NIC parameters and looks for conflicting instance IPs.

    Sets C{self.connected} to C{True} and returns early when the network is
    already mapped to the group, which turns L{Exec} into a no-op.

    @raise errors.OpPrereqError: if conflict checking is requested and a NIC
        of one of the locked instances uses an IP contained in the network's
        address pool

    """
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    # NOTE: the bridge existence check below is currently disabled
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = True
      return

    if not self.op.conflicts_check:
      return

    # Check if locked instances are still correct
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    nobj = self.cfg.GetNetwork(self.network_uuid)
    pool = network.AddressPool(nobj)
    conflicting_instances = []

    for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
      for idx, nic in enumerate(instance.nics):
        # A NIC without an IP cannot conflict; skip it explicitly instead of
        # relying on how the address pool treats None
        if nic.ip is not None and pool.Contains(nic.ip):
          conflicting_instances.append((instance.name, idx, nic.ip))

    if conflicting_instances:
      # Each entry is rendered as "<instance>: <nic index>/<ip>"
      conflicts_text = utils.CommaJoin("%s: %s/%s" % entry
                                       for entry in conflicting_instances)
      self.LogWarning("Following occurrences use IPs from network %s"
                      " that is about to connect to nodegroup %s: %s",
                      self.network_name, self.group.name, conflicts_text)
      raise errors.OpPrereqError("Conflicting IPs found."
                                 " Please remove/modify"
                                 " corresponding NICs",
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Stores the network parameters in the group and saves the group.

    Does nothing if L{CheckPrereq} found the network already connected.

    @param feedback_fn: passed on to the configuration update

    """
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)
16304
16305
class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup

  On success the network's entry is removed from the group's C{networks}
  mapping. If the network is not mapped to the group a warning is logged and
  the group is left untouched.

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the network and group names and declares the needed locks.

    @raise errors.OpPrereqError: if the network or the group is unknown

    """
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network %s does not exist" %
                                 self.network_name, errors.ECODE_INVAL)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group %s does not exist" %
                                 self.group_name, errors.ECODE_INVAL)

    # The instance list is filled in later by DeclareLocks
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    """Declares the instance locks needed by the conflict check.

    @param level: locking level currently being processed

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    """Builds the hooks environment.

    @rtype: dict
    @return: dictionary holding only the group name

    """
    return {
      "GROUP_NAME": self.group_name,
      }

  def BuildHooksNodes(self):
    """Builds the hooks node lists.

    @rtype: tuple
    @return: the group's members, used for both elements of the tuple

    """
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    """Checks that no locked instance still has a NIC on the network.

    Sets C{self.connected} to C{False} and returns early when the network is
    not mapped to the group, which turns L{Exec} into a no-op.

    @raise errors.OpPrereqError: if conflict checking is requested and a NIC
        of one of the locked instances is attached to the network

    """
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False
      return

    if not self.op.conflicts_check:
      return

    # Check if locked instances are still correct
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    conflicting_instances = []

    for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
      for idx, nic in enumerate(instance.nics):
        # NICs reference their network by name here
        if nic.network == self.network_name:
          conflicting_instances.append((instance.name, idx, nic.ip))

    if conflicting_instances:
      # Each entry is rendered as "<instance>: <nic index>/<ip>"
      conflicts_text = utils.CommaJoin("%s: %s/%s" % entry
                                       for entry in conflicting_instances)
      self.LogWarning("Following occurrences use IPs from network %s"
                      " that is about to be disconnected from the nodegroup"
                      " %s: %s",
                      self.network_name, self.group.name, conflicts_text)
      raise errors.OpPrereqError("Conflicting IPs."
                                 " Please remove/modify"
                                 " corresponding NICS",
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Removes the network from the group and saves the group.

    Does nothing if L{CheckPrereq} found the network not connected.

    @param feedback_fn: passed on to the configuration update

    """
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)
16399
16400
#: Query type implementations: maps each query resource constant to the
#: object implementing queries for it (looked up through
#: L{_GetQueryImplementation})
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

# Evaluated at import time: the table must have exactly the keys listed in
# constants.QR_VIA_OP, no more and no fewer
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16413
16414
def _GetQueryImplementation(name):
  """Looks up the implementation registered for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the entry of L{_QUERY_IMPL} stored under C{name}
  @raise errors.OpPrereqError: if C{name} is not a key of L{_QUERY_IMPL}

  """
  if name not in _QUERY_IMPL:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return _QUERY_IMPL[name]
16426
16427
def _CheckForConflictingIp(lu, ip, node):
  """Fails if the IP is reported as conflicting for the given node.

  @param lu: object whose C{cfg.CheckIPInNodeGroup} is consulted
  @type ip: string
  @param ip: ip address
  @type node: string
  @param node: node name
  @rtype: tuple
  @return: C{(None, None)} when no conflicting network is reported
  @raise errors.OpPrereqError: if C{CheckIPInNodeGroup} reports a network
      for this IP

  """
  (conflicting_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conflicting_net is None:
    return (None, None)

  raise errors.OpPrereqError("Conflicting IP found:"
                             " %s <> %s." % (ip, conflicting_net),
                             errors.ECODE_INVAL)