code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56 from ganeti import query
  57 from ganeti import qlang
  58 from ganeti import opcodes
  59 from ganeti import ht
  60 from ganeti import rpc
  61 from ganeti import runtime
  62 from ganeti import pathutils
  63 from ganeti import vcluster
  64 from ganeti import network
  65 from ganeti.masterd import iallocator
  66
  67 import ganeti.masterd.instance # pylint: disable=W0611
  68
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141
 142     # Dictionaries used to declare locking needs to mcpu
 143     self.needed_locks = None
 144     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 145     self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
 146
 147     self.add_locks = {}
 148     self.remove_locks = {}
 149
 150     # Used to force good behavior when calling helper functions
 151     self.recalculate_locks = {}
 152
 153     # logging
 154     self.Log = processor.Log # pylint: disable=C0103
 155     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 156     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 157     self.LogStep = processor.LogStep # pylint: disable=C0103
 158     # support for dry-run
 159     self.dry_run_result = None
 160     # support for generic debug attribute
 161     if (not hasattr(self.op, "debug_level") or
 162         not isinstance(self.op.debug_level, int)):
 163       self.op.debug_level = 0
 164
 165     # Tasklets
 166     self.tasklets = None
 167
 168     # Validate opcode parameters and set defaults
 169     self.op.Validate(True)
 170
 171     self.CheckArguments()
 172
 173   def CheckArguments(self):
 174     """Check syntactic validity for the opcode arguments.
 175
 176     This method is for doing a simple syntactic check and ensure
 177     validity of opcode parameters, without any cluster-related
 178     checks. While the same can be accomplished in ExpandNames and/or
 179     CheckPrereq, doing these separate is better because:
 180
 181       - ExpandNames is left as as purely a lock-related function
 182       - CheckPrereq is run after we have acquired locks (and possible
 183         waited for them)
 184
 185     The function is allowed to change the self.op attribute so that
 186     later methods can no longer worry about missing parameters.
 187
 188     """
 189     pass
 190
 191   def ExpandNames(self):
 192     """Expand names for this LU.
 193
 194     This method is called before starting to execute the opcode, and it should
 195     update all the parameters of the opcode to their canonical form (e.g. a
 196     short node name must be fully expanded after this method has successfully
 197     completed). This way locking, hooks, logging, etc. can work correctly.
 198
 199     LUs which implement this method must also populate the self.needed_locks
 200     member, as a dict with lock levels as keys, and a list of needed lock names
 201     as values. Rules:
 202
 203       - use an empty dict if you don't need any lock
 204       - if you don't need any lock at a particular level omit that
 205         level (note that in this case C{DeclareLocks} won't be called
 206         at all for that level)
 207       - if you need locks at a level, but you can't calculate it in
 208         this function, initialise that level with an empty list and do
 209         further processing in L{LogicalUnit.DeclareLocks} (see that
 210         function's docstring)
 211       - don't put anything for the BGL level
 212       - if you want all locks at a level use L{locking.ALL_SET} as a value
 213
 214     If you need to share locks (rather than acquire them exclusively) at one
 215     level you can modify self.share_locks, setting a true value (usually 1) for
 216     that level. By default locks are not shared.
 217
 218     This function can also define a list of tasklets, which then will be
 219     executed in order instead of the usual LU-level CheckPrereq and Exec
 220     functions, if those are not defined by the LU.
 221
 222     Examples::
 223
 224       # Acquire all nodes and one instance
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: locking.ALL_SET,
 227         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 228       }
 229       # Acquire just two nodes
 230       self.needed_locks = {
 231         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 232       }
 233       # Acquire no locks
 234       self.needed_locks = {} # No, you can't leave it to the default value None
 235
 236     """
 237     # The implementation of this method is mandatory only if the new LU is
 238     # concurrent, so that old LUs don't need to be changed all at the same
 239     # time.
 240     if self.REQ_BGL:
 241       self.needed_locks = {} # Exclusive LUs don't need locks.
 242     else:
 243       raise NotImplementedError
 244
 245   def DeclareLocks(self, level):
 246     """Declare LU locking needs for a level
 247
 248     While most LUs can just declare their locking needs at ExpandNames time,
 249     sometimes there's the need to calculate some locks after having acquired
 250     the ones before. This function is called just before acquiring locks at a
 251     particular level, but after acquiring the ones at lower levels, and permits
 252     such calculations. It can be used to modify self.needed_locks, and by
 253     default it does nothing.
 254
 255     This function is only called if you have something already set in
 256     self.needed_locks for the level.
 257
 258     @param level: Locking level which is going to be locked
 259     @type level: member of L{ganeti.locking.LEVELS}
 260
 261     """
 262
 263   def CheckPrereq(self):
 264     """Check prerequisites for this LU.
 265
 266     This method should check that the prerequisites for the execution
 267     of this LU are fulfilled. It can do internode communication, but
 268     it should be idempotent - no cluster or system changes are
 269     allowed.
 270
 271     The method should raise errors.OpPrereqError in case something is
 272     not fulfilled. Its return value is ignored.
 273
 274     This method should also update all the parameters of the opcode to
 275     their canonical form if it hasn't been done by ExpandNames before.
 276
 277     """
 278     if self.tasklets is not None:
 279       for (idx, tl) in enumerate(self.tasklets):
 280         logging.debug("Checking prerequisites for tasklet %s/%s",
 281                       idx + 1, len(self.tasklets))
 282         tl.CheckPrereq()
 283     else:
 284       pass
 285
 286   def Exec(self, feedback_fn):
 287     """Execute the LU.
 288
 289     This method should implement the actual work. It should raise
 290     errors.OpExecError for failures that are somewhat dealt with in
 291     code, or expected.
 292
 293     """
 294     if self.tasklets is not None:
 295       for (idx, tl) in enumerate(self.tasklets):
 296         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 297         tl.Exec(feedback_fn)
 298     else:
 299       raise NotImplementedError
 300
 301   def BuildHooksEnv(self):
 302     """Build hooks environment for this LU.
 303
 304     @rtype: dict
 305     @return: Dictionary containing the environment that will be used for
 306       running the hooks for this LU. The keys of the dict must not be prefixed
 307       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 308       will extend the environment with additional variables. If no environment
 309       should be defined, an empty dictionary should be returned (not C{None}).
 310     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 311       will not be called.
 312
 313     """
 314     raise NotImplementedError
 315
 316   def BuildHooksNodes(self):
 317     """Build list of nodes to run LU's hooks.
 318
 319     @rtype: tuple; (list, list)
 320     @return: Tuple containing a list of node names on which the hook
 321       should run before the execution and a list of node names on which the
 322       hook should run after the execution. No nodes should be returned as an
 323       empty list (and not None).
 324     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 325       will not be called.
 326
 327     """
 328     raise NotImplementedError
 329
 330   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 331     """Notify the LU about the results of its hooks.
 332
 333     This method is called every time a hooks phase is executed, and notifies
 334     the Logical Unit about the hooks' result. The LU can then use it to alter
 335     its result based on the hooks.  By default the method does nothing and the
 336     previous result is passed back unchanged but any LU can define it if it
 337     wants to use the local cluster hook-scripts somehow.
 338
 339     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 340         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 341     @param hook_results: the results of the multi-node hooks rpc call
 342     @param feedback_fn: function used send feedback back to the caller
 343     @param lu_result: the previous Exec result this LU had, or None
 344         in the PRE phase
 345     @return: the new Exec result, based on the previous result
 346         and hook results
 347
 348     """
 349     # API must be kept, thus we ignore the unused argument and could
 350     # be a function warnings
 351     # pylint: disable=W0613,R0201
 352     return lu_result
 353
 354   def _ExpandAndLockInstance(self):
 355     """Helper function to expand and lock an instance.
 356
 357     Many LUs that work on an instance take its name in self.op.instance_name
 358     and need to expand it and then declare the expanded name for locking. This
 359     function does it, and then updates self.op.instance_name to the expanded
 360     name. It also initializes needed_locks as a dict, if this hasn't been done
 361     before.
 362
 363     """
 364     if self.needed_locks is None:
 365       self.needed_locks = {}
 366     else:
 367       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 368         "_ExpandAndLockInstance called with instance-level locks set"
 369     self.op.instance_name = _ExpandInstanceName(self.cfg,
 370                                                 self.op.instance_name)
 371     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 372
 373   def _LockInstancesNodes(self, primary_only=False,
 374                           level=locking.LEVEL_NODE):
 375     """Helper function to declare instances' nodes for locking.
 376
 377     This function should be called after locking one or more instances to lock
 378     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 379     with all primary or secondary nodes for instances already locked and
 380     present in self.needed_locks[locking.LEVEL_INSTANCE].
 381
 382     It should be called from DeclareLocks, and for safety only works if
 383     self.recalculate_locks[locking.LEVEL_NODE] is set.
 384
 385     In the future it may grow parameters to just lock some instance's nodes, or
 386     to just lock primaries or secondary nodes, if needed.
 387
 388     If should be called in DeclareLocks in a way similar to::
 389
 390       if level == locking.LEVEL_NODE:
 391         self._LockInstancesNodes()
 392
 393     @type primary_only: boolean
 394     @param primary_only: only lock primary nodes of locked instances
 395     @param level: Which lock level to use for locking nodes
 396
 397     """
 398     assert level in self.recalculate_locks, \
 399       "_LockInstancesNodes helper function called with no nodes to recalculate"
 400
 401     # TODO: check if we're really been called with the instance locks held
 402
 403     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 404     # future we might want to have different behaviors depending on the value
 405     # of self.recalculate_locks[locking.LEVEL_NODE]
 406     wanted_nodes = []
 407     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 408     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 409       wanted_nodes.append(instance.primary_node)
 410       if not primary_only:
 411         wanted_nodes.extend(instance.secondary_nodes)
 412
 413     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 414       self.needed_locks[level] = wanted_nodes
 415     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 416       self.needed_locks[level].extend(wanted_nodes)
 417     else:
 418       raise errors.ProgrammerError("Unknown recalculation mode")
 419
 420     del self.recalculate_locks[level]
 421
 422
 423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 424   """Simple LU which runs no hooks.
 425
 426   This LU is intended as a parent for other LogicalUnits which will
 427   run no hooks, in order to reduce duplicate code.
 428
 429   """
 430   HPATH = None
 431   HTYPE = None
 432
 433   def BuildHooksEnv(self):
 434     """Empty BuildHooksEnv for NoHooksLu.
 435
 436     This just raises an error.
 437
 438     """
 439     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 440
 441   def BuildHooksNodes(self):
 442     """Empty BuildHooksNodes for NoHooksLU.
 443
 444     """
 445     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 446
 447
 448 class Tasklet:
 449   """Tasklet base class.
 450
 451   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 452   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 453   tasklets know nothing about locks.
 454
 455   Subclasses must follow these rules:
 456     - Implement CheckPrereq
 457     - Implement Exec
 458
 459   """
 460   def __init__(self, lu):
 461     self.lu = lu
 462
 463     # Shortcuts
 464     self.cfg = lu.cfg
 465     self.rpc = lu.rpc
 466
 467   def CheckPrereq(self):
 468     """Check prerequisites for this tasklets.
 469
 470     This method should check whether the prerequisites for the execution of
 471     this tasklet are fulfilled. It can do internode communication, but it
 472     should be idempotent - no cluster or system changes are allowed.
 473
 474     The method should raise errors.OpPrereqError in case something is not
 475     fulfilled. Its return value is ignored.
 476
 477     This method should also update all parameters to their canonical form if it
 478     hasn't been done before.
 479
 480     """
 481     pass
 482
 483   def Exec(self, feedback_fn):
 484     """Execute the tasklet.
 485
 486     This method should implement the actual work. It should raise
 487     errors.OpExecError for failures that are somewhat dealt with in code, or
 488     expected.
 489
 490     """
 491     raise NotImplementedError
 492
 493
 494 class _QueryBase:
 495   """Base for query utility classes.
 496
 497   """
 498   #: Attribute holding field definitions
 499   FIELDS = None
 500
 501   #: Field to sort by
 502   SORT_FIELD = "name"
 503
 504   def __init__(self, qfilter, fields, use_locking):
 505     """Initializes this class.
 506
 507     """
 508     self.use_locking = use_locking
 509
 510     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 511                              namefield=self.SORT_FIELD)
 512     self.requested_data = self.query.RequestedData()
 513     self.names = self.query.RequestedNames()
 514
 515     # Sort only if no names were requested
 516     self.sort_by_name = not self.names
 517
 518     self.do_locking = None
 519     self.wanted = None
 520
 521   def _GetNames(self, lu, all_names, lock_level):
 522     """Helper function to determine names asked for in the query.
 523
 524     """
 525     if self.do_locking:
 526       names = lu.owned_locks(lock_level)
 527     else:
 528       names = all_names
 529
 530     if self.wanted == locking.ALL_SET:
 531       assert not self.names
 532       # caller didn't specify names, so ordering is not important
 533       return utils.NiceSort(names)
 534
 535     # caller specified names and we must keep the same order
 536     assert self.names
 537     assert not self.do_locking or lu.glm.is_owned(lock_level)
 538
 539     missing = set(self.wanted).difference(names)
 540     if missing:
 541       raise errors.OpExecError("Some items were removed before retrieving"
 542                                " their data: %s" % missing)
 543
 544     # Return expanded names
 545     return self.wanted
 546
 547   def ExpandNames(self, lu):
 548     """Expand names for this query.
 549
 550     See L{LogicalUnit.ExpandNames}.
 551
 552     """
 553     raise NotImplementedError()
 554
 555   def DeclareLocks(self, lu, level):
 556     """Declare locks for this query.
 557
 558     See L{LogicalUnit.DeclareLocks}.
 559
 560     """
 561     raise NotImplementedError()
 562
 563   def _GetQueryData(self, lu):
 564     """Collects all data for this query.
 565
 566     @return: Query data object
 567
 568     """
 569     raise NotImplementedError()
 570
 571   def NewStyleQuery(self, lu):
 572     """Collect data and execute query.
 573
 574     """
 575     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 576                                   sort_by_name=self.sort_by_name)
 577
 578   def OldStyleQuery(self, lu):
 579     """Collect data and execute query.
 580
 581     """
 582     return self.query.OldStyleQuery(self._GetQueryData(lu),
 583                                     sort_by_name=self.sort_by_name)
 584
 585
 586 def _ShareAll():
 587   """Returns a dict declaring all lock levels shared.
 588
 589   """
 590   return dict.fromkeys(locking.LEVELS, 1)
 591
 592
 593 def _AnnotateDiskParams(instance, devs, cfg):
 594   """Little helper wrapper to the rpc annotation method.
 595
 596   @param instance: The instance object
 597   @type devs: List of L{objects.Disk}
 598   @param devs: The root devices (not any of its children!)
 599   @param cfg: The config object
 600   @returns The annotated disk copies
 601   @see L{rpc.AnnotateDiskParams}
 602
 603   """
 604   return rpc.AnnotateDiskParams(instance.disk_template, devs,
 605                                 cfg.GetInstanceDiskParams(instance))
 606
 607
 608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 609                               cur_group_uuid):
 610   """Checks if node groups for locked instances are still correct.
 611
 612   @type cfg: L{config.ConfigWriter}
 613   @param cfg: Cluster configuration
 614   @type instances: dict; string as key, L{objects.Instance} as value
 615   @param instances: Dictionary, instance name as key, instance object as value
 616   @type owned_groups: iterable of string
 617   @param owned_groups: List of owned groups
 618   @type owned_nodes: iterable of string
 619   @param owned_nodes: List of owned nodes
 620   @type cur_group_uuid: string or None
 621   @param cur_group_uuid: Optional group UUID to check against instance's groups
 622
 623   """
 624   for (name, inst) in instances.items():
 625     assert owned_nodes.issuperset(inst.all_nodes), \
 626       "Instance %s's nodes changed while we kept the lock" % name
 627
 628     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 629
 630     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 631       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 632
 633
 634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
 635                              primary_only=False):
 636   """Checks if the owned node groups are still correct for an instance.
 637
 638   @type cfg: L{config.ConfigWriter}
 639   @param cfg: The cluster configuration
 640   @type instance_name: string
 641   @param instance_name: Instance name
 642   @type owned_groups: set or frozenset
 643   @param owned_groups: List of currently owned node groups
 644   @type primary_only: boolean
 645   @param primary_only: Whether to check node groups for only the primary node
 646
 647   """
 648   inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
 649
 650   if not owned_groups.issuperset(inst_groups):
 651     raise errors.OpPrereqError("Instance %s's node groups changed since"
 652                                " locks were acquired, current groups are"
 653                                " are '%s', owning groups '%s'; retry the"
 654                                " operation" %
 655                                (instance_name,
 656                                 utils.CommaJoin(inst_groups),
 657                                 utils.CommaJoin(owned_groups)),
 658                                errors.ECODE_STATE)
 659
 660   return inst_groups
 661
 662
 663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 664   """Checks if the instances in a node group are still correct.
 665
 666   @type cfg: L{config.ConfigWriter}
 667   @param cfg: The cluster configuration
 668   @type group_uuid: string
 669   @param group_uuid: Node group UUID
 670   @type owned_instances: set or frozenset
 671   @param owned_instances: List of currently owned instances
 672
 673   """
 674   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 675   if owned_instances != wanted_instances:
 676     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 677                                " locks were acquired, wanted '%s', have '%s';"
 678                                " retry the operation" %
 679                                (group_uuid,
 680                                 utils.CommaJoin(wanted_instances),
 681                                 utils.CommaJoin(owned_instances)),
 682                                errors.ECODE_STATE)
 683
 684   return wanted_instances
 685
 686
 687 def _SupportsOob(cfg, node):
 688   """Tells if node supports OOB.
 689
 690   @type cfg: L{config.ConfigWriter}
 691   @param cfg: The cluster configuration
 692   @type node: L{objects.Node}
 693   @param node: The node
 694   @return: The OOB script if supported or an empty string otherwise
 695
 696   """
 697   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 698
 699
 700 def _CopyLockList(names):
 701   """Makes a copy of a list of lock names.
 702
 703   Handles L{locking.ALL_SET} correctly.
 704
 705   """
 706   if names == locking.ALL_SET:
 707     return locking.ALL_SET
 708   else:
 709     return names[:]
 710
 711
 712 def _GetWantedNodes(lu, nodes):
 713   """Returns list of checked and expanded node names.
 714
 715   @type lu: L{LogicalUnit}
 716   @param lu: the logical unit on whose behalf we execute
 717   @type nodes: list
 718   @param nodes: list of node names or None for all nodes
 719   @rtype: list
 720   @return: the list of nodes, sorted
 721   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 722
 723   """
 724   if nodes:
 725     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 726
 727   return utils.NiceSort(lu.cfg.GetNodeList())
 728
 729
 730 def _GetWantedInstances(lu, instances):
 731   """Returns list of checked and expanded instance names.
 732
 733   @type lu: L{LogicalUnit}
 734   @param lu: the logical unit on whose behalf we execute
 735   @type instances: list
 736   @param instances: list of instance names or None for all instances
 737   @rtype: list
 738   @return: the list of instances, sorted
 739   @raise errors.OpPrereqError: if the instances parameter is wrong type
 740   @raise errors.OpPrereqError: if any of the passed instances is not found
 741
 742   """
 743   if instances:
 744     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 745   else:
 746     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 747   return wanted
 748
 749
 750 def _GetUpdatedParams(old_params, update_dict,
 751                       use_default=True, use_none=False):
 752   """Return the new version of a parameter dictionary.
 753
 754   @type old_params: dict
 755   @param old_params: old parameters
 756   @type update_dict: dict
 757   @param update_dict: dict containing new parameter values, or
 758       constants.VALUE_DEFAULT to reset the parameter to its default
 759       value
 760   @param use_default: boolean
 761   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 762       values as 'to be deleted' values
 763   @param use_none: boolean
 764   @type use_none: whether to recognise C{None} values as 'to be
 765       deleted' values
 766   @rtype: dict
 767   @return: the new parameter dictionary
 768
 769   """
 770   params_copy = copy.deepcopy(old_params)
 771   for key, val in update_dict.iteritems():
 772     if ((use_default and val == constants.VALUE_DEFAULT) or
 773         (use_none and val is None)):
 774       try:
 775         del params_copy[key]
 776       except KeyError:
 777         pass
 778     else:
 779       params_copy[key] = val
 780   return params_copy
 781
 782
 783 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 784   """Return the new version of a instance policy.
 785
 786   @param group_policy: whether this policy applies to a group and thus
 787     we should support removal of policy entries
 788
 789   """
 790   use_none = use_default = group_policy
 791   ipolicy = copy.deepcopy(old_ipolicy)
 792   for key, value in new_ipolicy.items():
 793     if key not in constants.IPOLICY_ALL_KEYS:
 794       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 795                                  errors.ECODE_INVAL)
 796     if key in constants.IPOLICY_ISPECS:
 797       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 798       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 799                                        use_none=use_none,
 800                                        use_default=use_default)
 801     else:
 802       if (not value or value == [constants.VALUE_DEFAULT] or
 803           value == constants.VALUE_DEFAULT):
 804         if group_policy:
 805           del ipolicy[key]
 806         else:
 807           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 808                                      " on the cluster'" % key,
 809                                      errors.ECODE_INVAL)
 810       else:
 811         if key in constants.IPOLICY_PARAMETERS:
 812           # FIXME: we assume all such values are float
 813           try:
 814             ipolicy[key] = float(value)
 815           except (TypeError, ValueError), err:
 816             raise errors.OpPrereqError("Invalid value for attribute"
 817                                        " '%s': '%s', error: %s" %
 818                                        (key, value, err), errors.ECODE_INVAL)
 819         else:
 820           # FIXME: we assume all others are lists; this should be redone
 821           # in a nicer way
 822           ipolicy[key] = list(value)
 823   try:
 824     objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
 825   except errors.ConfigurationError, err:
 826     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 827                                errors.ECODE_INVAL)
 828   return ipolicy
 829
 830
 831 def _UpdateAndVerifySubDict(base, updates, type_check):
 832   """Updates and verifies a dict with sub dicts of the same type.
 833
 834   @param base: The dict with the old data
 835   @param updates: The dict with the new data
 836   @param type_check: Dict suitable to ForceDictType to verify correct types
 837   @returns: A new dict with updated and verified values
 838
 839   """
 840   def fn(old, value):
 841     new = _GetUpdatedParams(old, value)
 842     utils.ForceDictType(new, type_check)
 843     return new
 844
 845   ret = copy.deepcopy(base)
 846   ret.update(dict((key, fn(base.get(key, {}), value))
 847                   for key, value in updates.items()))
 848   return ret
 849
 850
 851 def _MergeAndVerifyHvState(op_input, obj_input):
 852   """Combines the hv state from an opcode with the one of the object
 853
 854   @param op_input: The input dict from the opcode
 855   @param obj_input: The input dict from the objects
 856   @return: The verified and updated dict
 857
 858   """
 859   if op_input:
 860     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 861     if invalid_hvs:
 862       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 863                                  " %s" % utils.CommaJoin(invalid_hvs),
 864                                  errors.ECODE_INVAL)
 865     if obj_input is None:
 866       obj_input = {}
 867     type_check = constants.HVSTS_PARAMETER_TYPES
 868     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 869
 870   return None
 871
 872
 873 def _MergeAndVerifyDiskState(op_input, obj_input):
 874   """Combines the disk state from an opcode with the one of the object
 875
 876   @param op_input: The input dict from the opcode
 877   @param obj_input: The input dict from the objects
 878   @return: The verified and updated dict
 879   """
 880   if op_input:
 881     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 882     if invalid_dst:
 883       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 884                                  utils.CommaJoin(invalid_dst),
 885                                  errors.ECODE_INVAL)
 886     type_check = constants.DSS_PARAMETER_TYPES
 887     if obj_input is None:
 888       obj_input = {}
 889     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 890                                               type_check))
 891                 for key, value in op_input.items())
 892
 893   return None
 894
 895
 896 def _ReleaseLocks(lu, level, names=None, keep=None):
 897   """Releases locks owned by an LU.
 898
 899   @type lu: L{LogicalUnit}
 900   @param level: Lock level
 901   @type names: list or None
 902   @param names: Names of locks to release
 903   @type keep: list or None
 904   @param keep: Names of locks to retain
 905
 906   """
 907   assert not (keep is not None and names is not None), \
 908          "Only one of the 'names' and the 'keep' parameters can be given"
 909
 910   if names is not None:
 911     should_release = names.__contains__
 912   elif keep:
 913     should_release = lambda name: name not in keep
 914   else:
 915     should_release = None
 916
 917   owned = lu.owned_locks(level)
 918   if not owned:
 919     # Not owning any lock at this level, do nothing
 920     pass
 921
 922   elif should_release:
 923     retain = []
 924     release = []
 925
 926     # Determine which locks to release
 927     for name in owned:
 928       if should_release(name):
 929         release.append(name)
 930       else:
 931         retain.append(name)
 932
 933     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 934
 935     # Release just some locks
 936     lu.glm.release(level, names=release)
 937
 938     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 939   else:
 940     # Release everything
 941     lu.glm.release(level)
 942
 943     assert not lu.glm.is_owned(level), "No locks should be owned"
 944
 945
 946 def _MapInstanceDisksToNodes(instances):
 947   """Creates a map from (node, volume) to instance name.
 948
 949   @type instances: list of L{objects.Instance}
 950   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 951
 952   """
 953   return dict(((node, vol), inst.name)
 954               for inst in instances
 955               for (node, vols) in inst.MapLVsByNode().items()
 956               for vol in vols)
 957
 958
 959 def _RunPostHook(lu, node_name):
 960   """Runs the post-hook for an opcode on a single node.
 961
 962   """
 963   hm = lu.proc.BuildHooksManager(lu)
 964   try:
 965     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 966   except Exception, err: # pylint: disable=W0703
 967     lu.LogWarning("Errors occurred running hooks on %s: %s",
 968                   node_name, err)
 969
 970
 971 def _CheckOutputFields(static, dynamic, selected):
 972   """Checks whether all selected fields are valid.
 973
 974   @type static: L{utils.FieldSet}
 975   @param static: static fields set
 976   @type dynamic: L{utils.FieldSet}
 977   @param dynamic: dynamic fields set
 978
 979   """
 980   f = utils.FieldSet()
 981   f.Extend(static)
 982   f.Extend(dynamic)
 983
 984   delta = f.NonMatching(selected)
 985   if delta:
 986     raise errors.OpPrereqError("Unknown output fields selected: %s"
 987                                % ",".join(delta), errors.ECODE_INVAL)
 988
 989
 990 def _CheckGlobalHvParams(params):
 991   """Validates that given hypervisor params are not global ones.
 992
 993   This will ensure that instances don't get customised versions of
 994   global params.
 995
 996   """
 997   used_globals = constants.HVC_GLOBALS.intersection(params)
 998   if used_globals:
 999     msg = ("The following hypervisor parameters are global and cannot"
1000            " be customized at instance level, please modify them at"
1001            " cluster level: %s" % utils.CommaJoin(used_globals))
1002     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1003
1004
def _CheckNodeOnline(lu, node, msg=None):
  """Verifies that the given node is not marked offline.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if not node_info.offline:
    return

  if msg is None:
    msg = "Can't use offline node"
  raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1018
1019
def _CheckNodeNotDrained(lu, node):
  """Verifies that the given node is not marked drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if not node_info.drained:
    return

  raise errors.OpPrereqError("Can't use drained node %s" % node,
                             errors.ECODE_STATE)
1031
1032
def _CheckNodeVmCapable(lu, node):
  """Verifies that the given node is marked as vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if node_info.vm_capable:
    return

  raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                             errors.ECODE_STATE)
1044
1045
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Verifies via RPC that a node supports the given OS.

  Unless C{force_variant} is set, the OS variant encoded in C{os_name} is
  also validated against the OS object returned by the node.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  os_result = lu.rpc.call_os_get(node, os_name)
  failure_msg = ("OS '%s' not in supported OS list for node %s" %
                 (os_name, node))
  os_result.Raise(failure_msg, prereq=True, ecode=errors.ECODE_INVAL)

  if force_variant:
    return

  _CheckOSVariant(os_result.payload, os_name)
1062
1063
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Verifies via RPC that a node owns the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  ip_check = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  ip_check.Raise("Failure checking secondary ip on node %s" % node,
                 prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if ip_check.payload:
    return

  msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
         " please fix and re-run this command" % secondary_ip)
  if prereq:
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
  raise errors.OpExecError(msg)
1089
1090
def _GetClusterDomainSecret():
  """Returns the cluster domain secret read from its file.

  The file at L{pathutils.CLUSTER_DOMAIN_SECRET_FILE} is read with
  L{utils.ReadOneLineFile} in strict mode.

  """
  secret_path = pathutils.CLUSTER_DOMAIN_SECRET_FILE
  return utils.ReadOneLineFile(secret_path, strict=True)
1097
1098
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Verifies that an instance is in one of the required admin states.

  If L{constants.ADMINST_UP} is not among the required states, the primary
  node is additionally asked whether the instance is actually running; if
  that node is offline the check is skipped with a warning.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: the admin states the instance is allowed to be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))

  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP in req_states:
    # A running instance is acceptable, no need to ask the node
    return

  pnode = instance.primary_node
  if lu.cfg.GetNodeInfo(pnode).offline:
    lu.LogWarning("Primary node offline, ignoring check that instance"
                   " is down")
    return

  running = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  running.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)
  if instance.name in running.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, msg), errors.ECODE_STATE)
1128
1129
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Checks a single value against the min/max bounds of an ipolicy.

  A value of C{None} or L{constants.VALUE_AUTO} is never a violation. If
  the policy has no bound for C{name}, the value itself is used as that
  bound.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in (None, constants.VALUE_AUTO):
    return None

  upper = ipolicy[constants.ISPECS_MAX].get(name, value)
  lower = ipolicy[constants.ISPECS_MIN].get(name, value)
  if not (value > upper or lower > value):
    return None

  if qualifier:
    label = "%s/%s" % (name, qualifier)
  else:
    label = name
  return ("%s value %s is not in range [%s, %s]" %
          (label, value, lower, upper))
1154
1155
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Checks a set of instance specs against an ipolicy.

  Each spec, and each disk size individually (qualified by its index), is
  passed to C{_compute_fn}; all non-empty results are collected.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  checks = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ]
  for (idx, size) in enumerate(disk_sizes):
    checks.append((constants.ISPEC_DISK_SIZE, str(idx), size))

  violations = []
  for (name, qualifier, value) in checks:
    verdict = _compute_fn(name, qualifier, ipolicy, value)
    if verdict:
      violations.append(verdict)
  return violations
1193
1194
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Checks an existing instance against an ipolicy.

  Memory, CPU and spindle values are read from the instance's own
  C{beparams} (C{None} when not set there); disk and NIC figures come from
  the instance's disk and NIC lists.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  beparams = instance.beparams
  disks = instance.disks

  return _compute_fn(ipolicy,
                     beparams.get(constants.BE_MAXMEM),
                     beparams.get(constants.BE_VCPUS),
                     len(disks),
                     len(instance.nics),
                     [disk.size for disk in disks],
                     beparams.get(constants.BE_SPINDLE_USE))
1216
1217
def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Checks an instance specification dict against an ipolicy.

  Missing entries default to C{None} for memory, CPU and spindle use, to
  C{0} for the disk and NIC counts and to an empty list for disk sizes.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  lookup = instance_spec.get

  return _compute_fn(ipolicy,
                     lookup(constants.ISPEC_MEM_SIZE, None),
                     lookup(constants.ISPEC_CPU_COUNT, None),
                     lookup(constants.ISPEC_DISK_COUNT, 0),
                     lookup(constants.ISPEC_NIC_COUNT, 0),
                     lookup(constants.ISPEC_DISK_SIZE, []),
                     lookup(constants.ISPEC_SPINDLE_USE, None))
1239
1240
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Checks an instance against the policy of a new target group.

  No check is made (and no violation reported) when the target group is
  the same as the current one.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group != target_group:
    return _compute_fn(ipolicy, instance)

  return []
1258
1259
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Verifies that an instance fits the instance policy of a target node.

  The group of the instance's primary node and the group of C{node} are
  handed to C{_compute_fn} together with the policy and the instance.

  @param lu: the LU on behalf of which we make the check
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate (an object with a C{group}
      attribute)
  @param ignore: Ignore violations of the ipolicy (only log a warning)
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @raise errors.OpPrereqError: if there are violations and C{ignore} is
      not set
  @see: L{_ComputeIPolicySpecViolation}

  """
  pnode_info = lu.cfg.GetNodeInfo(instance.primary_node)
  violations = _compute_fn(ipolicy, instance, pnode_info.group, node.group)
  if not violations:
    return

  msg = ("Instance does not meet target node group's (%s) instance"
         " policy: %s") % (node.group, utils.CommaJoin(violations))
  if not ignore:
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  lu.LogWarning(msg)
1282
1283
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Finds instances which start violating the policy after a change.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: The names of the instances which violate the new ipolicy but
      not the old one, as a set difference of the two violation sets

  """
  violating_new = _ComputeViolatingInstances(new_ipolicy, instances)
  violating_old = _ComputeViolatingInstances(old_ipolicy, instances)
  return violating_new - violating_old
1296
1297
def _ExpandItemName(fn, name, kind):
  """Resolves an item name through an expansion function.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found, i.e. the
      expansion function returned C{None}

  """
  expanded = fn(name)
  if expanded is not None:
    return expanded

  raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                             errors.ECODE_NOENT)
1313
1314
def _ExpandNodeName(cfg, name):
  """Expands a node name using C{cfg.ExpandNodeName}.

  Thin wrapper over L{_ExpandItemName}.

  """
  expander = cfg.ExpandNodeName
  return _ExpandItemName(expander, name, "Node")
1318
1319
def _ExpandInstanceName(cfg, name):
  """Expands an instance name using C{cfg.ExpandInstanceName}.

  Thin wrapper over L{_ExpandItemName}.

  """
  expander = cfg.ExpandInstanceName
  return _ExpandItemName(expander, name, "Instance")
1323
1324
def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         network_type, mac_prefix, tags):
  """Builds the network related environment variables for hooks.

  Only values which evaluate to true end up in the environment; tags are
  joined with single spaces.

  @type name: string
  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type network_type: string
  @param network_type: the type of the network
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @type tags: list
  @param tags: the tags of the network
  @rtype: dict
  @return: the hook environment for this network

  """
  candidates = [
    ("NETWORK_NAME", name),
    ("NETWORK_SUBNET", subnet),
    ("NETWORK_GATEWAY", gateway),
    ("NETWORK_SUBNET6", network6),
    ("NETWORK_GATEWAY6", gateway6),
    ("NETWORK_MAC_PREFIX", mac_prefix),
    ("NETWORK_TYPE", network_type),
    ]

  env = dict((key, value) for (key, value) in candidates if value)
  if tags:
    env["NETWORK_TAGS"] = " ".join(tags)

  return env
1368
1369
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link, net, netinfo)
      representing the NICs the instance has; C{net} is the name of the
      network the NIC is connected to (or a false value) and C{netinfo}
      is the dict form of that network as accepted by
      L{objects.Network.FromDict} (or a false value)
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      # The network name is unpacked as "net"; the previous test on the
      # undefined name "network" raised NameError for any NIC
      if net:
        env["INSTANCE_NIC%d_NETWORK" % idx] = net
        if netinfo:
          # Export the non-empty attributes of the connected network
          nobj = objects.Network.FromDict(netinfo)
          if nobj.network:
            env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
          if nobj.gateway:
            env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
          if nobj.network6:
            env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
          if nobj.gateway6:
            env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
          if nobj.mac_prefix:
            env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
          if nobj.network_type:
            env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
          if nobj.tags:
            env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
      if mode == constants.NIC_MODE_BRIDGED:
        # For bridged NICs the link is the bridge
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  # Export all backend and hypervisor parameters under their own prefix
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
1481
1482
def _NICToTuple(lu, nic):
  """Builds a tuple of nic information.

  Mode and link are taken from the NIC parameters filled with the cluster
  defaults. If the NIC has a network and the configuration can look it up,
  the network object is included in its dict form.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple
  @rtype: tuple
  @return: (ip, mac, mode, link, network, network info dict or None)

  """
  params = lu.cfg.GetClusterInfo().SimpleFillNIC(nic.nicparams)

  net_name = nic.network
  net_dict = None
  if net_name:
    uuid = lu.cfg.LookupNetwork(net_name)
    if uuid:
      net_dict = objects.Network.ToDict(lu.cfg.GetNetwork(uuid))

  return (nic.ip, nic.mac, params[constants.NIC_MODE],
          params[constants.NIC_LINK], net_name, net_dict)
1506
1507
def _NICListToTuple(lu, nics):
  """Converts a list of NICs into a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  @rtype: list
  @return: one tuple as built by L{_NICToTuple} per NIC, in input order

  """
  return [_NICToTuple(lu, nic) for nic in nics]
1524
1525
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds the instance hook environment from an instance object.

  The arguments for L{_BuildInstanceHookEnv} are derived from the instance
  and its backend/hypervisor parameters as filled by the cluster.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  be_full = cluster.FillBE(instance)
  hv_full = cluster.FillHV(instance)

  hook_args = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_state,
    maxmem=be_full[constants.BE_MAXMEM],
    minmem=be_full[constants.BE_MINMEM],
    vcpus=be_full[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=be_full,
    hvp=hv_full,
    hypervisor_name=instance.hypervisor,
    tags=instance.tags,
    )
  if override:
    # Caller-supplied values win over the ones derived from the object
    hook_args.update(override)

  return _BuildInstanceHookEnv(**hook_args) # pylint: disable=W0142
1564
1565
def _AdjustCandidatePool(lu, exceptions):
  """Adjusts the master candidate pool after node operations.

  Every node object returned by C{lu.cfg.MaintainCandidatePool} is logged
  as promoted and re-added to the LU's context. A note is logged if there
  are then more candidates than the reported maximum.

  @param lu: the LU on whose behalf the pool is adjusted
  @param exceptions: passed unchanged to the configuration's candidate
      pool methods

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    promoted_names = utils.CommaJoin(node.name for node in promoted)
    lu.LogInfo("Promoted nodes to master candidate role: %s", promoted_names)
    for node in promoted:
      lu.context.ReaddNode(node)

  (mc_now, mc_max, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
1580
1581
def _DecideSelfPromotion(lu, exceptions=None):
  """Decides whether the new node should become a master candidate.

  @param lu: the LU on whose behalf the decision is made
  @param exceptions: passed unchanged to
      C{lu.cfg.GetMasterCandidateStats}
  @rtype: boolean
  @return: whether the current number of candidates is below the desired
      number (raised by one for the new node, capped at the cluster's
      candidate pool size)

  """
  pool_size = lu.cfg.GetClusterInfo().candidate_pool_size
  (current, desired, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  target = min(desired + 1, pool_size)
  return current < target
1591
1592
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes the set of instances which violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  violators = []
  for instance in instances:
    if _ComputeIPolicyInstanceViolation(ipolicy, instance):
      violators.append(instance.name)
  return frozenset(violators)
1604
1605
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Checks that the bridges needed by a list of nics exist.

  Only NICs whose filled parameters say bridged mode are considered; their
  links are checked on the target node via RPC. No RPC is made if there is
  no such NIC.

  @param lu: the LU on behalf of which we make the check
  @param target_nics: the NICs whose bridges must exist
  @param target_node: the node on which the bridges are checked

  """
  cluster = lu.cfg.GetClusterInfo()

  bridges = []
  for nic in target_nics:
    filled = cluster.SimpleFillNIC(nic.nicparams)
    if filled[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(filled[constants.NIC_LINK])

  if not bridges:
    return

  check = lu.rpc.call_bridges_exist(target_node, bridges)
  check.Raise("Error checking bridges on destination node '%s'" %
              target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1618
1619
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Checks that the bridges needed by an instance exist.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary
      node when C{None}

  """
  target = node
  if target is None:
    target = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, target)
1627
1628
def _CheckOSVariant(os_obj, name):
  """Checks that an OS name matches the variants the OS declares.

  An OS without supported variants must be named without a variant; an OS
  with supported variants must be named with one of them.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the name does not conform

  """
  variant = objects.OS.GetVariant(name)
  supported = os_obj.supported_variants

  if supported:
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)
    if variant not in supported:
      raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
  elif variant:
    raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                               " passed)" % (os_obj.name, variant),
                               errors.ECODE_INVAL)
1651
1652
def _GetNodeInstancesInner(cfg, fn):
  """Returns all configured instances accepted by a predicate.

  @param cfg: configuration object providing C{GetAllInstancesInfo}
  @param fn: predicate called with each instance object
  @return: list of the instance objects for which C{fn} returned true

  """
  selected = []
  for instance in cfg.GetAllInstancesInfo().values():
    if fn(instance):
      selected.append(instance)
  return selected
1655
1656
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  An instance is selected if the node name is among its C{all_nodes}.

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1663
1664
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns the instances which have the node as their primary node.

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1671
1672
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns the instances which have the node among their secondaries.

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1679
1680
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the extra arguments needed for a storage type.

  @param cfg: configuration object providing the storage directories
  @param storage_type: the storage type to return arguments for
  @return: for file storage, a one-element list holding the list of the
      file and shared file storage directories; an empty list otherwise

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1691
1692
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Returns the indices of an instance's disks reported faulty by a node.

  @param cfg: configuration object, used to set the disk IDs for the node
  @param rpc_runner: object providing C{call_blockdev_getmirrorstatus}
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed through to the RPC result's C{Raise} method
  @rtype: list
  @return: indices (into the returned payload) of all entries whose local
      disk status is L{constants.LDS_FAULTY}

  """
  for disk in instance.disks:
    cfg.SetDiskID(disk, node_name)

  status_result = \
    rpc_runner.call_blockdev_getmirrorstatus(node_name,
                                             (instance.disks, instance))
  status_result.Raise("Failed to get disk status from node %s" % node_name,
                      prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # Empty/missing status entries are skipped, not treated as faulty
  return [idx for (idx, status) in enumerate(status_result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
1709
1710
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Validate the iallocator/node opcode arguments, applying the default.

  At most one of (iallocator, node) may be given. When neither is given, or
  when the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT}, the
  opcode's iallocator slot is overwritten with the cluster-wide default
  iallocator. An empty list in the node slot counts as "no node".

  @param lu: the logical unit whose opcode (C{lu.op}) is checked/updated
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  @raise errors.OpPrereqError: if both arguments are given, or if the default
      is needed but the cluster has none configured

  """
  ialloc = getattr(lu.op, iallocator_slot, None)
  node = getattr(lu.op, node_slot, None)
  if node == []:
    node = None

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)

  wants_default = ((node is None and ialloc is None) or
                   ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT)
  if not wants_default:
    return

  cluster_default = lu.cfg.GetDefaultIAllocator()
  if not cluster_default:
    raise errors.OpPrereqError("No iallocator or node given and no"
                               " cluster-wide default iallocator found;"
                               " please specify either an iallocator or a"
                               " node, or set a cluster-wide default"
                               " iallocator", errors.ECODE_INVAL)

  setattr(lu.op, iallocator_slot, cluster_default)
1745
1746
def _GetDefaultIAllocator(cfg, ialloc):
  """Picks the iallocator to use, falling back to the cluster default.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode; any false value means
      "not specified"
  @rtype: string
  @return: Iallocator name
  @raise errors.OpPrereqError: if neither the opcode nor the cluster
      configuration names an iallocator

  """
  # The cluster default is only consulted when the opcode gave nothing
  chosen = ialloc or cfg.GetDefaultIAllocator()
  if chosen:
    return chosen

  raise errors.OpPrereqError("No iallocator was specified, neither in the"
                             " opcode nor as a cluster-wide default",
                             errors.ECODE_INVAL)
1768
1769
def _CheckHostnameSane(lu, name):
  """Resolves a hostname and checks the result still matches what was given.

  The resolved name is accepted only if L{utils.MatchNameComponent} matches
  the given name against it, which guards against accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object
  @raise errors.OpPrereqError: if the resolved name does not match

  """
  resolved = netutils.GetHostname(name=name)

  if name != resolved.name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, resolved.name)

  if utils.MatchNameComponent(name, [resolved.name]):
    return resolved

  raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                              " same as given hostname '%s'") %
                              (resolved.name, name), errors.ECODE_INVAL)
1789
1790
class LUClusterPostInit(LogicalUnit):
  """Logical unit whose only job is running the cluster-init hooks.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Builds the hooks environment.

    @rtype: dict
    @return: environment with the cluster name as C{OP_TARGET}

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    return env

  def BuildHooksNodes(self):
    """Builds the lists of nodes to run hooks on.

    @rtype: tuple
    @return: two node lists; the first is empty and the second contains
        only the master node

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Does no work of its own.

    @return: always C{True}

    """
    return True
1817
1818
class LUClusterDestroy(LogicalUnit):
  """Logical unit tearing down the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Builds the hooks environment.

    @rtype: dict
    @return: environment with the cluster name as C{OP_TARGET}

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    return env

  def BuildHooksNodes(self):
    """Builds the lists of nodes to run hooks on.

    @rtype: tuple
    @return: two empty node lists; the post hook is run explicitly from
        L{Exec} instead

    """
    return ([], [])

  def CheckPrereq(self):
    """Checks that the cluster is empty.

    The only node left must be the master and no instance may remain.

    @raise errors.OpPrereqError: if other nodes or any instances still exist

    """
    master = self.cfg.GetMasterNode()
    nodelist = self.cfg.GetNodeList()

    only_master_left = (len(nodelist) == 1 and nodelist[0] == master)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    remaining_instances = self.cfg.GetInstanceList()
    if remaining_instances:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(remaining_instances),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hook on the master node and then asks it to deactivate the
    master IP; a failure of the latter is only logged as a warning.

    @return: the name taken from the master network parameters

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    master_name = master_params.name

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_name)

    use_external_script = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_name,
                                                     master_params,
                                                     use_external_script)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_name
1878
1879
1880 def _VerifyCertificate(filename):
1881   """Verifies a certificate for L{LUClusterVerifyConfig}.
1882
1883   @type filename: string
1884   @param filename: Path to PEM file
1885
1886   """
1887   try:
1888     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1889                                            utils.ReadFile(filename))
1890   except Exception, err: # pylint: disable=W0703
1891     return (LUClusterVerifyConfig.ETYPE_ERROR,
1892             "Failed to load X509 certificate %s: %s" % (filename, err))
1893
1894   (errcode, msg) = \
1895     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1896                                 constants.SSL_CERT_EXPIRATION_ERROR)
1897
1898   if msg:
1899     fnamemsg = "While verifying %s: %s" % (filename, msg)
1900   else:
1901     fnamemsg = None
1902
1903   if errcode is None:
1904     return (None, fnamemsg)
1905   elif errcode == utils.CERT_WARNING:
1906     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1907   elif errcode == utils.CERT_ERROR:
1908     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1909
1910   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1911
1912
def _GetAllHypervisorParameters(cluster, instances):
  """Collects every set of hypervisor parameters defined in the cluster.

  Three sources are visited, in this order: the cluster-level defaults of
  each enabled hypervisor, the per-OS overrides (only non-empty ones) and
  the instances that carry their own hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = [("cluster", hv_name, cluster.GetHVDefaults(hv_name))
              for hv_name in cluster.enabled_hypervisors]

  for (os_name, os_hvp) in cluster.os_hvp.items():
    for (hv_name, hv_params) in os_hvp.items():
      if not hv_params:
        continue
      hvp_data.append(("os %s" % os_name, hv_name,
                       cluster.GetHVDefaults(hv_name, os_name=os_name)))

  # TODO: collapse identical parameter values in a single one
  hvp_data.extend(("instance %s" % inst.name, inst.hypervisor,
                   cluster.FillHV(inst))
                  for inst in instances if inst.hvparams)

  return hvp_data
1943
1944
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  Provides L{_Error} and L{_ErrorIf}, the latter also maintaining the
  C{self.bad} boolean. The class using it must provide C{self.op},
  C{self._feedback_fn} and an initialised C{self.bad}.

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Formats a message and reports it through the feedback function.

    Depending on the opcode's C{error_codes} parameter the output is either
    a colon-separated, parseable line or a simpler human-readable string.

    This must be called only from Exec and functions called from Exec.

    @param ecode: tuple of (item type, error text, ignored third element)
    @param item: name of the affected item, or a false value if there is none
    @param msg: the message; interpolated with C{args} when any are given
    @keyword code: severity, defaults to L{ETYPE_ERROR}

    """
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    (itype, etxt, _) = ecode

    # Interpolation only happens when arguments were actually passed
    if args:
      msg = msg % args

    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      text = "%s:%s:%s:%s:%s" % (severity, etxt, itype, item, msg)
    else:
      item_part = ""
      if item:
        item_part = " " + item
      text = "%s: %s%s: %s" % (severity, itype, item_part, msg)

    self._feedback_fn("  - %s" % text) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Reports an error if the condition holds and tracks overall failure.

    The condition is also considered true when the opcode's
    C{debug_simulate_errors} is set. Error codes listed in the opcode's
    C{ignore_errors} are demoted to warnings, and warnings never mark the
    operation as failed.

    """
    triggered = (bool(cond)
                 or self.op.debug_simulate_errors) # pylint: disable=E1101

    # Ignored error codes are reported as warnings only
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)

    if triggered:
      self._Error(ecode, *args, **kwargs)

    # Only real errors may flip the operation's failure flag
    if severity == self.ETYPE_ERROR:
      self.bad = self.bad or triggered
2002
2003
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares an empty set of needed locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Builds one job per group to verify, plus the global config check.

    When the opcode names a group, only that group is verified. Otherwise a
    configuration verification job is queued first and every group job is
    made to depend on it.

    @return: a L{ResultWithJobs} wrapping the list of jobs (each job being a
        list with a single opcode)

    """
    single_group = bool(self.op.group_name)
    jobs = []

    if single_group:
      groups = [self.op.group_name]
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      config_op = \
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
      jobs.append([config_op])

    for group in groups:
      if single_group:
        depends = None
      else:
        # Always depend on global verification; the offset is computed from
        # the number of jobs queued so far, so it always points back at the
        # first (config) job
        depends = [(-len(jobs), [])]

      group_op = opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=
                                                self.op.ignore_errors,
                                              depends=depends)
      jobs.append([group_op])

    # Fix up all parameters
    for job in jobs:
      for op in job:
        op.debug_simulate_errors = self.op.debug_simulate_errors
        op.verbose = self.op.verbose
        op.error_codes = self.op.error_codes
        try:
          op.skip_checks = self.op.skip_checks
        except AttributeError:
          # Only opcodes other than the group verification may lack it
          assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
2047
2048
2049 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2050   """Verifies the cluster config.
2051
2052   """
2053   REQ_BGL = False
2054
2055   def _VerifyHVP(self, hvp_data):
2056     """Verifies locally the syntax of the hypervisor parameters.
2057
2058     """
2059     for item, hv_name, hv_params in hvp_data:
2060       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2061              (item, hv_name))
2062       try:
2063         hv_class = hypervisor.GetHypervisor(hv_name)
2064         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2065         hv_class.CheckParameterSyntax(hv_params)
2066       except errors.GenericError, err:
2067         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2068
2069   def ExpandNames(self):
2070     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2071     self.share_locks = _ShareAll()
2072
2073   def CheckPrereq(self):
2074     """Check prerequisites.
2075
2076     """
2077     # Retrieve all information
2078     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2079     self.all_node_info = self.cfg.GetAllNodesInfo()
2080     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2081
2082   def Exec(self, feedback_fn):
2083     """Verify integrity of cluster, performing various test on nodes.
2084
2085     """
2086     self.bad = False
2087     self._feedback_fn = feedback_fn
2088
2089     feedback_fn("* Verifying cluster config")
2090
2091     for msg in self.cfg.VerifyConfig():
2092       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2093
2094     feedback_fn("* Verifying cluster certificate files")
2095
2096     for cert_filename in pathutils.ALL_CERT_FILES:
2097       (errcode, msg) = _VerifyCertificate(cert_filename)
2098       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2099
2100     feedback_fn("* Verifying hypervisor parameters")
2101
2102     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2103                                                 self.all_inst_info.values()))
2104
2105     feedback_fn("* Verifying all nodes belong to an existing group")
2106
2107     # We do this verification here because, should this bogus circumstance
2108     # occur, it would never be caught by VerifyGroup, which only acts on
2109     # nodes/instances reachable from existing node groups.
2110
2111     dangling_nodes = set(node.name for node in self.all_node_info.values()
2112                          if node.group not in self.all_group_info)
2113
2114     dangling_instances = {}
2115     no_node_instances = []
2116
2117     for inst in self.all_inst_info.values():
2118       if inst.primary_node in dangling_nodes:
2119         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2120       elif inst.primary_node not in self.all_node_info:
2121         no_node_instances.append(inst.name)
2122
2123     pretty_dangling = [
2124         "%s (%s)" %
2125         (node.name,
2126          utils.CommaJoin(dangling_instances.get(node.name,
2127                                                 ["no instances"])))
2128         for node in dangling_nodes]
2129
2130     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2131                   None,
2132                   "the following nodes (and their instances) belong to a non"
2133                   " existing group: %s", utils.CommaJoin(pretty_dangling))
2134
2135     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2136                   None,
2137                   "the following instances have a non-existing primary-node:"
2138                   " %s", utils.CommaJoin(no_node_instances))
2139
2140     return not self.bad
2141
2142
2143 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2144   """Verifies the status of a node group.
2145
2146   """
  # Hook path and hook type under which this LU's hooks are run
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  # NOTE(review): presumably means this LU does not need the big cluster
  # lock and declares its own locks in ExpandNames -- confirm against
  # LogicalUnit
  REQ_BGL = False

  # Zero-width match at the beginning of every line (re.M makes "^" match
  # after each newline too); presumably used to indent hook output --
  # confirm at the place of use
  _HOOKS_INDENT_RE = re.compile("^", re.M)
2152
2153   class NodeImage(object):
2154     """A class representing the logical and physical status of a node.
2155
2156     @type name: string
2157     @ivar name: the node name to which this object refers
2158     @ivar volumes: a structure as returned from
2159         L{ganeti.backend.GetVolumeList} (runtime)
2160     @ivar instances: a list of running instances (runtime)
2161     @ivar pinst: list of configured primary instances (config)
2162     @ivar sinst: list of configured secondary instances (config)
2163     @ivar sbp: dictionary of {primary-node: list of instances} for all
2164         instances for which this node is secondary (config)
2165     @ivar mfree: free memory, as reported by hypervisor (runtime)
2166     @ivar dfree: free disk, as reported by the node (runtime)
2167     @ivar offline: the offline status (config)
2168     @type rpc_fail: boolean
2169     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2170         not whether the individual keys were correct) (runtime)
2171     @type lvm_fail: boolean
2172     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2173     @type hyp_fail: boolean
2174     @ivar hyp_fail: whether the RPC call didn't return the instance list
2175     @type ghost: boolean
2176     @ivar ghost: whether this is a known node or not (config)
2177     @type os_fail: boolean
2178     @ivar os_fail: whether the RPC call didn't return valid OS data
2179     @type oslist: list
2180     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2181     @type vm_capable: boolean
2182     @ivar vm_capable: whether the node can host instances
2183
2184     """
2185     def __init__(self, offline=False, name=None, vm_capable=True):
2186       self.name = name
2187       self.volumes = {}
2188       self.instances = []
2189       self.pinst = []
2190       self.sinst = []
2191       self.sbp = {}
2192       self.mfree = 0
2193       self.dfree = 0
2194       self.offline = offline
2195       self.vm_capable = vm_capable
2196       self.rpc_fail = False
2197       self.lvm_fail = False
2198       self.hyp_fail = False
2199       self.ghost = False
2200       self.os_fail = False
2201       self.oslist = {}
2202
2203   def ExpandNames(self):
2204     # This raises errors.OpPrereqError on its own:
2205     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2206
2207     # Get instances in node group; this is unsafe and needs verification later
2208     inst_names = \
2209       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2210
2211     self.needed_locks = {
2212       locking.LEVEL_INSTANCE: inst_names,
2213       locking.LEVEL_NODEGROUP: [self.group_uuid],
2214       locking.LEVEL_NODE: [],
2215
2216       # This opcode is run by watcher every five minutes and acquires all nodes
2217       # for a group. It doesn't run for a long time, so it's better to acquire
2218       # the node allocation lock as well.
2219       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2220       }
2221
2222     self.share_locks = _ShareAll()
2223
2224   def DeclareLocks(self, level):
2225     if level == locking.LEVEL_NODE:
2226       # Get members of node group; this is unsafe and needs verification later
2227       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2228
2229       all_inst_info = self.cfg.GetAllInstancesInfo()
2230
2231       # In Exec(), we warn about mirrored instances that have primary and
2232       # secondary living in separate node groups. To fully verify that
2233       # volumes for these instances are healthy, we will need to do an
2234       # extra call to their secondaries. We ensure here those nodes will
2235       # be locked.
2236       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2237         # Important: access only the instances whose lock is owned
2238         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2239           nodes.update(all_inst_info[inst].secondary_nodes)
2240
2241       self.needed_locks[locking.LEVEL_NODE] = nodes
2242
2243   def CheckPrereq(self):
2244     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2245     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2246
2247     group_nodes = set(self.group_info.members)
2248     group_instances = \
2249       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2250
2251     unlocked_nodes = \
2252         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2253
2254     unlocked_instances = \
2255         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2256
2257     if unlocked_nodes:
2258       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2259                                  utils.CommaJoin(unlocked_nodes),
2260                                  errors.ECODE_STATE)
2261
2262     if unlocked_instances:
2263       raise errors.OpPrereqError("Missing lock for instances: %s" %
2264                                  utils.CommaJoin(unlocked_instances),
2265                                  errors.ECODE_STATE)
2266
2267     self.all_node_info = self.cfg.GetAllNodesInfo()
2268     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2269
2270     self.my_node_names = utils.NiceSort(group_nodes)
2271     self.my_inst_names = utils.NiceSort(group_instances)
2272
2273     self.my_node_info = dict((name, self.all_node_info[name])
2274                              for name in self.my_node_names)
2275
2276     self.my_inst_info = dict((name, self.all_inst_info[name])
2277                              for name in self.my_inst_names)
2278
2279     # We detect here the nodes that will need the extra RPC calls for verifying
2280     # split LV volumes; they should be locked.
2281     extra_lv_nodes = set()
2282
2283     for inst in self.my_inst_info.values():
2284       if inst.disk_template in constants.DTS_INT_MIRROR:
2285         for nname in inst.all_nodes:
2286           if self.all_node_info[nname].group != self.group_uuid:
2287             extra_lv_nodes.add(nname)
2288
2289     unlocked_lv_nodes = \
2290         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2291
2292     if unlocked_lv_nodes:
2293       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2294                                  utils.CommaJoin(unlocked_lv_nodes),
2295                                  errors.ECODE_STATE)
2296     self.extra_lv_nodes = list(extra_lv_nodes)
2297
2298   def _VerifyNode(self, ninfo, nresult):
2299     """Perform some basic validation on data returned from a node.
2300
2301       - check the result data structure is well formed and has all the
2302         mandatory fields
2303       - check ganeti version
2304
2305     @type ninfo: L{objects.Node}
2306     @param ninfo: the node to check
2307     @param nresult: the results from the node
2308     @rtype: boolean
2309     @return: whether overall this call was successful (and we can expect
2310          reasonable values in the respose)
2311
2312     """
2313     node = ninfo.name
2314     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2315
2316     # main result, nresult should be a non-empty dict
2317     test = not nresult or not isinstance(nresult, dict)
2318     _ErrorIf(test, constants.CV_ENODERPC, node,
2319                   "unable to verify node: no data returned")
2320     if test:
2321       return False
2322
2323     # compares ganeti version
2324     local_version = constants.PROTOCOL_VERSION
2325     remote_version = nresult.get("version", None)
2326     test = not (remote_version and
2327                 isinstance(remote_version, (list, tuple)) and
2328                 len(remote_version) == 2)
2329     _ErrorIf(test, constants.CV_ENODERPC, node,
2330              "connection to node returned invalid data")
2331     if test:
2332       return False
2333
2334     test = local_version != remote_version[0]
2335     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2336              "incompatible protocol versions: master %s,"
2337              " node %s", local_version, remote_version[0])
2338     if test:
2339       return False
2340
2341     # node seems compatible, we can actually try to look into its results
2342
2343     # full package version
2344     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2345                   constants.CV_ENODEVERSION, node,
2346                   "software version mismatch: master %s, node %s",
2347                   constants.RELEASE_VERSION, remote_version[1],
2348                   code=self.ETYPE_WARNING)
2349
2350     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2351     if ninfo.vm_capable and isinstance(hyp_result, dict):
2352       for hv_name, hv_result in hyp_result.iteritems():
2353         test = hv_result is not None
2354         _ErrorIf(test, constants.CV_ENODEHV, node,
2355                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2356
2357     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2358     if ninfo.vm_capable and isinstance(hvp_result, list):
2359       for item, hv_name, hv_result in hvp_result:
2360         _ErrorIf(True, constants.CV_ENODEHV, node,
2361                  "hypervisor %s parameter verify failure (source %s): %s",
2362                  hv_name, item, hv_result)
2363
2364     test = nresult.get(constants.NV_NODESETUP,
2365                        ["Missing NODESETUP results"])
2366     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2367              "; ".join(test))
2368
2369     return True
2370
2371   def _VerifyNodeTime(self, ninfo, nresult,
2372                       nvinfo_starttime, nvinfo_endtime):
2373     """Check the node time.
2374
2375     @type ninfo: L{objects.Node}
2376     @param ninfo: the node to check
2377     @param nresult: the remote results for the node
2378     @param nvinfo_starttime: the start time of the RPC call
2379     @param nvinfo_endtime: the end time of the RPC call
2380
2381     """
2382     node = ninfo.name
2383     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2384
2385     ntime = nresult.get(constants.NV_TIME, None)
2386     try:
2387       ntime_merged = utils.MergeTime(ntime)
2388     except (ValueError, TypeError):
2389       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2390       return
2391
2392     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2393       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2394     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2395       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2396     else:
2397       ntime_diff = None
2398
2399     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2400              "Node time diverges by at least %s from master node time",
2401              ntime_diff)
2402
2403   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2404     """Check the node LVM results.
2405
2406     @type ninfo: L{objects.Node}
2407     @param ninfo: the node to check
2408     @param nresult: the remote results for the node
2409     @param vg_name: the configured VG name
2410
2411     """
2412     if vg_name is None:
2413       return
2414
2415     node = ninfo.name
2416     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2417
2418     # checks vg existence and size > 20G
2419     vglist = nresult.get(constants.NV_VGLIST, None)
2420     test = not vglist
2421     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2422     if not test:
2423       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2424                                             constants.MIN_VG_SIZE)
2425       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2426
2427     # check pv names
2428     pvlist = nresult.get(constants.NV_PVLIST, None)
2429     test = pvlist is None
2430     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2431     if not test:
2432       # check that ':' is not present in PV names, since it's a
2433       # special character for lvcreate (denotes the range of PEs to
2434       # use on the PV)
2435       for _, pvname, owner_vg in pvlist:
2436         test = ":" in pvname
2437         _ErrorIf(test, constants.CV_ENODELVM, node,
2438                  "Invalid character ':' in PV '%s' of VG '%s'",
2439                  pvname, owner_vg)
2440
2441   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2442     """Check the node bridges.
2443
2444     @type ninfo: L{objects.Node}
2445     @param ninfo: the node to check
2446     @param nresult: the remote results for the node
2447     @param bridges: the expected list of bridges
2448
2449     """
2450     if not bridges:
2451       return
2452
2453     node = ninfo.name
2454     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2455
2456     missing = nresult.get(constants.NV_BRIDGES, None)
2457     test = not isinstance(missing, list)
2458     _ErrorIf(test, constants.CV_ENODENET, node,
2459              "did not return valid bridge information")
2460     if not test:
2461       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2462                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2463
2464   def _VerifyNodeUserScripts(self, ninfo, nresult):
2465     """Check the results of user scripts presence and executability on the node
2466
2467     @type ninfo: L{objects.Node}
2468     @param ninfo: the node to check
2469     @param nresult: the remote results for the node
2470
2471     """
2472     node = ninfo.name
2473
2474     test = not constants.NV_USERSCRIPTS in nresult
2475     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2476                   "did not return user scripts information")
2477
2478     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2479     if not test:
2480       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2481                     "user scripts not present or not executable: %s" %
2482                     utils.CommaJoin(sorted(broken_scripts)))
2483
2484   def _VerifyNodeNetwork(self, ninfo, nresult):
2485     """Check the node network connectivity results.
2486
2487     @type ninfo: L{objects.Node}
2488     @param ninfo: the node to check
2489     @param nresult: the remote results for the node
2490
2491     """
2492     node = ninfo.name
2493     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2494
2495     test = constants.NV_NODELIST not in nresult
2496     _ErrorIf(test, constants.CV_ENODESSH, node,
2497              "node hasn't returned node ssh connectivity data")
2498     if not test:
2499       if nresult[constants.NV_NODELIST]:
2500         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2501           _ErrorIf(True, constants.CV_ENODESSH, node,
2502                    "ssh communication with node '%s': %s", a_node, a_msg)
2503
2504     test = constants.NV_NODENETTEST not in nresult
2505     _ErrorIf(test, constants.CV_ENODENET, node,
2506              "node hasn't returned node tcp connectivity data")
2507     if not test:
2508       if nresult[constants.NV_NODENETTEST]:
2509         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2510         for anode in nlist:
2511           _ErrorIf(True, constants.CV_ENODENET, node,
2512                    "tcp communication with node '%s': %s",
2513                    anode, nresult[constants.NV_NODENETTEST][anode])
2514
2515     test = constants.NV_MASTERIP not in nresult
2516     _ErrorIf(test, constants.CV_ENODENET, node,
2517              "node hasn't returned node master IP reachability data")
2518     if not test:
2519       if not nresult[constants.NV_MASTERIP]:
2520         if node == self.master_node:
2521           msg = "the master node cannot reach the master IP (not configured?)"
2522         else:
2523           msg = "cannot reach the master IP"
2524         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2525
2526   def _VerifyInstance(self, instance, instanceconfig, node_image,
2527                       diskstatus):
2528     """Verify an instance.
2529
2530     This function checks to see if the required block devices are
2531     available on the instance's node.
2532
2533     """
2534     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2535     node_current = instanceconfig.primary_node
2536
2537     node_vol_should = {}
2538     instanceconfig.MapLVsByNode(node_vol_should)
2539
2540     cluster = self.cfg.GetClusterInfo()
2541     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2542                                                             self.group_info)
2543     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2544     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2545              code=self.ETYPE_WARNING)
2546
2547     for node in node_vol_should:
2548       n_img = node_image[node]
2549       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2550         # ignore missing volumes on offline or broken nodes
2551         continue
2552       for volume in node_vol_should[node]:
2553         test = volume not in n_img.volumes
2554         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2555                  "volume %s missing on node %s", volume, node)
2556
2557     if instanceconfig.admin_state == constants.ADMINST_UP:
2558       pri_img = node_image[node_current]
2559       test = instance not in pri_img.instances and not pri_img.offline
2560       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2561                "instance not running on its primary node %s",
2562                node_current)
2563
2564     diskdata = [(nname, success, status, idx)
2565                 for (nname, disks) in diskstatus.items()
2566                 for idx, (success, status) in enumerate(disks)]
2567
2568     for nname, success, bdev_status, idx in diskdata:
2569       # the 'ghost node' construction in Exec() ensures that we have a
2570       # node here
2571       snode = node_image[nname]
2572       bad_snode = snode.ghost or snode.offline
2573       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2574                not success and not bad_snode,
2575                constants.CV_EINSTANCEFAULTYDISK, instance,
2576                "couldn't retrieve status for disk/%s on %s: %s",
2577                idx, nname, bdev_status)
2578       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2579                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2580                constants.CV_EINSTANCEFAULTYDISK, instance,
2581                "disk/%s on %s is faulty", idx, nname)
2582
2583   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2584     """Verify if there are any unknown volumes in the cluster.
2585
2586     The .os, .swap and backup volumes are ignored. All other volumes are
2587     reported as unknown.
2588
2589     @type reserved: L{ganeti.utils.FieldSet}
2590     @param reserved: a FieldSet of reserved volume names
2591
2592     """
2593     for node, n_img in node_image.items():
2594       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2595           self.all_node_info[node].group != self.group_uuid):
2596         # skip non-healthy nodes
2597         continue
2598       for volume in n_img.volumes:
2599         test = ((node not in node_vol_should or
2600                 volume not in node_vol_should[node]) and
2601                 not reserved.Matches(volume))
2602         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2603                       "volume %s is unknown", volume)
2604
2605   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2606     """Verify N+1 Memory Resilience.
2607
2608     Check that if one single node dies we can still start all the
2609     instances it was primary for.
2610
2611     """
2612     cluster_info = self.cfg.GetClusterInfo()
2613     for node, n_img in node_image.items():
2614       # This code checks that every node which is now listed as
2615       # secondary has enough memory to host all instances it is
2616       # supposed to should a single other node in the cluster fail.
2617       # FIXME: not ready for failover to an arbitrary node
2618       # FIXME: does not support file-backed instances
2619       # WARNING: we currently take into account down instances as well
2620       # as up ones, considering that even if they're down someone
2621       # might want to start them even in the event of a node failure.
2622       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2623         # we're skipping nodes marked offline and nodes in other groups from
2624         # the N+1 warning, since most likely we don't have good memory
2625         # infromation from them; we already list instances living on such
2626         # nodes, and that's enough warning
2627         continue
2628       #TODO(dynmem): also consider ballooning out other instances
2629       for prinode, instances in n_img.sbp.items():
2630         needed_mem = 0
2631         for instance in instances:
2632           bep = cluster_info.FillBE(instance_cfg[instance])
2633           if bep[constants.BE_AUTO_BALANCE]:
2634             needed_mem += bep[constants.BE_MINMEM]
2635         test = n_img.mfree < needed_mem
2636         self._ErrorIf(test, constants.CV_ENODEN1, node,
2637                       "not enough memory to accomodate instance failovers"
2638                       " should node %s fail (%dMiB needed, %dMiB available)",
2639                       prinode, needed_mem, n_img.mfree)
2640
2641   @classmethod
2642   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2643                    (files_all, files_opt, files_mc, files_vm)):
2644     """Verifies file checksums collected from all nodes.
2645
2646     @param errorif: Callback for reporting errors
2647     @param nodeinfo: List of L{objects.Node} objects
2648     @param master_node: Name of master node
2649     @param all_nvinfo: RPC results
2650
2651     """
2652     # Define functions determining which nodes to consider for a file
2653     files2nodefn = [
2654       (files_all, None),
2655       (files_mc, lambda node: (node.master_candidate or
2656                                node.name == master_node)),
2657       (files_vm, lambda node: node.vm_capable),
2658       ]
2659
2660     # Build mapping from filename to list of nodes which should have the file
2661     nodefiles = {}
2662     for (files, fn) in files2nodefn:
2663       if fn is None:
2664         filenodes = nodeinfo
2665       else:
2666         filenodes = filter(fn, nodeinfo)
2667       nodefiles.update((filename,
2668                         frozenset(map(operator.attrgetter("name"), filenodes)))
2669                        for filename in files)
2670
2671     assert set(nodefiles) == (files_all | files_mc | files_vm)
2672
2673     fileinfo = dict((filename, {}) for filename in nodefiles)
2674     ignore_nodes = set()
2675
2676     for node in nodeinfo:
2677       if node.offline:
2678         ignore_nodes.add(node.name)
2679         continue
2680
2681       nresult = all_nvinfo[node.name]
2682
2683       if nresult.fail_msg or not nresult.payload:
2684         node_files = None
2685       else:
2686         fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2687         node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2688                           for (key, value) in fingerprints.items())
2689         del fingerprints
2690
2691       test = not (node_files and isinstance(node_files, dict))
2692       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2693               "Node did not return file checksum data")
2694       if test:
2695         ignore_nodes.add(node.name)
2696         continue
2697
2698       # Build per-checksum mapping from filename to nodes having it
2699       for (filename, checksum) in node_files.items():
2700         assert filename in nodefiles
2701         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2702
2703     for (filename, checksums) in fileinfo.items():
2704       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2705
2706       # Nodes having the file
2707       with_file = frozenset(node_name
2708                             for nodes in fileinfo[filename].values()
2709                             for node_name in nodes) - ignore_nodes
2710
2711       expected_nodes = nodefiles[filename] - ignore_nodes
2712
2713       # Nodes missing file
2714       missing_file = expected_nodes - with_file
2715
2716       if filename in files_opt:
2717         # All or no nodes
2718         errorif(missing_file and missing_file != expected_nodes,
2719                 constants.CV_ECLUSTERFILECHECK, None,
2720                 "File %s is optional, but it must exist on all or no"
2721                 " nodes (not found on %s)",
2722                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2723       else:
2724         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2725                 "File %s is missing from node(s) %s", filename,
2726                 utils.CommaJoin(utils.NiceSort(missing_file)))
2727
2728         # Warn if a node has a file it shouldn't
2729         unexpected = with_file - expected_nodes
2730         errorif(unexpected,
2731                 constants.CV_ECLUSTERFILECHECK, None,
2732                 "File %s should not exist on node(s) %s",
2733                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2734
2735       # See if there are multiple versions of the file
2736       test = len(checksums) > 1
2737       if test:
2738         variants = ["variant %s on %s" %
2739                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2740                     for (idx, (checksum, nodes)) in
2741                       enumerate(sorted(checksums.items()))]
2742       else:
2743         variants = []
2744
2745       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2746               "File %s found with %s different checksums (%s)",
2747               filename, len(checksums), "; ".join(variants))
2748
2749   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2750                       drbd_map):
2751     """Verifies and the node DRBD status.
2752
2753     @type ninfo: L{objects.Node}
2754     @param ninfo: the node to check
2755     @param nresult: the remote results for the node
2756     @param instanceinfo: the dict of instances
2757     @param drbd_helper: the configured DRBD usermode helper
2758     @param drbd_map: the DRBD map as returned by
2759         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2760
2761     """
2762     node = ninfo.name
2763     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2764
2765     if drbd_helper:
2766       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2767       test = (helper_result is None)
2768       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2769                "no drbd usermode helper returned")
2770       if helper_result:
2771         status, payload = helper_result
2772         test = not status
2773         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2774                  "drbd usermode helper check unsuccessful: %s", payload)
2775         test = status and (payload != drbd_helper)
2776         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2777                  "wrong drbd usermode helper: %s", payload)
2778
2779     # compute the DRBD minors
2780     node_drbd = {}
2781     for minor, instance in drbd_map[node].items():
2782       test = instance not in instanceinfo
2783       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2784                "ghost instance '%s' in temporary DRBD map", instance)
2785         # ghost instance should not be running, but otherwise we
2786         # don't give double warnings (both ghost instance and
2787         # unallocated minor in use)
2788       if test:
2789         node_drbd[minor] = (instance, False)
2790       else:
2791         instance = instanceinfo[instance]
2792         node_drbd[minor] = (instance.name,
2793                             instance.admin_state == constants.ADMINST_UP)
2794
2795     # and now check them
2796     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2797     test = not isinstance(used_minors, (tuple, list))
2798     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2799              "cannot parse drbd status file: %s", str(used_minors))
2800     if test:
2801       # we cannot check drbd status
2802       return
2803
2804     for minor, (iname, must_exist) in node_drbd.items():
2805       test = minor not in used_minors and must_exist
2806       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2807                "drbd minor %d of instance %s is not active", minor, iname)
2808     for minor in used_minors:
2809       test = minor not in node_drbd
2810       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2811                "unallocated drbd minor %d is in use", minor)
2812
2813   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2814     """Builds the node OS structures.
2815
2816     @type ninfo: L{objects.Node}
2817     @param ninfo: the node to check
2818     @param nresult: the remote results for the node
2819     @param nimg: the node image object
2820
2821     """
2822     node = ninfo.name
2823     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2824
2825     remote_os = nresult.get(constants.NV_OSLIST, None)
2826     test = (not isinstance(remote_os, list) or
2827             not compat.all(isinstance(v, list) and len(v) == 7
2828                            for v in remote_os))
2829
2830     _ErrorIf(test, constants.CV_ENODEOS, node,
2831              "node hasn't returned valid OS data")
2832
2833     nimg.os_fail = test
2834
2835     if test:
2836       return
2837
2838     os_dict = {}
2839
2840     for (name, os_path, status, diagnose,
2841          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2842
2843       if name not in os_dict:
2844         os_dict[name] = []
2845
2846       # parameters is a list of lists instead of list of tuples due to
2847       # JSON lacking a real tuple type, fix it:
2848       parameters = [tuple(v) for v in parameters]
2849       os_dict[name].append((os_path, status, diagnose,
2850                             set(variants), set(parameters), set(api_ver)))
2851
2852     nimg.oslist = os_dict
2853
2854   def _VerifyNodeOS(self, ninfo, nimg, base):
2855     """Verifies the node OS list.
2856
2857     @type ninfo: L{objects.Node}
2858     @param ninfo: the node to check
2859     @param nimg: the node image object
2860     @param base: the 'template' node we match against (e.g. from the master)
2861
2862     """
2863     node = ninfo.name
2864     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2865
2866     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2867
2868     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2869     for os_name, os_data in nimg.oslist.items():
2870       assert os_data, "Empty OS status for OS %s?!" % os_name
2871       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2872       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2873                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2874       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2875                "OS '%s' has multiple entries (first one shadows the rest): %s",
2876                os_name, utils.CommaJoin([v[0] for v in os_data]))
2877       # comparisons with the 'base' image
2878       test = os_name not in base.oslist
2879       _ErrorIf(test, constants.CV_ENODEOS, node,
2880                "Extra OS %s not present on reference node (%s)",
2881                os_name, base.name)
2882       if test:
2883         continue
2884       assert base.oslist[os_name], "Base node has empty OS status?"
2885       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2886       if not b_status:
2887         # base OS is invalid, skipping
2888         continue
2889       for kind, a, b in [("API version", f_api, b_api),
2890                          ("variants list", f_var, b_var),
2891                          ("parameters", beautify_params(f_param),
2892                           beautify_params(b_param))]:
2893         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2894                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2895                  kind, os_name, base.name,
2896                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2897
2898     # check any missing OSes
2899     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2900     _ErrorIf(missing, constants.CV_ENODEOS, node,
2901              "OSes present on reference node %s but missing on this node: %s",
2902              base.name, utils.CommaJoin(missing))
2903
2904   def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2905     """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2906
2907     @type ninfo: L{objects.Node}
2908     @param ninfo: the node to check
2909     @param nresult: the remote results for the node
2910     @type is_master: bool
2911     @param is_master: Whether node is the master node
2912
2913     """
2914     node = ninfo.name
2915
2916     if (is_master and
2917         (constants.ENABLE_FILE_STORAGE or
2918          constants.ENABLE_SHARED_FILE_STORAGE)):
2919       try:
2920         fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2921       except KeyError:
2922         # This should never happen
2923         self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2924                       "Node did not return forbidden file storage paths")
2925       else:
2926         self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2927                       "Found forbidden file storage paths: %s",
2928                       utils.CommaJoin(fspaths))
2929     else:
2930       self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2931                     constants.CV_ENODEFILESTORAGEPATHS, node,
2932                     "Node should not have returned forbidden file storage"
2933                     " paths")
2934
2935   def _VerifyOob(self, ninfo, nresult):
2936     """Verifies out of band functionality of a node.
2937
2938     @type ninfo: L{objects.Node}
2939     @param ninfo: the node to check
2940     @param nresult: the remote results for the node
2941
2942     """
2943     node = ninfo.name
2944     # We just have to verify the paths on master and/or master candidates
2945     # as the oob helper is invoked on the master
2946     if ((ninfo.master_candidate or ninfo.master_capable) and
2947         constants.NV_OOB_PATHS in nresult):
2948       for path_result in nresult[constants.NV_OOB_PATHS]:
2949         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2950
2951   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2952     """Verifies and updates the node volume data.
2953
2954     This function will update a L{NodeImage}'s internal structures
2955     with data from the remote call.
2956
2957     @type ninfo: L{objects.Node}
2958     @param ninfo: the node to check
2959     @param nresult: the remote results for the node
2960     @param nimg: the node image object
2961     @param vg_name: the configured VG name
2962
2963     """
2964     node = ninfo.name
2965     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2966
2967     nimg.lvm_fail = True
2968     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2969     if vg_name is None:
2970       pass
2971     elif isinstance(lvdata, basestring):
2972       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2973                utils.SafeEncode(lvdata))
2974     elif not isinstance(lvdata, dict):
2975       _ErrorIf(True, constants.CV_ENODELVM, node,
2976                "rpc call to node failed (lvlist)")
2977     else:
2978       nimg.volumes = lvdata
2979       nimg.lvm_fail = False
2980
2981   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2982     """Verifies and updates the node instance list.
2983
2984     If the listing was successful, then updates this node's instance
2985     list. Otherwise, it marks the RPC call as failed for the instance
2986     list key.
2987
2988     @type ninfo: L{objects.Node}
2989     @param ninfo: the node to check
2990     @param nresult: the remote results for the node
2991     @param nimg: the node image object
2992
2993     """
2994     idata = nresult.get(constants.NV_INSTANCELIST, None)
2995     test = not isinstance(idata, list)
2996     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2997                   "rpc call to node failed (instancelist): %s",
2998                   utils.SafeEncode(str(idata)))
2999     if test:
3000       nimg.hyp_fail = True
3001     else:
3002       nimg.instances = idata
3003
3004   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3005     """Verifies and computes a node information map
3006
3007     @type ninfo: L{objects.Node}
3008     @param ninfo: the node to check
3009     @param nresult: the remote results for the node
3010     @param nimg: the node image object
3011     @param vg_name: the configured VG name
3012
3013     """
3014     node = ninfo.name
3015     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3016
3017     # try to read free memory (from the hypervisor)
3018     hv_info = nresult.get(constants.NV_HVINFO, None)
3019     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3020     _ErrorIf(test, constants.CV_ENODEHV, node,
3021              "rpc call to node failed (hvinfo)")
3022     if not test:
3023       try:
3024         nimg.mfree = int(hv_info["memory_free"])
3025       except (ValueError, TypeError):
3026         _ErrorIf(True, constants.CV_ENODERPC, node,
3027                  "node returned invalid nodeinfo, check hypervisor")
3028
3029     # FIXME: devise a free space model for file based instances as well
3030     if vg_name is not None:
3031       test = (constants.NV_VGLIST not in nresult or
3032               vg_name not in nresult[constants.NV_VGLIST])
3033       _ErrorIf(test, constants.CV_ENODELVM, node,
3034                "node didn't return data for the volume group '%s'"
3035                " - it is either missing or broken", vg_name)
3036       if not test:
3037         try:
3038           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3039         except (ValueError, TypeError):
3040           _ErrorIf(True, constants.CV_ENODERPC, node,
3041                    "node returned invalid LVM info, check LVM status")
3042
3043   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3044     """Gets per-disk status information for all instances.
3045
3046     @type nodelist: list of strings
3047     @param nodelist: Node names
3048     @type node_image: dict of (name, L{objects.Node})
3049     @param node_image: Node objects
3050     @type instanceinfo: dict of (name, L{objects.Instance})
3051     @param instanceinfo: Instance objects
3052     @rtype: {instance: {node: [(succes, payload)]}}
3053     @return: a dictionary of per-instance dictionaries with nodes as
3054         keys and disk information as values; the disk information is a
3055         list of tuples (success, payload)
3056
3057     """
3058     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3059
3060     node_disks = {}
3061     node_disks_devonly = {}
3062     diskless_instances = set()
3063     diskless = constants.DT_DISKLESS
3064
3065     for nname in nodelist:
3066       node_instances = list(itertools.chain(node_image[nname].pinst,
3067                                             node_image[nname].sinst))
3068       diskless_instances.update(inst for inst in node_instances
3069                                 if instanceinfo[inst].disk_template == diskless)
3070       disks = [(inst, disk)
3071                for inst in node_instances
3072                for disk in instanceinfo[inst].disks]
3073
3074       if not disks:
3075         # No need to collect data
3076         continue
3077
3078       node_disks[nname] = disks
3079
3080       # _AnnotateDiskParams makes already copies of the disks
3081       devonly = []
3082       for (inst, dev) in disks:
3083         (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3084         self.cfg.SetDiskID(anno_disk, nname)
3085         devonly.append(anno_disk)
3086
3087       node_disks_devonly[nname] = devonly
3088
3089     assert len(node_disks) == len(node_disks_devonly)
3090
3091     # Collect data from all nodes with disks
3092     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3093                                                           node_disks_devonly)
3094
3095     assert len(result) == len(node_disks)
3096
3097     instdisk = {}
3098
3099     for (nname, nres) in result.items():
3100       disks = node_disks[nname]
3101
3102       if nres.offline:
3103         # No data from this node
3104         data = len(disks) * [(False, "node offline")]
3105       else:
3106         msg = nres.fail_msg
3107         _ErrorIf(msg, constants.CV_ENODERPC, nname,
3108                  "while getting disk information: %s", msg)
3109         if msg:
3110           # No data from this node
3111           data = len(disks) * [(False, msg)]
3112         else:
3113           data = []
3114           for idx, i in enumerate(nres.payload):
3115             if isinstance(i, (tuple, list)) and len(i) == 2:
3116               data.append(i)
3117             else:
3118               logging.warning("Invalid result from node %s, entry %d: %s",
3119                               nname, idx, i)
3120               data.append((False, "Invalid result from the remote node"))
3121
3122       for ((inst, _), status) in zip(disks, data):
3123         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3124
3125     # Add empty entries for diskless instances.
3126     for inst in diskless_instances:
3127       assert inst not in instdisk
3128       instdisk[inst] = {}
3129
3130     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3131                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
3132                       compat.all(isinstance(s, (tuple, list)) and
3133                                  len(s) == 2 for s in statuses)
3134                       for inst, nnames in instdisk.items()
3135                       for nname, statuses in nnames.items())
3136     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3137
3138     return instdisk
3139
3140   @staticmethod
3141   def _SshNodeSelector(group_uuid, all_nodes):
3142     """Create endless iterators for all potential SSH check hosts.
3143
3144     """
3145     nodes = [node for node in all_nodes
3146              if (node.group != group_uuid and
3147                  not node.offline)]
3148     keyfunc = operator.attrgetter("group")
3149
3150     return map(itertools.cycle,
3151                [sorted(map(operator.attrgetter("name"), names))
3152                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3153                                                   keyfunc)])
3154
3155   @classmethod
3156   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3157     """Choose which nodes should talk to which other nodes.
3158
3159     We will make nodes contact all nodes in their group, and one node from
3160     every other group.
3161
3162     @warning: This algorithm has a known issue if one node group is much
3163       smaller than others (e.g. just one node). In such a case all other
3164       nodes will talk to the single node.
3165
3166     """
3167     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3168     sel = cls._SshNodeSelector(group_uuid, all_nodes)
3169
3170     return (online_nodes,
3171             dict((name, sorted([i.next() for i in sel]))
3172                  for name in online_nodes))
3173
3174   def BuildHooksEnv(self):
3175     """Build hooks env.
3176
3177     Cluster-Verify hooks just ran in the post phase and their failure makes
3178     the output be logged in the verify output and the verification to fail.
3179
3180     """
3181     env = {
3182       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3183       }
3184
3185     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3186                for node in self.my_node_info.values())
3187
3188     return env
3189
3190   def BuildHooksNodes(self):
3191     """Build hooks nodes.
3192
3193     """
3194     return ([], self.my_node_names)
3195
3196   def Exec(self, feedback_fn):
3197     """Verify integrity of the node group, performing various test on nodes.
3198
3199     """
3200     # This method has too many local variables. pylint: disable=R0914
3201     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3202
3203     if not self.my_node_names:
3204       # empty node group
3205       feedback_fn("* Empty node group, skipping verification")
3206       return True
3207
3208     self.bad = False
3209     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3210     verbose = self.op.verbose
3211     self._feedback_fn = feedback_fn
3212
3213     vg_name = self.cfg.GetVGName()
3214     drbd_helper = self.cfg.GetDRBDHelper()
3215     cluster = self.cfg.GetClusterInfo()
3216     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3217     hypervisors = cluster.enabled_hypervisors
3218     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3219
3220     i_non_redundant = [] # Non redundant instances
3221     i_non_a_balanced = [] # Non auto-balanced instances
3222     i_offline = 0 # Count of offline instances
3223     n_offline = 0 # Count of offline nodes
3224     n_drained = 0 # Count of nodes being drained
3225     node_vol_should = {}
3226
3227     # FIXME: verify OS list
3228
3229     # File verification
3230     filemap = _ComputeAncillaryFiles(cluster, False)
3231
3232     # do local checksums
3233     master_node = self.master_node = self.cfg.GetMasterNode()
3234     master_ip = self.cfg.GetMasterIP()
3235
3236     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3237
3238     user_scripts = []
3239     if self.cfg.GetUseExternalMipScript():
3240       user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3241
3242     node_verify_param = {
3243       constants.NV_FILELIST:
3244         map(vcluster.MakeVirtualPath,
3245             utils.UniqueSequence(filename
3246                                  for files in filemap
3247                                  for filename in files)),
3248       constants.NV_NODELIST:
3249         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3250                                   self.all_node_info.values()),
3251       constants.NV_HYPERVISOR: hypervisors,
3252       constants.NV_HVPARAMS:
3253         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3254       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3255                                  for node in node_data_list
3256                                  if not node.offline],
3257       constants.NV_INSTANCELIST: hypervisors,
3258       constants.NV_VERSION: None,
3259       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3260       constants.NV_NODESETUP: None,
3261       constants.NV_TIME: None,
3262       constants.NV_MASTERIP: (master_node, master_ip),
3263       constants.NV_OSLIST: None,
3264       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3265       constants.NV_USERSCRIPTS: user_scripts,
3266       }
3267
3268     if vg_name is not None:
3269       node_verify_param[constants.NV_VGLIST] = None
3270       node_verify_param[constants.NV_LVLIST] = vg_name
3271       node_verify_param[constants.NV_PVLIST] = [vg_name]
3272
3273     if drbd_helper:
3274       node_verify_param[constants.NV_DRBDLIST] = None
3275       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3276
3277     if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3278       # Load file storage paths only from master node
3279       node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3280
3281     # bridge checks
3282     # FIXME: this needs to be changed per node-group, not cluster-wide
3283     bridges = set()
3284     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3285     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3286       bridges.add(default_nicpp[constants.NIC_LINK])
3287     for instance in self.my_inst_info.values():
3288       for nic in instance.nics:
3289         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3290         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3291           bridges.add(full_nic[constants.NIC_LINK])
3292
3293     if bridges:
3294       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3295
3296     # Build our expected cluster state
3297     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3298                                                  name=node.name,
3299                                                  vm_capable=node.vm_capable))
3300                       for node in node_data_list)
3301
3302     # Gather OOB paths
3303     oob_paths = []
3304     for node in self.all_node_info.values():
3305       path = _SupportsOob(self.cfg, node)
3306       if path and path not in oob_paths:
3307         oob_paths.append(path)
3308
3309     if oob_paths:
3310       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3311
3312     for instance in self.my_inst_names:
3313       inst_config = self.my_inst_info[instance]
3314       if inst_config.admin_state == constants.ADMINST_OFFLINE:
3315         i_offline += 1
3316
3317       for nname in inst_config.all_nodes:
3318         if nname not in node_image:
3319           gnode = self.NodeImage(name=nname)
3320           gnode.ghost = (nname not in self.all_node_info)
3321           node_image[nname] = gnode
3322
3323       inst_config.MapLVsByNode(node_vol_should)
3324
3325       pnode = inst_config.primary_node
3326       node_image[pnode].pinst.append(instance)
3327
3328       for snode in inst_config.secondary_nodes:
3329         nimg = node_image[snode]
3330         nimg.sinst.append(instance)
3331         if pnode not in nimg.sbp:
3332           nimg.sbp[pnode] = []
3333         nimg.sbp[pnode].append(instance)
3334
3335     # At this point, we have the in-memory data structures complete,
3336     # except for the runtime information, which we'll gather next
3337
3338     # Due to the way our RPC system works, exact response times cannot be
3339     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3340     # time before and after executing the request, we can at least have a time
3341     # window.
3342     nvinfo_starttime = time.time()
3343     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3344                                            node_verify_param,
3345                                            self.cfg.GetClusterName())
3346     nvinfo_endtime = time.time()
3347
3348     if self.extra_lv_nodes and vg_name is not None:
3349       extra_lv_nvinfo = \
3350           self.rpc.call_node_verify(self.extra_lv_nodes,
3351                                     {constants.NV_LVLIST: vg_name},
3352                                     self.cfg.GetClusterName())
3353     else:
3354       extra_lv_nvinfo = {}
3355
3356     all_drbd_map = self.cfg.ComputeDRBDMap()
3357
3358     feedback_fn("* Gathering disk information (%s nodes)" %
3359                 len(self.my_node_names))
3360     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3361                                      self.my_inst_info)
3362
3363     feedback_fn("* Verifying configuration file consistency")
3364
3365     # If not all nodes are being checked, we need to make sure the master node
3366     # and a non-checked vm_capable node are in the list.
3367     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3368     if absent_nodes:
3369       vf_nvinfo = all_nvinfo.copy()
3370       vf_node_info = list(self.my_node_info.values())
3371       additional_nodes = []
3372       if master_node not in self.my_node_info:
3373         additional_nodes.append(master_node)
3374         vf_node_info.append(self.all_node_info[master_node])
3375       # Add the first vm_capable node we find which is not included,
3376       # excluding the master node (which we already have)
3377       for node in absent_nodes:
3378         nodeinfo = self.all_node_info[node]
3379         if (nodeinfo.vm_capable and not nodeinfo.offline and
3380             node != master_node):
3381           additional_nodes.append(node)
3382           vf_node_info.append(self.all_node_info[node])
3383           break
3384       key = constants.NV_FILELIST
3385       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3386                                                  {key: node_verify_param[key]},
3387                                                  self.cfg.GetClusterName()))
3388     else:
3389       vf_nvinfo = all_nvinfo
3390       vf_node_info = self.my_node_info.values()
3391
3392     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3393
3394     feedback_fn("* Verifying node status")
3395
3396     refos_img = None
3397
3398     for node_i in node_data_list:
3399       node = node_i.name
3400       nimg = node_image[node]
3401
3402       if node_i.offline:
3403         if verbose:
3404           feedback_fn("* Skipping offline node %s" % (node,))
3405         n_offline += 1
3406         continue
3407
3408       if node == master_node:
3409         ntype = "master"
3410       elif node_i.master_candidate:
3411         ntype = "master candidate"
3412       elif node_i.drained:
3413         ntype = "drained"
3414         n_drained += 1
3415       else:
3416         ntype = "regular"
3417       if verbose:
3418         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3419
3420       msg = all_nvinfo[node].fail_msg
3421       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3422                msg)
3423       if msg:
3424         nimg.rpc_fail = True
3425         continue
3426
3427       nresult = all_nvinfo[node].payload
3428
3429       nimg.call_ok = self._VerifyNode(node_i, nresult)
3430       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3431       self._VerifyNodeNetwork(node_i, nresult)
3432       self._VerifyNodeUserScripts(node_i, nresult)
3433       self._VerifyOob(node_i, nresult)
3434       self._VerifyFileStoragePaths(node_i, nresult,
3435                                    node == master_node)
3436
3437       if nimg.vm_capable:
3438         self._VerifyNodeLVM(node_i, nresult, vg_name)
3439         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3440                              all_drbd_map)
3441
3442         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3443         self._UpdateNodeInstances(node_i, nresult, nimg)
3444         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3445         self._UpdateNodeOS(node_i, nresult, nimg)
3446
3447         if not nimg.os_fail:
3448           if refos_img is None:
3449             refos_img = nimg
3450           self._VerifyNodeOS(node_i, nimg, refos_img)
3451         self._VerifyNodeBridges(node_i, nresult, bridges)
3452
3453         # Check whether all running instances are primary for the node. (This
3454         # can no longer be done from _VerifyInstance below, since some of the
3455         # wrong instances could be from other node groups.)
3456         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3457
3458         for inst in non_primary_inst:
3459           test = inst in self.all_inst_info
3460           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3461                    "instance should not run on node %s", node_i.name)
3462           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3463                    "node is running unknown instance %s", inst)
3464
3465     for node, result in extra_lv_nvinfo.items():
3466       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3467                               node_image[node], vg_name)
3468
3469     feedback_fn("* Verifying instance status")
3470     for instance in self.my_inst_names:
3471       if verbose:
3472         feedback_fn("* Verifying instance %s" % instance)
3473       inst_config = self.my_inst_info[instance]
3474       self._VerifyInstance(instance, inst_config, node_image,
3475                            instdisk[instance])
3476       inst_nodes_offline = []
3477
3478       pnode = inst_config.primary_node
3479       pnode_img = node_image[pnode]
3480       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3481                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3482                " primary node failed", instance)
3483
3484       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3485                pnode_img.offline,
3486                constants.CV_EINSTANCEBADNODE, instance,
3487                "instance is marked as running and lives on offline node %s",
3488                inst_config.primary_node)
3489
3490       # If the instance is non-redundant we cannot survive losing its primary
3491       # node, so we are not N+1 compliant.
3492       if inst_config.disk_template not in constants.DTS_MIRRORED:
3493         i_non_redundant.append(instance)
3494
3495       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3496                constants.CV_EINSTANCELAYOUT,
3497                instance, "instance has multiple secondary nodes: %s",
3498                utils.CommaJoin(inst_config.secondary_nodes),
3499                code=self.ETYPE_WARNING)
3500
3501       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3502         pnode = inst_config.primary_node
3503         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3504         instance_groups = {}
3505
3506         for node in instance_nodes:
3507           instance_groups.setdefault(self.all_node_info[node].group,
3508                                      []).append(node)
3509
3510         pretty_list = [
3511           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3512           # Sort so that we always list the primary node first.
3513           for group, nodes in sorted(instance_groups.items(),
3514                                      key=lambda (_, nodes): pnode in nodes,
3515                                      reverse=True)]
3516
3517         self._ErrorIf(len(instance_groups) > 1,
3518                       constants.CV_EINSTANCESPLITGROUPS,
3519                       instance, "instance has primary and secondary nodes in"
3520                       " different groups: %s", utils.CommaJoin(pretty_list),
3521                       code=self.ETYPE_WARNING)
3522
3523       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3524         i_non_a_balanced.append(instance)
3525
3526       for snode in inst_config.secondary_nodes:
3527         s_img = node_image[snode]
3528         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3529                  snode, "instance %s, connection to secondary node failed",
3530                  instance)
3531
3532         if s_img.offline:
3533           inst_nodes_offline.append(snode)
3534
3535       # warn that the instance lives on offline nodes
3536       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3537                "instance has offline secondary node(s) %s",
3538                utils.CommaJoin(inst_nodes_offline))
3539       # ... or ghost/non-vm_capable nodes
3540       for node in inst_config.all_nodes:
3541         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3542                  instance, "instance lives on ghost node %s", node)
3543         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3544                  instance, "instance lives on non-vm_capable node %s", node)
3545
3546     feedback_fn("* Verifying orphan volumes")
3547     reserved = utils.FieldSet(*cluster.reserved_lvs)
3548
3549     # We will get spurious "unknown volume" warnings if any node of this group
3550     # is secondary for an instance whose primary is in another group. To avoid
3551     # them, we find these instances and add their volumes to node_vol_should.
3552     for inst in self.all_inst_info.values():
3553       for secondary in inst.secondary_nodes:
3554         if (secondary in self.my_node_info
3555             and inst.name not in self.my_inst_info):
3556           inst.MapLVsByNode(node_vol_should)
3557           break
3558
3559     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3560
3561     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3562       feedback_fn("* Verifying N+1 Memory redundancy")
3563       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3564
3565     feedback_fn("* Other Notes")
3566     if i_non_redundant:
3567       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3568                   % len(i_non_redundant))
3569
3570     if i_non_a_balanced:
3571       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3572                   % len(i_non_a_balanced))
3573
3574     if i_offline:
3575       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3576
3577     if n_offline:
3578       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3579
3580     if n_drained:
3581       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3582
3583     return not self.bad
3584
3585   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3586     """Analyze the post-hooks' result
3587
3588     This method analyses the hook result, handles it, and sends some
3589     nicely-formatted feedback back to the user.
3590
3591     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3592         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3593     @param hooks_results: the results of the multi-node hooks rpc call
3594     @param feedback_fn: function used send feedback back to the caller
3595     @param lu_result: previous Exec result
3596     @return: the new Exec result, based on the previous result
3597         and hook results
3598
3599     """
3600     # We only really run POST phase hooks, only for non-empty groups,
3601     # and are only interested in their results
3602     if not self.my_node_names:
3603       # empty node group
3604       pass
3605     elif phase == constants.HOOKS_PHASE_POST:
3606       # Used to change hooks' output to proper indentation
3607       feedback_fn("* Hooks Results")
3608       assert hooks_results, "invalid result from hooks"
3609
3610       for node_name in hooks_results:
3611         res = hooks_results[node_name]
3612         msg = res.fail_msg
3613         test = msg and not res.offline
3614         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3615                       "Communication failure in hooks execution: %s", msg)
3616         if res.offline or msg:
3617           # No need to investigate payload if node is offline or gave
3618           # an error.
3619           continue
3620         for script, hkr, output in res.payload:
3621           test = hkr == constants.HKR_FAIL
3622           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3623                         "Script %s failed, output:", script)
3624           if test:
3625             output = self._HOOKS_INDENT_RE.sub("      ", output)
3626             feedback_fn("%s" % output)
3627             lu_result = False
3628
3629     return lu_result
3630
3631
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the status of the disks in the whole cluster.

  The actual work is delegated: one L{opcodes.OpGroupVerifyDisks} job is
  submitted for every node group lock owned by this LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all node groups.

    """
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Submit one disk verification job per owned node group.

    @param feedback_fn: feedback function, not used here
    @return: a L{ResultWithJobs} wrapping one single-opcode job for each
        owned node group lock

    """
    jobs = []

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])

    return ResultWithJobs(jobs)
3650
3651
class LUGroupVerifyDisks(NoHooksLU):
  """Checks the state of every disk belonging to one node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name and set up the initial lock declarations.

    """
    # LookupNodeGroup raises errors.OpPrereqError by itself if the group
    # can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Instance, node group and node locks start out empty; DeclareLocks
    # fills them in level by level. Each level gets its own list object.
    wanted = dict((level, [])
                  for level in (locking.LEVEL_INSTANCE,
                                locking.LEVEL_NODEGROUP,
                                locking.LEVEL_NODE))

    # This opcode acquires all node locks in a group. LUClusterVerifyDisks
    # starts one instance of this opcode for every group, which means all
    # nodes will be locked for a short amount of time, so it's better to
    # acquire the node allocation lock as well.
    wanted[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    self.needed_locks = wanted
    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given level.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Instances are locked optimistically; CheckPrereq verifies the set
      # once node and group locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      # Besides our own group, optimistically lock all groups used by the
      # locked instances; this goes via the node before it's locked and
      # therefore requires verification later on
      groups = set([self.group_uuid])
      for inst_name in self.owned_locks(locking.LEVEL_INSTANCE):
        groups.update(self.cfg.GetInstanceNodeGroups(inst_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # On top of that, lock all nodes in the group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      group = self.cfg.GetNodeGroup(self.group_uuid)
      self.needed_locks[locking.LEVEL_NODE].extend(group.members)

  def CheckPrereq(self):
    """Verify the optimistically acquired locks and load the instances.

    Stores the configuration of all owned instances, keyed as returned by
    C{GetMultiInstanceInfo}, in C{self.instances}.

    """
    inst_locks = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    group_locks = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    node_locks = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in group_locks

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, inst_locks)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(inst_locks))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              group_locks, node_locks, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    # Only instances which are administratively up are checked
    running = [inst for inst in self.instances.values()
               if inst.admin_state == constants.ADMINST_UP]
    expected_lvs = _MapInstanceDisksToNodes(running)

    if not expected_lvs:
      # Nothing to look for, hence no RPC is made
      return ({}, [], {})

    node_errors = {}
    need_activation = set()
    missing = {}

    owned_nodes = set(self.owned_locks(locking.LEVEL_NODE))
    vm_capable = set(self.cfg.GetVmCapableNodeList())
    nodes = utils.NiceSort(owned_nodes & vm_capable)

    node_lvs = self.rpc.call_lv_list(nodes, [])

    for (node, node_res) in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        node_errors[node] = msg
        continue

      for (lv_name, (_, _, lv_online)) in node_res.payload.items():
        # Every LV reported by the node is removed from the expected set
        inst = expected_lvs.pop((node, lv_name), None)
        if inst is not None and not lv_online:
          need_activation.add(inst)

    # any leftover items in expected_lvs are missing LVs, let's arrange the
    # data better
    for (key, inst) in expected_lvs.items():
      missing.setdefault(inst, []).append(list(key))

    return (node_errors, list(need_activation), missing)
3768
3769
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  Sizes recorded in the configuration which differ from the ones reported
  by the instances' primary nodes are corrected in the configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare locks for either the given instances or all of them.

    """
    if not self.op.instances:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,

        # This opcode acquires the node locks for all instances
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      # Not getting the node allocation lock as only a specific set of
      # instances (and their nodes) is going to be acquired
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      locking.LEVEL_NODE_ALLOC: 1,
      }

  def DeclareLocks(self, level):
    """Compute the node resource locks when specific instances were given.

    @param level: the locking level being declared

    """
    if level != locking.LEVEL_NODE_RES or self.wanted_names is None:
      return

    self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    Falls back to the owned instance locks if no explicit instance list
    was given, and loads the configuration of the wanted instances into
    C{self.wanted_instances}.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        [compat.snd(item)
         for item in self.cfg.GetMultiInstanceInfo(self.wanted_names)]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size. Only the first child of each DRBD8
    disk is looked at, recursively.

    @param disk: an L{ganeti.objects.Disk} object
    @return: a true value if a child size was changed, a false value
        otherwise

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    first_child = disk.children[0]
    too_small = first_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   first_child.size, disk.size)
      first_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    nested = self._EnsureChildSizes(first_child)
    return nested or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: feedback function, passed on to the configuration
        update calls
    @return: list of (instance name, disk index, size) tuples, one entry
        per correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    disks_by_node = {}
    for inst in self.wanted_instances:
      # The primary node gets an entry even if the instance has no disks
      node_disks = disks_by_node.setdefault(inst.primary_node, [])
      node_disks.extend((inst, idx, disk)
                        for (idx, disk) in enumerate(inst.disks))

    assert not (frozenset(disks_by_node.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    corrections = []

    for (node, entries) in disks_by_node.items():
      # SetDiskID is applied to copies, not to the configuration's disks
      copies = [entry[2].Copy() for entry in entries]
      for dev in copies:
        self.cfg.SetDiskID(dev, node)

      result = self.rpc.call_blockdev_getsize(node, copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      sizes = result.payload
      if len(sizes) != len(entries):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(entries), sizes)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((inst, idx, disk), reported) in zip(entries, sizes):
        if reported is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, inst.name)
          continue

        if not isinstance(reported, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, inst.name)
          continue

        # Divide by 2**20 (presumably bytes to mebibytes, the unit used
        # for disk.size -- TODO confirm)
        actual = reported >> 20

        if actual != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       inst.name, disk.size, actual)
          disk.size = actual
          self.cfg.Update(inst, feedback_fn)
          corrections.append((inst.name, idx, actual))

        if self._EnsureChildSizes(disk):
          self.cfg.Update(inst, feedback_fn)
          corrections.append((inst.name, idx, disk.size))

    return corrections
3897
3898
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dict with the current cluster name as C{OP_TARGET} and the
        requested name as C{NEW_NAME}

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (list holding only the master node, list of all
        nodes)

    """
    master_only = [self.cfg.GetMasterNode()]
    everyone = self.cfg.GetNodeList()
    return (master_only, everyone)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved, the resulting IP address is remembered in
    C{self.ip} and C{self.op.name} is replaced by the resolved name.

    @raise errors.OpPrereqError: if neither name nor IP address change, or
        if a changed IP address already answers on the node daemon port

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    resolved_name = hostname.name
    resolved_ip = hostname.ip
    self.ip = resolved_ip

    name_changed = resolved_name != self.cfg.GetClusterName()
    ip_changed = resolved_ip != self.cfg.GetMasterIP()

    if not (name_changed or ip_changed):
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # A new address must not answer on the network yet
    if ip_changed and netutils.TcpPing(resolved_ip,
                                       constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 resolved_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = resolved_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master IP is deactivated, the configuration and the known hosts
    file are updated and distributed, and finally the master IP is
    activated again using the new address.

    @param feedback_fn: feedback function, passed on to the configuration
        update
    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    shutdown = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params,
                                                       use_ext_script)
    shutdown.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)

      # the file is uploaded to all online nodes except the master
      targets = self.cfg.GetOnlineNodeList()
      if master_params.name in targets:
        targets.remove(master_params.name)
      _UploadHelper(self, targets, pathutils.SSH_KNOWN_HOSTS_FILE)
    finally:
      # The activation is attempted even if one of the steps above failed
      master_params.ip = new_ip
      startup = self.rpc.call_node_activate_master_ip(master_params.name,
                                                      master_params,
                                                      use_ext_script)
      if startup.fail_msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s",
                        startup.fail_msg)

    return new_name
3982
3983
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  The address class matching the cluster's primary IP family is looked up
  and asked to validate the netmask.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the primary IP family is not known or if
      the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    # Report an unknown family as a prerequisite failure
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               (ip_family, ), errors.ECODE_INVAL)

  if not ipcls.ValidateNetmask(netmask):
    # Explicit 1-tuple: a bare "(netmask)" is just a parenthesised value, so
    # a tuple argument would be unpacked as several format arguments
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask, ), errors.ECODE_INVAL)
4003
4004
4005 class LUClusterSetParams(LogicalUnit):
4006   """Change the parameters of the cluster.
4007
4008   """
4009   HPATH = "cluster-modify"
4010   HTYPE = constants.HTYPE_CLUSTER
4011   REQ_BGL = False
4012
4013   def CheckArguments(self):
4014     """Check parameters
4015
4016     """
4017     if self.op.uid_pool:
4018       uidpool.CheckUidPool(self.op.uid_pool)
4019
4020     if self.op.add_uids:
4021       uidpool.CheckUidPool(self.op.add_uids)
4022
4023     if self.op.remove_uids:
4024       uidpool.CheckUidPool(self.op.remove_uids)
4025
4026     if self.op.master_netmask is not None:
4027       _ValidateNetmask(self.cfg, self.op.master_netmask)
4028
4029     if self.op.diskparams:
4030       for dt_params in self.op.diskparams.values():
4031         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4032       try:
4033         utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4034       except errors.OpPrereqError, err:
4035         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4036                                    errors.ECODE_INVAL)
4037
4038   def ExpandNames(self):
4039     # FIXME: in the future maybe other cluster params won't require checking on
4040     # all nodes to be modified.
4041     # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4042     # resource locks the right thing, shouldn't it be the BGL instead?
4043     self.needed_locks = {
4044       locking.LEVEL_NODE: locking.ALL_SET,
4045       locking.LEVEL_INSTANCE: locking.ALL_SET,
4046       locking.LEVEL_NODEGROUP: locking.ALL_SET,
4047       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4048     }
4049     self.share_locks = _ShareAll()
4050
4051   def BuildHooksEnv(self):
4052     """Build hooks env.
4053
4054     """
4055     return {
4056       "OP_TARGET": self.cfg.GetClusterName(),
4057       "NEW_VG_NAME": self.op.vg_name,
4058       }
4059
4060   def BuildHooksNodes(self):
4061     """Build hooks nodes.
4062
4063     """
4064     mn = self.cfg.GetMasterNode()
4065     return ([mn], [mn])
4066
4067   def CheckPrereq(self):
4068     """Check prerequisites.
4069
4070     This checks whether the given params don't conflict and
4071     if the given volume group is valid.
4072
4073     """
4074     if self.op.vg_name is not None and not self.op.vg_name:
4075       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4076         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4077                                    " instances exist", errors.ECODE_INVAL)
4078
4079     if self.op.drbd_helper is not None and not self.op.drbd_helper:
4080       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4081         raise errors.OpPrereqError("Cannot disable drbd helper while"
4082                                    " drbd-based instances exist",
4083                                    errors.ECODE_INVAL)
4084
4085     node_list = self.owned_locks(locking.LEVEL_NODE)
4086
4087     # if vg_name not None, checks given volume group on all nodes
4088     if self.op.vg_name:
4089       vglist = self.rpc.call_vg_list(node_list)
4090       for node in node_list:
4091         msg = vglist[node].fail_msg
4092         if msg:
4093           # ignoring down node
4094           self.LogWarning("Error while gathering data on node %s"
4095                           " (ignoring node): %s", node, msg)
4096           continue
4097         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4098                                               self.op.vg_name,
4099                                               constants.MIN_VG_SIZE)
4100         if vgstatus:
4101           raise errors.OpPrereqError("Error on node '%s': %s" %
4102                                      (node, vgstatus), errors.ECODE_ENVIRON)
4103
4104     if self.op.drbd_helper:
4105       # checks given drbd helper on all nodes
4106       helpers = self.rpc.call_drbd_helper(node_list)
4107       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4108         if ninfo.offline:
4109           self.LogInfo("Not checking drbd helper on offline node %s", node)
4110           continue
4111         msg = helpers[node].fail_msg
4112         if msg:
4113           raise errors.OpPrereqError("Error checking drbd helper on node"
4114                                      " '%s': %s" % (node, msg),
4115                                      errors.ECODE_ENVIRON)
4116         node_helper = helpers[node].payload
4117         if node_helper != self.op.drbd_helper:
4118           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4119                                      (node, node_helper), errors.ECODE_ENVIRON)
4120
4121     self.cluster = cluster = self.cfg.GetClusterInfo()
4122     # validate params changes
4123     if self.op.beparams:
4124       objects.UpgradeBeParams(self.op.beparams)
4125       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4126       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4127
4128     if self.op.ndparams:
4129       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4130       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4131
4132       # TODO: we need a more general way to handle resetting
4133       # cluster-level parameters to default values
4134       if self.new_ndparams["oob_program"] == "":
4135         self.new_ndparams["oob_program"] = \
4136             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4137
4138     if self.op.hv_state:
4139       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4140                                             self.cluster.hv_state_static)
4141       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4142                                for hv, values in new_hv_state.items())
4143
4144     if self.op.disk_state:
4145       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4146                                                 self.cluster.disk_state_static)
4147       self.new_disk_state = \
4148         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4149                             for name, values in svalues.items()))
4150              for storage, svalues in new_disk_state.items())
4151
4152     if self.op.ipolicy:
4153       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4154                                             group_policy=False)
4155
4156       all_instances = self.cfg.GetAllInstancesInfo().values()
4157       violations = set()
4158       for group in self.cfg.GetAllNodeGroupsInfo().values():
4159         instances = frozenset([inst for inst in all_instances
4160                                if compat.any(node in group.members
4161                                              for node in inst.all_nodes)])
4162         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4163         ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4164         new = _ComputeNewInstanceViolations(ipol,
4165                                             new_ipolicy, instances)
4166         if new:
4167           violations.update(new)
4168
4169       if violations:
4170         self.LogWarning("After the ipolicy change the following instances"
4171                         " violate them: %s",
4172                         utils.CommaJoin(utils.NiceSort(violations)))
4173
4174     if self.op.nicparams:
4175       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4176       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4177       objects.NIC.CheckParameterSyntax(self.new_nicparams)
4178       nic_errors = []
4179
4180       # check all instances for consistency
4181       for instance in self.cfg.GetAllInstancesInfo().values():
4182         for nic_idx, nic in enumerate(instance.nics):
4183           params_copy = copy.deepcopy(nic.nicparams)
4184           params_filled = objects.FillDict(self.new_nicparams, params_copy)
4185
4186           # check parameter syntax
4187           try:
4188             objects.NIC.CheckParameterSyntax(params_filled)
4189           except errors.ConfigurationError, err:
4190             nic_errors.append("Instance %s, nic/%d: %s" %
4191                               (instance.name, nic_idx, err))
4192
4193           # if we're moving instances to routed, check that they have an ip
4194           target_mode = params_filled[constants.NIC_MODE]
4195           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4196             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4197                               " address" % (instance.name, nic_idx))
4198       if nic_errors:
4199         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4200                                    "\n".join(nic_errors), errors.ECODE_INVAL)
4201
4202     # hypervisor list/parameters
4203     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4204     if self.op.hvparams:
4205       for hv_name, hv_dict in self.op.hvparams.items():
4206         if hv_name not in self.new_hvparams:
4207           self.new_hvparams[hv_name] = hv_dict
4208         else:
4209           self.new_hvparams[hv_name].update(hv_dict)
4210
4211     # disk template parameters
4212     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4213     if self.op.diskparams:
4214       for dt_name, dt_params in self.op.diskparams.items():
4215         if dt_name not in self.op.diskparams:
4216           self.new_diskparams[dt_name] = dt_params
4217         else:
4218           self.new_diskparams[dt_name].update(dt_params)
4219
4220     # os hypervisor parameters
4221     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4222     if self.op.os_hvp:
4223       for os_name, hvs in self.op.os_hvp.items():
4224         if os_name not in self.new_os_hvp:
4225           self.new_os_hvp[os_name] = hvs
4226         else:
4227           for hv_name, hv_dict in hvs.items():
4228             if hv_name not in self.new_os_hvp[os_name]:
4229               self.new_os_hvp[os_name][hv_name] = hv_dict
4230             else:
4231               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4232
4233     # os parameters
4234     self.new_osp = objects.FillDict(cluster.osparams, {})
4235     if self.op.osparams:
4236       for os_name, osp in self.op.osparams.items():
4237         if os_name not in self.new_osp:
4238           self.new_osp[os_name] = {}
4239
4240         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4241                                                   use_none=True)
4242
4243         if not self.new_osp[os_name]:
4244           # we removed all parameters
4245           del self.new_osp[os_name]
4246         else:
4247           # check the parameter validity (remote check)
4248           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4249                          os_name, self.new_osp[os_name])
4250
4251     # changes to the hypervisor list
4252     if self.op.enabled_hypervisors is not None:
4253       self.hv_list = self.op.enabled_hypervisors
4254       for hv in self.hv_list:
4255         # if the hypervisor doesn't already exist in the cluster
4256         # hvparams, we initialize it to empty, and then (in both
4257         # cases) we make sure to fill the defaults, as we might not
4258         # have a complete defaults list if the hypervisor wasn't
4259         # enabled before
4260         if hv not in new_hvp:
4261           new_hvp[hv] = {}
4262         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4263         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4264     else:
4265       self.hv_list = cluster.enabled_hypervisors
4266
4267     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4268       # either the enabled list has changed, or the parameters have, validate
4269       for hv_name, hv_params in self.new_hvparams.items():
4270         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4271             (self.op.enabled_hypervisors and
4272              hv_name in self.op.enabled_hypervisors)):
4273           # either this is a new hypervisor, or its parameters have changed
4274           hv_class = hypervisor.GetHypervisor(hv_name)
4275           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4276           hv_class.CheckParameterSyntax(hv_params)
4277           _CheckHVParams(self, node_list, hv_name, hv_params)
4278
4279     if self.op.os_hvp:
4280       # no need to check any newly-enabled hypervisors, since the
4281       # defaults have already been checked in the above code-block
4282       for os_name, os_hvp in self.new_os_hvp.items():
4283         for hv_name, hv_params in os_hvp.items():
4284           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4285           # we need to fill in the new os_hvp on top of the actual hv_p
4286           cluster_defaults = self.new_hvparams.get(hv_name, {})
4287           new_osp = objects.FillDict(cluster_defaults, hv_params)
4288           hv_class = hypervisor.GetHypervisor(hv_name)
4289           hv_class.CheckParameterSyntax(new_osp)
4290           _CheckHVParams(self, node_list, hv_name, new_osp)
4291
4292     if self.op.default_iallocator:
4293       alloc_script = utils.FindFile(self.op.default_iallocator,
4294                                     constants.IALLOCATOR_SEARCH_PATH,
4295                                     os.path.isfile)
4296       if alloc_script is None:
4297         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4298                                    " specified" % self.op.default_iallocator,
4299                                    errors.ECODE_INVAL)
4300
4301   def Exec(self, feedback_fn):
4302     """Change the parameters of the cluster.
4303
4304     """
4305     if self.op.vg_name is not None:
4306       new_volume = self.op.vg_name
4307       if not new_volume:
4308         new_volume = None
4309       if new_volume != self.cfg.GetVGName():
4310         self.cfg.SetVGName(new_volume)
4311       else:
4312         feedback_fn("Cluster LVM configuration already in desired"
4313                     " state, not changing")
4314     if self.op.drbd_helper is not None:
4315       new_helper = self.op.drbd_helper
4316       if not new_helper:
4317         new_helper = None
4318       if new_helper != self.cfg.GetDRBDHelper():
4319         self.cfg.SetDRBDHelper(new_helper)
4320       else:
4321         feedback_fn("Cluster DRBD helper already in desired state,"
4322                     " not changing")
4323     if self.op.hvparams:
4324       self.cluster.hvparams = self.new_hvparams
4325     if self.op.os_hvp:
4326       self.cluster.os_hvp = self.new_os_hvp
4327     if self.op.enabled_hypervisors is not None:
4328       self.cluster.hvparams = self.new_hvparams
4329       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4330     if self.op.beparams:
4331       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4332     if self.op.nicparams:
4333       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4334     if self.op.ipolicy:
4335       self.cluster.ipolicy = self.new_ipolicy
4336     if self.op.osparams:
4337       self.cluster.osparams = self.new_osp
4338     if self.op.ndparams:
4339       self.cluster.ndparams = self.new_ndparams
4340     if self.op.diskparams:
4341       self.cluster.diskparams = self.new_diskparams
4342     if self.op.hv_state:
4343       self.cluster.hv_state_static = self.new_hv_state
4344     if self.op.disk_state:
4345       self.cluster.disk_state_static = self.new_disk_state
4346
4347     if self.op.candidate_pool_size is not None:
4348       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4349       # we need to update the pool size here, otherwise the save will fail
4350       _AdjustCandidatePool(self, [])
4351
4352     if self.op.maintain_node_health is not None:
4353       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4354         feedback_fn("Note: CONFD was disabled at build time, node health"
4355                     " maintenance is not useful (still enabling it)")
4356       self.cluster.maintain_node_health = self.op.maintain_node_health
4357
4358     if self.op.prealloc_wipe_disks is not None:
4359       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4360
4361     if self.op.add_uids is not None:
4362       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4363
4364     if self.op.remove_uids is not None:
4365       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4366
4367     if self.op.uid_pool is not None:
4368       self.cluster.uid_pool = self.op.uid_pool
4369
4370     if self.op.default_iallocator is not None:
4371       self.cluster.default_iallocator = self.op.default_iallocator
4372
4373     if self.op.reserved_lvs is not None:
4374       self.cluster.reserved_lvs = self.op.reserved_lvs
4375
4376     if self.op.use_external_mip_script is not None:
4377       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4378
4379     def helper_os(aname, mods, desc):
4380       desc += " OS list"
4381       lst = getattr(self.cluster, aname)
4382       for key, val in mods:
4383         if key == constants.DDM_ADD:
4384           if val in lst:
4385             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4386           else:
4387             lst.append(val)
4388         elif key == constants.DDM_REMOVE:
4389           if val in lst:
4390             lst.remove(val)
4391           else:
4392             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4393         else:
4394           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4395
4396     if self.op.hidden_os:
4397       helper_os("hidden_os", self.op.hidden_os, "hidden")
4398
4399     if self.op.blacklisted_os:
4400       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4401
4402     if self.op.master_netdev:
4403       master_params = self.cfg.GetMasterNetworkParameters()
4404       ems = self.cfg.GetUseExternalMipScript()
4405       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4406                   self.cluster.master_netdev)
4407       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4408                                                        master_params, ems)
4409       result.Raise("Could not disable the master ip")
4410       feedback_fn("Changing master_netdev from %s to %s" %
4411                   (master_params.netdev, self.op.master_netdev))
4412       self.cluster.master_netdev = self.op.master_netdev
4413
4414     if self.op.master_netmask:
4415       master_params = self.cfg.GetMasterNetworkParameters()
4416       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4417       result = self.rpc.call_node_change_master_netmask(master_params.name,
4418                                                         master_params.netmask,
4419                                                         self.op.master_netmask,
4420                                                         master_params.ip,
4421                                                         master_params.netdev)
4422       if result.fail_msg:
4423         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4424         feedback_fn(msg)
4425
4426       self.cluster.master_netmask = self.op.master_netmask
4427
4428     self.cfg.Update(self.cluster, feedback_fn)
4429
4430     if self.op.master_netdev:
4431       master_params = self.cfg.GetMasterNetworkParameters()
4432       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4433                   self.op.master_netdev)
4434       ems = self.cfg.GetUseExternalMipScript()
4435       result = self.rpc.call_node_activate_master_ip(master_params.name,
4436                                                      master_params, ems)
4437       if result.fail_msg:
4438         self.LogWarning("Could not re-enable the master ip on"
4439                         " the master, please restart manually: %s",
4440                         result.fail_msg)
4441
4442
4443 def _UploadHelper(lu, nodes, fname):
4444   """Helper for uploading a file and showing warnings.
4445
4446   """
4447   if os.path.exists(fname):
4448     result = lu.rpc.call_upload_file(nodes, fname)
4449     for to_node, to_result in result.items():
4450       msg = to_result.fail_msg
4451       if msg:
4452         msg = ("Copy of file %s to node %s failed: %s" %
4453                (fname, to_node, msg))
4454         lu.LogWarning(msg)
4455
4456
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @param cluster: the cluster object; its C{modify_etc_hosts},
      C{use_external_mip_script} and C{enabled_hypervisors} attributes
      are used
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @rtype: tuple
  @return: four sets of filenames: files for all nodes, optional files,
      files for master candidates only, files for VM-capable nodes only

  """
  # Files which should be on every node
  shared = set([
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    ])

  if redist:
    # we need to ship at least the RAPI certificate
    shared.add(pathutils.RAPI_CERT_FILE)
  else:
    shared.update(pathutils.ALL_CERT_FILES)
    shared.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    shared.add(pathutils.ETC_HOSTS)

  if cluster.use_external_mip_script:
    shared.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  optional = set([
    pathutils.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  mc_only = set()

  if not redist:
    mc_only.add(pathutils.CLUSTER_CONF_FILE)

    # File storage
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
      shared.add(pathutils.FILE_STORAGE_PATHS_FILE)
      optional.add(pathutils.FILE_STORAGE_PATHS_FILE)

  # Files which should only be on VM-capable nodes: the first element
  # returned by each enabled hypervisor
  vm_only = set()
  for hv_name in cluster.enabled_hypervisors:
    vm_only.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  # The second element holds the optional files of the hypervisor
  for hv_name in cluster.enabled_hypervisors:
    optional.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  combined = shared | mc_only | vm_only
  assert len(combined) == len(shared) + len(mc_only) + len(vm_only), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert combined.issuperset(optional), \
         "Optional file not in a different required list"

  # This one file should never ever be re-distributed via RPC
  assert not (redist and
              pathutils.FILE_STORAGE_PATHS_FILE in combined)

  return (shared, optional, mc_only, vm_only)
4532
4533
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Copy the ancillary cluster files to the nodes which need them.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied to all online nodes (and to the additional
  nodes, if any), except for the master node.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  cfg = lu.cfg

  # Gather target nodes
  cluster = cfg.GetClusterInfo()
  master_name = cfg.GetNodeInfo(cfg.GetMasterNode()).name

  all_targets = cfg.GetOnlineNodeList()
  vm_targets = list(frozenset(all_targets)
                    .intersection(cfg.GetVmCapableNodeList()))

  if additional_nodes is not None:
    all_targets.extend(additional_nodes)
    if additional_vm:
      vm_targets.extend(additional_nodes)

  # Never distribute to master node
  for targets in (all_targets, vm_targets):
    try:
      targets.remove(master_name)
    except ValueError:
      pass

  # Gather file lists
  (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (pathutils.CLUSTER_CONF_FILE in files_all or
              pathutils.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Upload the files: first the ones for all nodes, then the ones which
  # are needed only on the VM-capable nodes
  for fname in files_all:
    _UploadHelper(lu, all_targets, fname)

  for fname in files_vm:
    _UploadHelper(lu, vm_targets, fname)
4583
4584
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The cluster object is written back unchanged through the configuration
  writer and afterwards the ancillary files are copied to the nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and node allocation locks, in shared mode.

    """
    all_locks_levels = [locking.LEVEL_NODE, locking.LEVEL_NODE_ALLOC]
    self.needed_locks = dict((level, locking.ALL_SET)
                             for level in all_locks_levels)
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function used to send feedback to the caller

    """
    cfg = self.cfg
    cfg.Update(cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
4606
4607
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    The master network parameters and the external master IP script
    setting are read from the configuration and passed to the node named
    in those parameters; a failed call raises an error.

    """
    cfg = self.cfg
    params = cfg.GetMasterNetworkParameters()
    use_external_script = cfg.GetUseExternalMipScript()

    outcome = self.rpc.call_node_activate_master_ip(params.name, params,
                                                    use_external_script)
    outcome.Raise("Could not activate the master IP")
4621
4622
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    The master network parameters and the external master IP script
    setting are read from the configuration and passed to the node named
    in those parameters; a failed call raises an error.

    """
    cfg = self.cfg
    params = cfg.GetMasterNetworkParameters()
    use_external_script = cfg.GetUseExternalMipScript()

    outcome = self.rpc.call_node_deactivate_master_ip(params.name, params,
                                                      use_external_script)
    outcome.Raise("Could not deactivate the master IP")
4636
4637
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of the disks is queried on the primary node of the
  instance until no disk reports a sync percentage anymore (or only
  once, if C{oneshot} is set).

  @param lu: calling logical unit, used for RPC, configuration access
      and logging
  @param instance: the instance whose disks are checked
  @param disks: disks to check; if C{None} the selection is left to
      L{_ExpandCheckDisks}, an empty list means there is nothing to check
  @type oneshot: boolean
  @param oneshot: whether to query the status only once instead of
      waiting for the sync to finish (retries for failed RPC calls and
      for degraded disks are still done)
  @rtype: boolean
  @return: C{True} if no disk was found degraded (or if there was
      nothing to check), C{False} otherwise
  @raise errors.RemoteError: if the node could not be queried ten times
      in a row

  """
  # Nothing to wait for: no disks at all, or an explicitly empty selection
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.LogInfo("Waiting for instance %s to sync disks", instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  # Number of consecutive failed RPC calls
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # a successful call resets the failure counter
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      # a disk only counts as degraded if it is not currently syncing
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          # NOTE(review): this keeps the estimate of the last device
          # reporting one, not the maximum over all devices
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.LogInfo("- device %s: %5.2f%% done, %s",
                   disks[i].iv_name, mstat.sync_percent, rem_time)

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # wait for the estimated time, but poll at least once per minute;
    # max_time is still zero if no device gave an estimate
    time.sleep(min(60, max_time))

  if done:
    lu.LogInfo("Instance %s's disks are in sync", instance.name)

  return not cumul_degraded
4712
4713
def _BlockdevFind(lu, node, dev, instance):
  """Run call_blockdev_find on a device annotated with its diskparams.

  @param lu: A reference to the lu object
  @param node: The node to call out
  @param dev: The device to find
  @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
  # exactly one annotated disk is expected for the single device passed in
  [annotated_dev] = _AnnotateDiskParams(instance, [dev], lu.cfg)
  find_result = lu.rpc.call_blockdev_find(node, annotated_dev)
  return find_result
4726
4727
def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Annotate a device and pass it to L{_CheckDiskConsistencyInner}.

  All other arguments are handed over unchanged and the result of the
  inner function is returned.

  """
  # exactly one annotated disk is expected for the single device passed in
  [annotated_dev] = _AnnotateDiskParams(instance, [dev], lu.cfg)
  consistent = _CheckDiskConsistencyInner(lu, instance, annotated_dev, node,
                                          on_primary, ldisk=ldisk)
  return consistent
4735
4736
def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
                               ldisk=False):
  """Verify that a device and all of its children are not degraded.

  @attention: The device has to be annotated already.

  By default the C{is_degraded} attribute reported for the device is
  tested (overall non-ok status of the device(s)). If C{ldisk} is true, the
  device passed in is instead required to report a local storage status of
  C{constants.LDS_OKAY}. Children are always checked with the default test,
  and only as long as no problem has been found yet.

  @param lu: the logical unit providing configuration, RPC and logging
  @param instance: the instance object, only handed on to recursive calls
  @param dev: the annotated disk object to check
  @param node: the node on which the device is checked
  @param on_primary: if true the device is always queried; otherwise only
      when C{dev.AssembleOnSecondary()} is true
  @param ldisk: test the local storage status instead of C{is_degraded}
  @rtype: boolean
  @return: whether the device and its children were found to be consistent

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    found = lu.rpc.call_blockdev_find(node, dev)
    fail_msg = found.fail_msg
    if fail_msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, fail_msg)
      consistent = False
    elif not found.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = (found.payload.ldisk_status == constants.LDS_OKAY)
    else:
      consistent = not found.payload.is_degraded

  for child in (dev.children or ()):
    if not consistent:
      # A problem was already found; remaining children are not visited
      break
    consistent = _CheckDiskConsistencyInner(lu, instance, child, node,
                                            on_primary)

  return consistent
4773
4774
4775 class LUOobCommand(NoHooksLU):
4776   """Logical unit for OOB handling.
4777
4778   """
4779   REQ_BGL = False
4780   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4781
4782   def ExpandNames(self):
4783     """Gather locks we need.
4784
4785     """
4786     if self.op.node_names:
4787       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4788       lock_names = self.op.node_names
4789     else:
4790       lock_names = locking.ALL_SET
4791
4792     self.needed_locks = {
4793       locking.LEVEL_NODE: lock_names,
4794       }
4795
4796     self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4797
4798     if not self.op.node_names:
4799       # Acquire node allocation lock only if all nodes are affected
4800       self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4801
4802   def CheckPrereq(self):
4803     """Check prerequisites.
4804
4805     This checks:
4806      - the node exists in the configuration
4807      - OOB is supported
4808
4809     Any errors are signaled by raising errors.OpPrereqError.
4810
4811     """
4812     self.nodes = []
4813     self.master_node = self.cfg.GetMasterNode()
4814
4815     assert self.op.power_delay >= 0.0
4816
4817     if self.op.node_names:
4818       if (self.op.command in self._SKIP_MASTER and
4819           self.master_node in self.op.node_names):
4820         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4821         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4822
4823         if master_oob_handler:
4824           additional_text = ("run '%s %s %s' if you want to operate on the"
4825                              " master regardless") % (master_oob_handler,
4826                                                       self.op.command,
4827                                                       self.master_node)
4828         else:
4829           additional_text = "it does not support out-of-band operations"
4830
4831         raise errors.OpPrereqError(("Operating on the master node %s is not"
4832                                     " allowed for %s; %s") %
4833                                    (self.master_node, self.op.command,
4834                                     additional_text), errors.ECODE_INVAL)
4835     else:
4836       self.op.node_names = self.cfg.GetNodeList()
4837       if self.op.command in self._SKIP_MASTER:
4838         self.op.node_names.remove(self.master_node)
4839
4840     if self.op.command in self._SKIP_MASTER:
4841       assert self.master_node not in self.op.node_names
4842
4843     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4844       if node is None:
4845         raise errors.OpPrereqError("Node %s not found" % node_name,
4846                                    errors.ECODE_NOENT)
4847       else:
4848         self.nodes.append(node)
4849
4850       if (not self.op.ignore_status and
4851           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4852         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4853                                     " not marked offline") % node_name,
4854                                    errors.ECODE_STATE)
4855
4856   def Exec(self, feedback_fn):
4857     """Execute OOB and return result if we expect any.
4858
4859     """
4860     master_node = self.master_node
4861     ret = []
4862
4863     for idx, node in enumerate(utils.NiceSort(self.nodes,
4864                                               key=lambda node: node.name)):
4865       node_entry = [(constants.RS_NORMAL, node.name)]
4866       ret.append(node_entry)
4867
4868       oob_program = _SupportsOob(self.cfg, node)
4869
4870       if not oob_program:
4871         node_entry.append((constants.RS_UNAVAIL, None))
4872         continue
4873
4874       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4875                    self.op.command, oob_program, node.name)
4876       result = self.rpc.call_run_oob(master_node, oob_program,
4877                                      self.op.command, node.name,
4878                                      self.op.timeout)
4879
4880       if result.fail_msg:
4881         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4882                         node.name, result.fail_msg)
4883         node_entry.append((constants.RS_NODATA, None))
4884       else:
4885         try:
4886           self._CheckPayload(result)
4887         except errors.OpExecError, err:
4888           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4889                           node.name, err)
4890           node_entry.append((constants.RS_NODATA, None))
4891         else:
4892           if self.op.command == constants.OOB_HEALTH:
4893             # For health we should log important events
4894             for item, status in result.payload:
4895               if status in [constants.OOB_STATUS_WARNING,
4896                             constants.OOB_STATUS_CRITICAL]:
4897                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4898                                 item, node.name, status)
4899
4900           if self.op.command == constants.OOB_POWER_ON:
4901             node.powered = True
4902           elif self.op.command == constants.OOB_POWER_OFF:
4903             node.powered = False
4904           elif self.op.command == constants.OOB_POWER_STATUS:
4905             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4906             if powered != node.powered:
4907               logging.warning(("Recorded power state (%s) of node '%s' does not"
4908                                " match actual power state (%s)"), node.powered,
4909                               node.name, powered)
4910
4911           # For configuration changing commands we should update the node
4912           if self.op.command in (constants.OOB_POWER_ON,
4913                                  constants.OOB_POWER_OFF):
4914             self.cfg.Update(node, feedback_fn)
4915
4916           node_entry.append((constants.RS_NORMAL, result.payload))
4917
4918           if (self.op.command == constants.OOB_POWER_ON and
4919               idx < len(self.nodes) - 1):
4920             time.sleep(self.op.power_delay)
4921
4922     return ret
4923
4924   def _CheckPayload(self, result):
4925     """Checks if the payload is valid.
4926
4927     @param result: RPC result
4928     @raises errors.OpExecError: If payload is not valid
4929
4930     """
4931     errs = []
4932     if self.op.command == constants.OOB_HEALTH:
4933       if not isinstance(result.payload, list):
4934         errs.append("command 'health' is expected to return a list but got %s" %
4935                     type(result.payload))
4936       else:
4937         for item, status in result.payload:
4938           if status not in constants.OOB_STATUSES:
4939             errs.append("health item '%s' has invalid status '%s'" %
4940                         (item, status))
4941
4942     if self.op.command == constants.OOB_POWER_STATUS:
4943       if not isinstance(result.payload, dict):
4944         errs.append("power-status is expected to return a dict but got %s" %
4945                     type(result.payload))
4946
4947     if self.op.command in [
4948       constants.OOB_POWER_ON,
4949       constants.OOB_POWER_OFF,
4950       constants.OOB_POWER_CYCLE,
4951       ]:
4952       if result.payload is not None:
4953         errs.append("%s is expected to not return payload but got '%s'" %
4954                     (self.op.command, result.payload))
4955
4956     if errs:
4957       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4958                                utils.CommaJoin(errs))
4959
4960
class _OsQuery(_QueryBase):
  """Query implementation for operating system definitions.

  """
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Set up the (currently empty) locks and the wanted names.

    @param lu: the logical unit running this query

    """
    # No locks are taken for the time being.
    # TODO: reintroduce locks when they are lighter-weight; the original
    # setup locked all nodes in shared mode:
    #   self.share_locks[locking.LEVEL_NODE] = 1
    #   self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = self.names or locking.ALL_SET
    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    """Does nothing, as this query declares no locks.

    """
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    # Only nodes which answered at RPC level get an entry per OS, so that
    # nodes with a non-responding node daemon don't make all OSes invalid
    responsive = [node_name for (node_name, node_res) in rlist.items()
                  if not node_res.fail_msg]

    by_os = {}
    for (node_name, node_res) in rlist.items():
      if node_res.fail_msg or not node_res.payload:
        continue

      for os_entry in node_res.payload:
        (name, path, status, diagnose, variants,
         params, api_versions) = os_entry

        if name not in by_os:
          # First time this OS is seen: start with an empty list for
          # every responsive node
          by_os[name] = dict((nname, []) for nname in responsive)

        # convert params from [name, help] to (name, help)
        param_tuples = [tuple(v) for v in params]
        by_os[name][node_name].append((path, status, diagnose, variants,
                                       param_tuples, api_versions))

    return by_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    An OS is valid only if every node's first entry for it has a true
    status; its variants, parameters and API versions are restricted to the
    values common to all nodes looked at.

    @param lu: the logical unit running this query
    @return: list of L{query.OsInfo} objects in the order given by
        C{_GetNames}

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    candidate_nodes = [node.name
                       for node in lu.cfg.GetAllNodesInfo().values()
                       if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(candidate_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      is_hidden = os_name in cluster.hidden_os
      is_blacklisted = os_name in cluster.blacklisted_os
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=is_hidden, blacklisted=is_blacklisted)

      variants = set()
      parameters = set()
      api_versions = set()

      # The first node's values are taken as they are, all further nodes
      # can only filter out inconsistent values
      combine = set.update

      for osl in os_data.values():
        info.valid = bool(osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        combine(variants, node_variants)
        combine(parameters, node_params)
        combine(api_versions, node_api)
        combine = set.intersection_update

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    ordered = []
    for name in self._GetNames(lu, pol.keys(), None):
      if name in data:
        ordered.append(data[name])
    return ordered
5076
5077
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: the OS names to restrict the query to
    @return: the name filter, the status filter, both combined with
        C{qlang.OP_AND}, or C{None} if neither applies

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for fname in ["hidden", "blacklisted"]:
      if fname not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, fname]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      status_filter = [qlang.OP_AND] + conditions
    else:
      status_filter = None

    if not name_filter:
      return status_filter
    if status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    return name_filter

  def CheckArguments(self):
    """Creates the OS query object (which never uses locking).

    """
    os_filter = self._BuildFilter(self.op.output_fields, self.op.names)
    self.oq = _OsQuery(os_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Delegates to the OS query object.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.oq.OldStyleQuery(self)
5120
5121
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the node name as both C{OP_TARGET} and C{NODE_NAME}

    """
    target = self.op.node_name
    return {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    @return: tuple with the same list, all nodes except the one being
        removed, given twice

    """
    hook_nodes = self.cfg.GetNodeList()
    if self.op.node_name in hook_nodes:
      hook_nodes.remove(self.op.node_name)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    all_instances = self.cfg.GetAllInstancesInfo()
    for (instance_name, instance) in all_instances.items():
      if node.name not in instance.all_nodes:
        continue
      raise errors.OpPrereqError("Instance %s is still running on the node,"
                                 " please remove first" % instance_name,
                                 errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    @param feedback_fn: feedback function (not used here)

    """
    node_name = self.node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Post hooks are run on the node before it is told to leave the cluster
    _RunPostHook(self, node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    leave_msg = leave_result.fail_msg
    if leave_msg:
      # Only warn here, the removal itself goes on
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                    constants.ETC_HOSTS_REMOVE,
                                                    node_name, None)
      hosts_result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
5214
5215
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted nodes and the locks needed for them.

    Locks are only requested if locking was asked for and live data is
    part of the requested data.

    @param lu: the logical unit running this query

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if not self.do_locking:
      return

    # If any non-static field is requested we need to lock the nodes
    lu.needed_locks[locking.LEVEL_NODE] = self.wanted
    lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    """Does nothing, all locks are declared in L{ExpandNames}.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Only the kinds of data listed in C{self.requested_data} are gathered,
    the others are passed on as C{None} (or an empty dict for groups).

    @param lu: the logical unit running this query
    @rtype: L{query.NodeQueryData}

    """
    wanted_data = self.requested_data
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    live_data = None
    if query.NQ_LIVE in wanted_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = {}
      for (name, nresult) in node_data.items():
        # Nodes which failed or returned nothing get no live data
        if not nresult.fail_msg and nresult.payload:
          live_data[name] = rpc.MakeLegacyNodeInfo(nresult.payload)

    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in wanted_data:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      for inst in lu.cfg.GetAllInstancesInfo().values():
        # Instances on nodes which are not part of the query are ignored
        primary_insts = node_to_primary.get(inst.primary_node)
        if primary_insts is not None:
          primary_insts.add(inst.name)

        for secnode in inst.secondary_nodes:
          secondary_insts = node_to_secondary.get(secnode)
          if secondary_insts is not None:
            secondary_insts.add(inst.name)

    oob_support = None
    if query.NQ_OOB in wanted_data:
      oob_support = {}
      for (name, node) in all_info.iteritems():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))

    if query.NQ_GROUP in wanted_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    node_objects = [all_info[name] for name in nodenames]
    return query.NodeQueryData(node_objects, live_data,
                               lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
5291
5292
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is delegated to a L{_NodeQuery} object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the node query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Delegates to the node query object.

    """
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates to the node query object.

    """
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.nq.OldStyleQuery(self)
5312
5313
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validates the selected output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all of them.

    When no nodes are given, the node allocation lock is requested too.

    """
    self.share_locks = _ShareAll()

    if self.op.nodes:
      wanted_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }
    else:
      wanted_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }

    self.needed_locks = wanted_locks

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently, nodes which fail to answer are
    skipped with a warning.

    @param feedback_fn: feedback function (not used here)
    @rtype: list
    @return: one list per volume, holding the selected fields as strings

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    vol2inst = _MapInstanceDisksToNodes(self.cfg.GetAllInstancesInfo().values())

    # One getter per supported field; values are only computed for the
    # fields which were actually selected
    getters = {
      "node": lambda node, vol: node,
      "phys": lambda node, vol: vol["dev"],
      "vg": lambda node, vol: vol["vg"],
      "name": lambda node, vol: vol["name"],
      "size": lambda node, vol: int(float(vol["size"])),
      "instance": lambda node, vol: vol2inst.get((node, vol["vg"] + "/" +
                                                  vol["name"]), "-"),
      }

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue

      fail_msg = nresult.fail_msg
      if fail_msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node,
                        fail_msg)
        continue

      for vol in sorted(nresult.payload, key=operator.itemgetter("dev")):
        row = []
        for field in self.op.output_fields:
          if field not in getters:
            raise errors.ParameterError(field)
          row.append(str(getters[field](node, vol)))

        output.append(row)

    return output
5385
5386
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the selected output fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all of them.

    When no nodes are given, the node allocation lock is requested too.

    """
    self.share_locks = _ShareAll()

    if self.op.nodes:
      wanted_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }
    else:
      wanted_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }

    self.needed_locks = wanted_locks

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently, nodes which fail to answer are
    skipped with a warning.

    @param feedback_fn: feedback function (not used here)
    @rtype: list
    @return: one list per storage unit with the values of the selected
        fields, ordered by node and then by unit name

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    fields = list(self.op.output_fields)
    if constants.SF_NAME not in fields:
      fields.insert(0, constants.SF_NAME)

    # Never ask for node or type as it's only known to the LU
    local_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [name for name in fields if name not in local_only]

    field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      fail_msg = nresult.fail_msg
      if fail_msg:
        self.LogWarning("Can't get storage data from node %s: %s", node,
                        fail_msg)
        continue

      # Index the rows by unit name, to be able to emit them sorted
      rows = dict((row[name_idx], row) for row in nresult.payload)

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
5471
5472
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted instances and the locks needed for them.

    Locks are only requested if locking was asked for and live data is
    part of the requested data; the node group and node locks are filled
    in by L{DeclareLocks}.

    @param lu: the logical unit running this query

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks.update({
        locking.LEVEL_INSTANCE: self.wanted,
        locking.LEVEL_NODEGROUP: [],
        locking.LEVEL_NODE: [],
        })
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    """Fills in the node group and node locks, if locking is used.

    @param lu: the logical unit running this query
    @param level: the locking level locks are being declared for

    """
    if not self.do_locking:
      return

    if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
      assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

      # Lock all groups used by instances optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      group_uuids = set()
      for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE):
        group_uuids.update(lu.cfg.GetInstanceNodeGroups(instance_name))
      lu.needed_locks[locking.LEVEL_NODEGROUP] = group_uuids
    elif level == locking.LEVEL_NODE:
      lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    """Verifies the optimistically acquired node group locks.

    @param lu: the logical unit owning the locks

    """
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Only the kinds of data listed in C{self.requested_data} are gathered.

    @param lu: the logical unit running this query
    @rtype: L{query.InstanceQueryData}

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    wanted_data = self.requested_data
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(node_name
                      for inst in instance_list
                      for node_name in inst.all_nodes)
    hv_list = list(set(inst.hypervisor for inst in instance_list))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()
    live_data = {}

    # Gather data as requested
    if wanted_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)

        if result.fail_msg:
          bad_nodes.append(name)
          continue

        # An empty payload means no instance is alive on this node
        for inst in (result.payload or ()):
          if inst not in all_info:
            # orphan instance; we don't list it here as we don't
            # handle this case yet in the output of instance listing
            logging.warning("Orphan instance '%s' found on node %s",
                            inst, name)
          elif all_info[inst].primary_node == name:
            # NOTE(review): this merges the node's whole payload, not only
            # the entry of this instance - confirm that this is intended
            live_data.update(result.payload)
          else:
            wrongnode_inst.add(inst)

    disk_usage = None
    if query.IQ_DISKUSAGE in wanted_data:
      gmi = ganeti.masterd.instance
      disk_usage = {}
      for inst in instance_list:
        disk_sizes = [{constants.IDISK_SIZE: disk.size}
                      for disk in inst.disks]
        disk_usage[inst.name] = gmi.ComputeDiskSize(inst.disk_template,
                                                    disk_sizes)

    consinfo = None
    if query.IQ_CONSOLE in wanted_data:
      consinfo = {}
      for inst in instance_list:
        # Only running instances, i.e. those with live data, have a console
        console = None
        if inst.name in live_data:
          console = _GetInstanceConsole(cluster, inst)
        consinfo[inst.name] = console
      assert set(consinfo.keys()) == set(instance_names)

    if query.IQ_NODES in wanted_data:
      node_names = set(node_name
                       for inst in instance_list
                       for node_name in inst.all_nodes)
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      group_uuids = set(node.group for node in nodes.values())
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in group_uuids)
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5604
5605
class LUQuery(NoHooksLU):
  """Generic query LU for resources/items of a certain kind.

  The actual work is done by a query implementation object, chosen via
  L{_GetQueryImplementation} from the opcode's C{what} field; this class
  merely forwards the individual LU phases to it.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested kind.

    The implementation is built from the opcode's filter, field list and
    locking flag and stored as C{self.impl}.

    """
    op = self.op
    impl_cls = _GetQueryImplementation(op.what)
    self.impl = impl_cls(op.qfilter, op.fields, op.use_locking)

  def ExpandNames(self):
    """Delegates name expansion to the query implementation.

    """
    impl = self.impl
    impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates lock declaration for C{level} to the query implementation.

    """
    impl = self.impl
    impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query.

    @return: whatever the implementation's C{NewStyleQuery} returns

    """
    impl = self.impl
    return impl.NewStyleQuery(self)
5626
5627
class LUQueryFields(NoHooksLU):
  """Query for the fields available for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation class for the requested kind.

    The class (not an instance) is stored as C{self.qcls}; only its
    C{FIELDS} attribute is used later on.

    """
    what = self.op.what
    self.qcls = _GetQueryImplementation(what)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = dict()

  def Exec(self, feedback_fn):
    """Passes the field definitions and requested names to the query module.

    @return: the result of L{query.QueryFields}

    """
    field_defs = self.qcls.FIELDS
    wanted = self.op.fields
    return query.QueryFields(field_defs, wanted)
5643
5644
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type has no modifiable
        fields, or if any of the requested changes is not among them

    """
    op = self.op
    op.node_name = _ExpandNodeName(self.cfg, op.node_name)

    storage_type = op.storage_type

    # Storage types without an entry can not be modified at all
    if storage_type not in constants.MODIFIABLE_STORAGE_FIELDS:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
    modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]

    # Anything requested beyond the modifiable fields is an error
    rejected = set(op.changes.keys()).difference(modifiable)
    if not rejected:
      return

    raise errors.OpPrereqError("The following fields can not be modified for"
                               " storage units of type '%s': %r" %
                               (storage_type, list(rejected)),
                               errors.ECODE_INVAL)

  def ExpandNames(self):
    """Requests the node lock for the target node only.

    """
    target = self.op.node_name
    self.needed_locks = {locking.LEVEL_NODE: target}

  def Exec(self, feedback_fn):
    """Asks the node to apply the changes to the storage unit.

    The storage-type specific arguments come from L{_GetStorageTypeArgs};
    a failed RPC is turned into an exception by C{Raise}.

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    rpc_result = self.rpc.call_storage_modify(op.node_name, op.storage_type,
                                              st_args, op.name, op.changes)
    failure_msg = ("Failed to modify storage unit '%s' on %s" %
                   (op.name, op.node_name))
    rpc_result.Raise(failure_msg)
5685
5686
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  The same LU also handles re-adding a node which is already part of the
  configuration (C{self.op.readd}); in that case the existing node object
  is reused instead of creating a new one.

  @cvar _NFLAGS: names of the capability flags which are defaulted in
      L{CheckPrereq} and copied from the opcode to the node in L{Exec}

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    """Resolves the node name and rejects invalid re-add requests.

    The name is resolved using the cluster's primary IP family and the
    opcode's node name is replaced by the resolved name.

    @raise errors.OpPrereqError: if the master node is to be re-added, or
        if a node group is passed together with the re-add flag

    """
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    @rtype: dict
    @return: the hook environment; the capability flags are passed as
        strings

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: C{(pre_nodes, post_nodes)}; the pre-hook list is all
        configured nodes without the added node, the post-hook list
        additionally contains the added node

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the secondary IP (defaulting to the primary one) is a valid IPv4
       address
     - the new node is not already in the config (or, for a re-add, that
       it is)
     - its IP addresses don't conflict with those of other nodes
     - its parameters (single/dual homed) matches the cluster
     - it is reachable on the node daemon port
     - it reports the same protocol version as the master

    As a side effect this fills in unset capability flags on the opcode
    and prepares C{self.new_node}, C{self.master_candidate} and
    C{self.changed_primary_ip} for L{Exec}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    # The primary IP always comes from name resolution, never from the caller
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        # For the node being re-added only the secondary IP must be
        # unchanged; a changed primary IP is recorded and applied in Exec
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      # Neither of the new addresses may be in use by any other node
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      # Unset flags are inherited from the existing node object
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      # Unset flags default to True for new nodes
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    # On re-add the node itself is passed as an exception to the
    # self-promotion decision
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      # Reuse the existing object; its flags are reset later, in Exec
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)

    # TODO: If we need to have multiple DnsOnlyRunner we probably should make
    #       it a property on the base class.
    result = rpc.DnsOnlyRunner().call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    Updates the node object prepared by L{CheckPrereq}, optionally adds the
    node to the master's hosts file, verifies the node from the master and
    finally either re-adds the node or adds it as a new one.

    @raise errors.OpExecError: if the ssh/hostname verification of the
        node fails

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # Node parameters are always overwritten, also for re-added nodes
    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    if self.op.hv_state:
      new_node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      new_node.disk_state_static = self.new_disk_state

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    # Only the master is asked to verify the node being added
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      # A non-empty payload means failures; report all before aborting
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        # A failed demotion is only reported, it doesn't abort the re-add
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
5974
5975
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  # Only flag combinations with at most one flag set map to a role
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  # Same order as the tuples used as keys in _F2R
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validates the opcode arguments and decides on the locking strategy.

    Besides validation this computes C{self.might_demote},
    C{self.lock_all} and C{self.lock_instances}, which are used by
    L{ExpandNames}, L{CheckPrereq} and L{Exec}.

    @raise errors.OpPrereqError: if no modification was passed, if more
        than one of the collected modifications is C{True}, or if the
        secondary IP is not a valid IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    # NOTE(review): this counts True over all entries of all_mods, i.e. also
    # over master_capable and vm_capable, not only over the three role
    # flags -- confirm whether that is intended
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate is False or
                         self.op.offline is True or
                         self.op.drained is True or
                         self.op.master_capable is False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    # All nodes are locked only if a demotion may have to be compensated by
    # promoting other nodes; instances only if the secondary IP changes
    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    An instance is affected if its disk template is one of
    C{constants.DTS_INT_MIRROR} and the modified node is among its nodes.

    @param instance: the instance object to check
    @return: whether the instance is affected

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    """Declares the node, node resource and instance locks.

    Either all nodes (plus the node allocation lock) or only the target
    node are locked, depending on C{self.lock_all}.

    """
    if self.lock_all:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,

        # Block allocations when all nodes are locked
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: self.op.node_name,
        }

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get all locks except nodes in shared mode; they are not used for anything
    # but read-only access
    self.share_locks = _ShareAll()
    self.share_locks[locking.LEVEL_NODE] = 0
    self.share_locks[locking.LEVEL_NODE_RES] = 0
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 0

    if self.lock_instances:
      # The affected instances are computed again in CheckPrereq and
      # compared against the locks owned by then
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node and on the modified node (see
    L{BuildHooksNodes}).

    @rtype: dict
    @return: the hook environment; all flags are passed as strings

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list, containing the master node and the modified
        node, for both the pre and the post phase

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the instance locks still cover all instances affected by a
       secondary IP change
     - the role flags of the master node are not being changed
     - a master candidate promotion is requested only for master capable
       nodes
     - the vm_capable flag is not unset on a node with instances
     - enough master candidates remain if auto promotion was not requested
     - the requested power state handling fits the node's out-of-band
       support
     - a node leaving the offline role answers a version query
     - a secondary IP change respects the single/multi-homed setup of the
       cluster, affects no running instances and is reachable

    As a side effect this may adjust the role flags on the opcode and it
    computes C{self.old_flags}, C{self.old_role} and C{self.new_role} as
    well as the new node parameters and states for L{Exec}.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable is False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion (--auto-promote or RAPI"
                                   " auto_promote=True)", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) is False and getattr(node, attr) is False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered is True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained is False or self.op.offline is False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable is False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    # When changing the secondary ip, verify if this is a single-homed to
    # multi-homed transition or vice versa, and apply the relevant
    # restrictions.
    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip != node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from single-homed to multi-homed"
                          " cluster; all nodes will require a secondary IP"
                          " address")
        else:
          raise errors.OpPrereqError("Changing the secondary ip on a"
                                     " single-homed cluster requires the"
                                     " --force option to be passed, and the"
                                     " target node to be the master",
                                     errors.ECODE_INVAL)
      elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from multi-homed to single-homed"
                          " cluster; secondary IP addresses will have to be"
                          " removed")
        else:
          raise errors.OpPrereqError("Cannot set the secondary IP to be the"
                                     " same as the primary IP on a multi-homed"
                                     " cluster, unless the --force option is"
                                     " passed, and the target node is the"
                                     " master", errors.ECODE_INVAL)

      # A set secondary IP implies lock_instances, hence affected_instances
      # is a dictionary here and not None
      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          msg = ("Cannot change secondary IP address: offline node has"
                 " instances (%s) configured to use it" %
                 utils.CommaJoin(affected_instances.keys()))
          raise errors.OpPrereqError(msg, errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    Applies the values prepared by L{CheckPrereq} to the node object and
    writes it to the configuration.

    @rtype: list
    @return: a list of C{(name, new value)} tuples for the changed
        capability flags, role flags and the secondary IP

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      # Report only those flags whose value actually changes
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
6342
6343
class LUNodePowercycle(NoHooksLU):
  """Logical unit which powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and protects the master node.

    @raise errors.OpPrereqError: if the node is the master node and
        the force parameter was not set

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    is_master = (node_name == self.cfg.GetMasterNode())
    if is_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort operation which must not wait for
    other jobs, hence no locks are requested at all.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Asks the node to powercycle itself.

    @param feedback_fn: feedback function (not used)
    @return: the payload of the powercycle RPC call

    """
    hv_name = self.cfg.GetHypervisorType()
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name, hv_name)
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
6374
6375
class LUClusterQuery(NoHooksLU):
  """Logical unit returning the cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Builds and returns the cluster configuration dictionary.

    @param feedback_fn: feedback function (not used)
    @rtype: dict
    @return: version constants, architecture information and
        cluster-level settings, keyed by name

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, restricted to enabled hypervisors
    os_hvp = {}
    for (os_name, hv_dict) in cluster.os_hvp.items():
      os_hvp[os_name] = dict((hv_name, hv_params)
                             for (hv_name, hv_params) in hv_dict.items()
                             if hv_name in enabled_hvs)

    # Cluster-level hypervisor parameters of the enabled hypervisors
    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)

    # The configuration stores an IP family, callers get an IP version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "diskparams": cluster.diskparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
6447
6448
class LUClusterConfigQuery(NoHooksLU):
  """Logical unit returning configuration values.

  All the work is delegated to a L{_ClusterQuery} object created from
  the requested output fields.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the query helper (no filter, no locking).

    """
    self.cq = _ClusterQuery(None, self.op.output_fields, False)

  def ExpandNames(self):
    """Lets the query helper declare the needed names.

    """
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Lets the query helper declare the locks for the given level.

    """
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the old-style query and returns its only row.

    @param feedback_fn: feedback function (not used)
    @return: the first (and only) entry of the query result

    """
    rows = self.cq.OldStyleQuery(self)

    assert len(rows) == 1

    return rows[0]
6470
6471
class _ClusterQuery(_QueryBase):
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    """Declares no locks and rejects queries asking for locking.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if locking was requested

    """
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

    if not self.do_locking:
      return

    raise errors.OpPrereqError("Can not use locking for cluster queries",
                               errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    """Does nothing, as cluster queries take no locks.

    """
    pass

  def _GetQueryData(self, lu):
    """Gathers the data needed for the cluster query.

    Only the parts listed in C{self.requested_data} are collected;
    each of the others is passed on as C{NotImplemented}.

    @param lu: the logical unit giving access to configuration and RPC
    @return: a L{query.ClusterQueryData} object

    """
    # Locking is not used
    assert not compat.any(lu.glm.is_owned(level)
                          for level in locking.LEVELS
                          if level != locking.LEVEL_CLUSTER)
    assert not self.do_locking
    assert not self.use_locking

    wanted_data = self.requested_data

    cluster = NotImplemented
    if query.CQ_CONFIG in wanted_data:
      cluster = lu.cfg.GetClusterInfo()

    drain_flag = NotImplemented
    if query.CQ_QUEUE_DRAINED in wanted_data:
      drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)

    watcher_pause = NotImplemented
    if query.CQ_WATCHER_PAUSE in wanted_data:
      master_name = lu.cfg.GetMasterNode()

      pause_result = lu.rpc.call_get_watcher_pause(master_name)
      pause_result.Raise("Can't retrieve watcher pause from master node '%s'" %
                         master_name)

      watcher_pause = pause_result.payload

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6524
6525
class LUInstanceActivateDisks(NoHooksLU):
  """Logical unit bringing up the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declares the locks on the instance's nodes at node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be part of the cluster and its primary node must
    pass the online check.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activates the disks and optionally waits for them to sync.

    @param feedback_fn: feedback function (not used)
    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled or
        are degraded after waiting for the sync

    """
    (disks_ok, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    if self.op.wait_for_sync and not _WaitForSync(self, self.instance):
      raise errors.OpExecError("Some disks of the instance are degraded!")

    return disks_info
6567
6568
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes, in two passes: first on
  every node of each disk's node tree with the is-primary argument set
  to False, then on the primary node only with it set to True.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a tuple C{(disks_ok, device_info)}; C{disks_ok} is False if
      assembling failed in a way that is not ignored, and
      C{device_info} is a list with one
      (primary node, instance-visible disk name, device path) tuple
      per disk, where the device path is the payload of the second
      pass assemble call, or None if that call failed

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # work on a copy, so that the size is not dropped from the
        # original disk object
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        is_offline_secondary = (node in instance.secondary_nodes and
                                result.offline)
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=False, pass=1): %s",
                      inst_disk.iv_name, node, msg)
        # a failure only counts if it is neither explicitly ignored nor
        # reported for a secondary node whose result is flagged offline
        if not (ignore_secondaries or is_offline_secondary):
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=True, pass=2): %s",
                      inst_disk.iv_name, node, msg)
        # failures on the primary node are never ignored
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
6659
6660
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  The disks are assembled via L{_AssembleInstanceDisks}; if that
  fails, they are shut down again and an error is raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as C{ignore_secondaries} to the disk assembly;
      if it is neither None nor true, a hint about '--force' is logged
      on failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (disks_ok, _) = _AssembleInstanceDisks(lu, instance,
                                         ignore_secondaries=force)
  if disks_ok:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.LogWarning("",
                  hint=("If the message above refers to a secondary node,"
                        " you can retry the operation using '--force'"))
  raise errors.OpExecError("Disk consistency error")
6674
6675
class LUInstanceDeactivateDisks(NoHooksLU):
  """Logical unit shutting down the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declares the locks on the instance's nodes at node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be part of the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivates the disks.

    With the force option the disks are shut down directly, otherwise
    the variant checking the instance state first is used.

    @param feedback_fn: feedback function (not used)

    """
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks

    shutdown_fn(self, self.instance)
6710
6711
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance, after a state check.

  The instance state is first checked against C{INSTANCE_DOWN}; only
  then L{_ShutdownInstanceDisks} is called. The result of the shutdown
  is not returned.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: passed on to L{_ShutdownInstanceDisks}

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN,
                      msg="cannot shutdown disks")

  _ShutdownInstanceDisks(lu, instance, disks=disks)
6721
6722
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @param instance: the instance owning the disks
  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks, or None for all disks of the instance
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on
  @raise errors.ProgrammerError: if a selected disk is not one of the
      instance's disks

  """
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
6739
6740
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If C{ignore_primary} is true, errors on the primary node are
  ignored. Errors on the other nodes are ignored only when the RPC
  result is flagged as offline.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to shut down (or all, if None)
  @type ignore_primary: boolean
  @param ignore_primary: whether failures on the primary node should
      leave the result unaffected
  @rtype: boolean
  @return: False if any failure was not ignored, True otherwise

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if not msg:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)

      # Every failure is logged, but only the non-ignored ones change
      # the return value
      if node == instance.primary_node:
        counts_as_failure = not ignore_primary
      else:
        counts_as_failure = not result.offline

      if counts_as_failure:
        all_result = False

  return all_result
6765
6766
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  The node is asked for the "memory_free" value of the given
  hypervisor; if the value cannot be obtained, is not an integer or is
  smaller than the requested amount, an OpPrereqError is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: integer
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, [hypervisor_name])[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  # The payload is a triple whose last element holds exactly one entry,
  # the information of the hypervisor we asked for
  (_, _, (hv_info, )) = node_result.payload
  free_mem = hv_info.get("memory_free")

  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)

  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)

  return free_mem
6807
6808
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  Each (volume group, size) pair of C{req_sizes} is verified on all
  the given nodes by L{_CheckNodesFreeDiskOnVG}, which raises an
  OpPrereqError if a node has too little space or cannot be queried.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for (vg_name, size) in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, size)
6830
6831
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  All nodes are queried for the "vg_free" value of the volume group;
  if for any node the value cannot be obtained, is not an integer or
  is smaller than the requested amount, an OpPrereqError is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  all_results = lu.rpc.call_node_info(nodenames, [vg], None)

  for name in nodenames:
    node_result = all_results[name]
    node_result.Raise("Cannot get current information from node %s" % name,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    # The middle element of the payload holds exactly one entry, the
    # information of the volume group we asked for
    (_, (vg_info, ), _) = node_result.payload
    vg_free = vg_info.get("vg_free")

    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (name, vg, vg_free), errors.ECODE_ENVIRON)

    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (name, vg, requested, vg_free),
                                 errors.ECODE_NORES)
6868
6869
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  All nodes are queried for the "cpu_total" value of the given
  hypervisor; if for any node the value cannot be obtained, is not an
  integer or is smaller than the requested number, an OpPrereqError is
  raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU information
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  all_results = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])

  for name in nodenames:
    node_result = all_results[name]
    node_result.Raise("Cannot get current information from node %s" % name,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    # The last element of the payload holds exactly one entry, the
    # information of the hypervisor we asked for
    (_, _, (hv_info, )) = node_result.payload
    num_cpus = hv_info.get("cpu_total")

    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (name, num_cpus), errors.ECODE_ENVIRON)

    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (name, num_cpus, requested),
                                 errors.ECODE_NORES)
6903
6904
class LUInstanceStartup(LogicalUnit):
  """Logical unit for starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Upgrades and type-checks the extra backend parameters, if any.

    """
    extra_bep = self.op.beparams
    if not extra_bep:
      return

    # fill the beparams dict
    objects.UpgradeBeParams(extra_bep)
    utils.ForceDictType(extra_bep, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Locks the instance; node resource locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declares the resource lock on the instance's primary node.

    """
    if level != locking.LEVEL_NODE_RES:
      return
    self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the force flag plus the instance-based
    variables.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The hooks run on the master node and on all nodes of the instance.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes += list(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates the
    overridden hypervisor parameters and the instance state and,
    unless an offline primary node is being ignored, verifies that the
    primary node is online, that the bridges exist and that, if the
    instance is not running yet, the node has enough free memory for
    the instance's C{BE_MINMEM} value.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    extra_hvp = self.op.hvparams
    if extra_hvp:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(extra_hvp, constants.HVS_PARAMETER_TYPES)
      full_hvp = cluster.FillHV(instance)
      full_hvp.update(extra_hvp)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(full_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, full_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    primary = instance.primary_node
    self.primary_offline = self.cfg.GetNodeInfo(primary).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.LogWarning("Overridden parameters are ignored")
      return

    _CheckNodeOnline(self, primary)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    bep.update(self.op.beparams)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(primary,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % primary,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      # running already, no memory check needed
      return

    _CheckNodeFreeMemory(self, primary,
                         "starting instance %s" % instance.name,
                         bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    Unless C{no_remember} is set, the instance is first marked as up
    in the configuration. If the primary node is offline nothing else
    is done; otherwise the disks are started and the instance start is
    requested from the primary node.

    @param feedback_fn: feedback function (not used)
    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.LogInfo("Primary node offline, marked instance as started")
      return

    primary = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_args = (instance, self.op.hvparams, self.op.beparams)
    start_result = self.rpc.call_instance_start(primary, start_args,
                                                self.op.startup_paused)
    start_err = start_result.fail_msg
    if start_err:
      # do not leave the disks active if the start failed
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % start_err)
7025
7026
class LUInstanceReboot(LogicalUnit):
  """Logical unit for rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the reboot-specific options plus the
    instance-based variables.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The hooks run on the master node and on all nodes of the instance.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes += list(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, then runs the
    instance state, primary node online and bridge existence checks.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A running instance with a soft or hard reboot type is rebooted via
    the reboot RPC call. In all other cases the instance is shut down
    (if running), its disks are restarted and it is started again. At
    the end the instance is marked as up in the configuration.

    @param feedback_fn: feedback function (not used)
    @raise errors.OpExecError: if the instance could not be started
        for a full reboot

    """
    instance = self.instance
    primary = instance.primary_node
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
    soft_or_hard = (constants.INSTANCE_REBOOT_SOFT,
                    constants.INSTANCE_REBOOT_HARD)

    remote_info = self.rpc.call_instance_info(primary,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % primary)
    is_running = bool(remote_info.payload)

    if is_running and reboot_type in soft_or_hard:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, primary)
      reboot_result = self.rpc.call_instance_reboot(primary, instance,
                                                    reboot_type,
                                                    self.op.shutdown_timeout)
      reboot_result.Raise("Could not reboot instance")
    else:
      if is_running:
        stop_result = self.rpc.call_instance_shutdown(primary, instance,
                                                      self.op.shutdown_timeout)
        stop_result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)

      _StartInstanceDisks(self, instance, ignore_secondaries)
      start_result = self.rpc.call_instance_start(primary,
                                                  (instance, None, None),
                                                  False)
      start_err = start_result.fail_msg
      if start_err:
        # do not leave the disks active if the start failed
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % start_err)

    self.cfg.MarkInstanceUp(instance.name)
7119
7120
class LUInstanceShutdown(LogicalUnit):
  """Logical unit for shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the instance-based variables plus the
    shutdown timeout.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The hooks run on the master node and on all nodes of the instance.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes += list(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster. The instance
    state check is skipped if the force option is set, and the primary
    node online check is skipped if the node is offline and offline
    nodes are to be ignored.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    if self.op.force:
      self.LogWarning("Ignoring offline instance check")
    else:
      _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    primary = instance.primary_node
    self.primary_offline = self.cfg.GetNodeInfo(primary).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, primary)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    A failure of the shutdown RPC call is only logged as a warning;
    the disks of the instance are shut down afterwards in any case,
    unless the primary node is offline.

    @param feedback_fn: feedback function (not used)

    """
    instance = self.instance
    primary = instance.primary_node
    timeout = self.op.timeout

    # An offline instance must not be marked as down, since that would
    # reset its offline flag
    remember = not self.op.no_remember
    if remember and instance.admin_state in INSTANCE_ONLINE:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.LogInfo("Primary node offline, marked instance as stopped")
      return

    stop_result = self.rpc.call_instance_shutdown(primary, instance, timeout)
    stop_err = stop_result.fail_msg
    if stop_err:
      self.LogWarning("Could not shutdown instance: %s", stop_err)

    _ShutdownInstanceDisks(self, instance)
7195
7196
class LUInstanceReinstall(LogicalUnit):
  """Logical unit re-running the OS installation of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    Only the standard instance environment is exported.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node and all nodes of the instance are used for both the
    pre and the post phase.

    """
    master = self.cfg.GetMasterNode()
    hook_nodes = [master] + list(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, has disks and is
    down, and that its primary node is online. A newly requested OS and
    new OS parameters are verified as well.

    """
    op = self.op
    inst = self.cfg.GetInstanceInfo(op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % op.instance_name
    _CheckNodeOnline(self, inst.primary_node,
                     "Instance primary node offline, cannot reinstall")

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceState(self, inst, INSTANCE_DOWN, msg="cannot reinstall")

    if op.os_type is None:
      # Keep the OS the instance already uses
      target_os = inst.os
    else:
      # A different OS was requested, verify the primary node has it
      pnode = _ExpandNodeName(self.cfg, inst.primary_node)
      _CheckNodeHasOS(self, pnode, op.os_type, op.force_variant)
      target_os = op.os_type

    node_names = list(inst.all_nodes)

    if op.osparams:
      # The merged dict (without defaults) is what gets passed to the
      # OS scripts later on
      merged_osparams = _GetUpdatedParams(inst.osparams, op.osparams)
      _CheckOSParams(self, True, node_names, target_os, merged_osparams)
      self.os_inst = merged_osparams
    else:
      self.os_inst = None

    self.instance = inst

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    The disks are activated for the duration of the OS create scripts
    and deactivated again afterwards, whether or not the scripts
    succeeded.

    """
    inst = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      inst.os = new_os
      # Make the change persistent before running any script
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      pnode = inst.primary_node
      os_add_res = self.rpc.call_instance_os_add(pnode,
                                                 (inst, self.os_inst), True,
                                                 self.op.debug_level)
      failure_text = ("Could not install OS for instance %s on node %s" %
                      (inst.name, inst.primary_node))
      os_add_res.Raise(failure_text)
    finally:
      _ShutdownInstanceDisks(self, inst)
7283
7284
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  The disks are recreated either on the current nodes of the instance, on
  explicitly given replacement nodes (C{self.op.nodes}) or on nodes
  selected by an iallocator (C{self.op.iallocator}). While recreating,
  only the disk parameters listed in L{_MODIFYABLE} may be changed.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  # Disk parameters which may be changed while a disk is recreated
  _MODIFYABLE = compat.UniqueFrozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    On success the nodes returned by the allocator are stored in
    C{self.op.nodes}.

    @raise errors.OpPrereqError: if the allocator run was not successful

    """
    be_full = self.cfg.GetClusterInfo().FillBE(self.instance)

    # FIXME
    # The allocator should actually run in "relocate" mode, but current
    # allocators don't support relocating all the nodes of an instance at
    # the same time. As a workaround we use "allocate" mode, but this is
    # suboptimal for two reasons:
    # - The instance name passed to the allocator is present in the list of
    #   existing instances, so there could be a conflict within the
    #   internal structures of the allocator. This doesn't happen with the
    #   current allocators, but it's a liability.
    # - The allocator counts the resources used by the instance twice: once
    #   because the instance exists already, and once because it tries to
    #   allocate a new instance.
    # The allocator could choose some of the nodes on which the instance is
    # running, but that's not a problem. If the instance nodes are broken,
    # they should already be marked as drained or offline, and hence
    # skipped by the allocator. If instance disks have been lost for other
    # reasons, then recreating the disks on the same nodes should be fine.
    disk_template = self.instance.disk_template
    spindle_use = be_full[constants.BE_SPINDLE_USE]
    req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
                                        disk_template=disk_template,
                                        tags=list(self.instance.GetTags()),
                                        os=self.instance.os,
                                        nics=[{}],
                                        vcpus=be_full[constants.BE_VCPUS],
                                        memory=be_full[constants.BE_MAXMEM],
                                        spindle_use=spindle_use,
                                        disks=[{constants.IDISK_SIZE: d.size,
                                                constants.IDISK_MODE: d.mode}
                                                for d in self.instance.disks],
                                        hypervisor=self.instance.hypervisor)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    # The request must ask for as many nodes as the instance uses now
    assert req.RequiredNodes() == len(self.instance.all_nodes)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    # From here on the operation behaves as if the nodes had been given
    # explicitly in the opcode
    self.op.nodes = ial.result
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

  def CheckArguments(self):
    """Normalize and check the disk and node arguments.

    A list of plain disk indices (deprecated format) is converted to a
    list of C{(index, parameters)} pairs with empty parameters.

    @raise errors.OpPrereqError: if a disk is specified more than once or
        if a parameter outside of L{_MODIFYABLE} is given for a disk

    """
    if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    # We don't want _CheckIAllocatorOrNode selecting the default iallocator
    # when neither iallocator nor nodes are specified
    if self.op.iallocator or self.op.nodes:
      _CheckIAllocatorOrNode(self, "iallocator", "nodes")

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      # Anything not explicitly allowed is refused
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declarations.

    Node locks are named here only when replacement nodes were given in
    the opcode; otherwise the list is left empty and filled in by
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      if self.op.iallocator:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    """Declare the locks needed at the given level.

    @param level: Lock level

    """
    if level == locking.LEVEL_NODEGROUP:
      # This level is only requested by ExpandNames for iallocator runs
      assert self.op.iallocator is not None
      assert not self.op.nodes
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock the primary group used by the instance optimistically; this
      # requires going via the node before it's locked, requiring
      # verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)

    elif level == locking.LEVEL_NODE:
      # If an allocator is used, then we lock all the nodes in the current
      # instance group, as we don't know yet which ones will be selected;
      # if we replace the nodes without using an allocator, locks are
      # already declared in ExpandNames; otherwise, we need to lock all the
      # instance nodes for disk re-creation
      if self.op.iallocator:
        assert not self.op.nodes
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1

        # Lock member nodes of the group of the primary node
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          self.needed_locks[locking.LEVEL_NODE].extend(
            self.cfg.GetNodeGroup(group_uuid).members)

        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
      elif not self.op.nodes:
        self._LockInstancesNodes(primary_only=False)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node and all nodes of the instance are used for both the
    pre and the post phase.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    It also computes C{self.disks}, a dict mapping the index of each disk
    to recreate to its parameter changes, and runs the iallocator if one
    was requested.

    @raise errors.OpPrereqError: if the number of replacement nodes does
        not match the instance, the instance is diskless, a disk index is
        invalid or only some disks are to be recreated while the nodes
        are changed as well

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      # The first replacement node becomes the new primary node
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    # With an iallocator the future primary node isn't known yet
    if not self.op.iallocator:
      _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      # Node group locks are acquired only for the primary node (and only
      # when the allocator is used)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
                               primary_only=True)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    # Without an explicit selection all disks are recreated unchanged
    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    # NOTE(review): comparing a list with the result of range() relies on
    # range() returning a list, as it does in Python 2
    if ((self.op.nodes or self.op.iallocator) and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

    if self.op.iallocator:
      self._RunAllocator()
      # Release unneeded node and node resource locks
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)

    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Disks which were not selected are skipped. When replacement nodes are
    used, DRBD disks get a new logical ID pointing to the new nodes and
    using newly allocated minors, and the first replacement node becomes
    the primary node of the instance.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        # Port and secret are kept, nodes and minors are replaced
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    # NOTE(review): the configuration is only written explicitly when the
    # nodes were changed; size/mode changes applied above without new
    # nodes are not followed by an Update call in this method -- confirm
    # this is intended
    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    # All touched nodes must be locked
    mylocks = self.owned_locks(locking.LEVEL_NODE)
    assert mylocks.issuperset(frozenset(instance.all_nodes))
    _CreateDisks(self, instance, to_skip=to_skip)
7584
7585
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  The instance is renamed in the configuration first; afterwards the file
  storage directory (for file-based instances), the information stored
  on the disks and the OS are adjusted to the new name.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    @raise errors.OpPrereqError: if an IP check is requested without a
        name check

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.
    The requested new name is exported as C{INSTANCE_NEW_NAME}.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node and all nodes of the instance are used for both the
    pre and the post phase.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    If a name check is requested, C{self.op.new_name} is replaced by the
    name returned from C{_CheckHostnameSane}; if additionally an IP check
    is requested, the address of the new name must not answer on the
    node daemon port.

    @raise errors.OpPrereqError: if the IP of the new name is already in
        use or another instance with the new name exists

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = _CheckHostnameSane(self, new_name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    # Renaming an instance to its current name is not treated as a clash
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    Failures to update the information on the disks or to run the OS
    rename script are only reported as warnings, as the instance has
    already been renamed in the configuration at that point.

    @return: the new name of the instance

    """
    inst = self.instance
    old_name = inst.name

    # The old directory must be computed before the configuration changes
    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    # Everything done while the disks are active happens inside the "try"
    # block, so that the disks are deactivated again even if updating the
    # disk information fails unexpectedly
    try:
      # update info on disks
      info = _GetInstanceInfoText(inst)
      for (idx, disk) in enumerate(inst.disks):
        for node in inst.all_nodes:
          self.cfg.SetDiskID(disk, node)
          result = self.rpc.call_blockdev_setinfo(node, disk, info)
          if result.fail_msg:
            self.LogWarning("Error setting info on node %s for disk %s: %s",
                            node, idx, result.fail_msg)

      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
7706
7707
class LUInstanceRemove(LogicalUnit):
  """Logical unit removing an instance from the cluster.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declarations.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    for lock_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
      self.needed_locks[lock_level] = []

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    @param level: Lock level

    """
    if level == locking.LEVEL_NODE_RES:
      # The resource locks mirror the node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = _CopyLockList(node_locks)
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The instance environment is extended with the shutdown timeout
    (C{SHUTDOWN_TIMEOUT}).

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The pre phase uses the master node only, the post phase additionally
    all nodes of the instance.

    """
    master_only = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes) + master_only
    return (master_only, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down first; a failing shutdown aborts the
    removal unless failures are to be ignored.

    """
    inst = self.instance
    pnode = inst.primary_node
    logging.info("Shutting down instance %s on node %s", inst.name, pnode)

    shutdown_res = self.rpc.call_instance_shutdown(pnode, inst,
                                                   self.op.shutdown_timeout)
    failure = shutdown_res.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    # Node and node resource locks must match and cover all instance nodes
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(inst.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
7784
7785
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove the disks of an instance and drop it from the configuration.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function called with a warning text if the disks
      can't be removed and failures are ignored
  @param instance: the instance to remove
  @param ignore_failures: whether a failed disk removal is only reported
      instead of aborting the removal
  @raise errors.OpExecError: if the disks can't be removed and failures
      are not ignored

  """
  name = instance.name
  logging.info("Removing block devices for instance %s", name)

  disks_gone = _RemoveDisks(lu, instance, ignore_failures=ignore_failures)
  if not disks_gone:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", name)
  lu.cfg.RemoveInstance(name)

  # Nobody else may have scheduled an instance lock for removal already
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Schedule the lock of the instance for removal
  lu.remove_locks[locking.LEVEL_INSTANCE] = name
7806
7807
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All the work is delegated to the object built by C{_InstanceQuery} in
  L{CheckArguments}.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Build the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    query_impl = self.iq
    query_impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks.

    @param level: Lock level

    """
    query_impl = self.iq
    query_impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query.

    @return: the result of the old-style query of the query object

    """
    query_impl = self.iq
    return query_impl.OldStyleQuery(self)
7827
7828
def _ExpandNamesForMigration(lu):
  """Prepares names and lock declarations for L{TLMigrateInstance}.

  The target node name of the opcode, if given, is expanded in place.

  @type lu: L{LogicalUnit}

  """
  target = lu.op.target_node
  if target is not None:
    lu.op.target_node = _ExpandNodeName(lu.cfg, target)

  # Node and node resource locks start out empty and are recalculated
  # in replace mode
  for lock_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
    lu.needed_locks[lock_level] = []
    lu.recalculate_locks[lock_level] = constants.LOCKS_REPLACE

  # The node allocation lock is actually only needed for replicated instances
  # (e.g. DRBD8) and if an iallocator is used.
  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
7847
7848
def _DeclareLocksForMigration(lu, level):
  """Declares node-level locks for L{TLMigrateInstance}.

  @type lu: L{LogicalUnit}
  @param level: Lock level

  """
  if level == locking.LEVEL_NODE_ALLOC:
    inst_name = lu.op.instance_name
    assert inst_name in lu.owned_locks(locking.LEVEL_INSTANCE)

    instance = lu.cfg.GetInstanceInfo(inst_name)

    # The node locks are declared at this level already, as the instance
    # object is needed anyway to decide about the node allocation lock
    if instance.disk_template not in constants.DTS_EXT_MIRROR:
      lu._LockInstancesNodes() # pylint: disable=W0212
      return

    target = lu.op.target_node
    if target is not None:
      lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node, target]
    else:
      # Any node could become the target, hence everything is locked
      lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
    # The node locks are final, no recalculation needed
    del lu.recalculate_locks[locking.LEVEL_NODE]

  elif level == locking.LEVEL_NODE:
    # Node locks were declared together with the node allocation lock
    assert (lu.needed_locks[locking.LEVEL_NODE] or
            lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)

  elif level == locking.LEVEL_NODE_RES:
    # The resource locks mirror the node locks
    node_locks = lu.needed_locks[locking.LEVEL_NODE]
    lu.needed_locks[locking.LEVEL_NODE_RES] = _CopyLockList(node_locks)
7883
7884
class LUInstanceFailover(LogicalUnit):
  """Logical unit failing over an instance.

  The actual work is done by a L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The optional opcode parameters are copied to the LU, defaulting to
    C{None} if the opcode doesn't have them.

    """
    for attr_name in ("iallocator", "target_node"):
      setattr(self, attr_name, getattr(self.op, attr_name, None))

  def ExpandNames(self):
    """Lock the instance and set up the tasklet.

    """
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    op = self.op
    migrater = TLMigrateInstance(self, op.instance_name, False, True, False,
                                 op.ignore_consistency, True,
                                 op.shutdown_timeout, op.ignore_ipolicy)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the locks via L{_DeclareLocksForMigration}.

    @param level: Lock level

    """
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    The old and new primary and secondary nodes are exported in addition
    to the standard instance environment.

    """
    instance = self._migrater.instance
    old_primary = instance.primary_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # Primary and secondary node swap their roles
      old_secondary = instance.secondary_nodes[0]
      new_secondary = old_primary
    else:
      old_secondary = new_secondary = ""

    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "NEW_PRIMARY": self.op.target_node,
      "OLD_SECONDARY": old_secondary,
      "NEW_SECONDARY": new_secondary,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The pre phase uses the master and the secondary nodes, the post phase
    additionally the primary node of the instance.

    """
    instance = self._migrater.instance
    pre_nodes = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    post_nodes = pre_nodes + [instance.primary_node]
    return (pre_nodes, post_nodes)
7947
7948
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The work itself is delegated to a
  L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set up the migration tasklet.

    """
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    # Spelled out by keyword to make the long flag list readable
    self._migrater = TLMigrateInstance(
      self, self.op.instance_name,
      cleanup=self.op.cleanup,
      failover=False,
      fallback=self.op.allow_failover,
      ignore_consistency=False,
      allow_runtime_changes=self.op.allow_runtime_changes,
      shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
      ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the locks via the helper shared with failover.

    """
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    NOTE(review): C{NEW_PRIMARY} and C{OLD_SECONDARY} are taken from
    the opcode's C{target_node}, which looks like it can be unset when
    the target is derived from the secondary node or an iallocator -
    confirm.

    """
    migrater = self._migrater
    instance = migrater.instance
    old_primary = instance.primary_node
    new_primary = self.op.target_node

    # Start from the generic instance data and add the migration keys
    hook_env = _BuildInstanceHookEnvByObject(self, instance)
    hook_env["MIGRATE_LIVE"] = migrater.live
    hook_env["MIGRATE_CLEANUP"] = self.op.cleanup
    hook_env["OLD_PRIMARY"] = old_primary
    hook_env["NEW_PRIMARY"] = new_primary
    hook_env["ALLOW_RUNTIME_CHANGES"] = self.op.allow_runtime_changes

    # With internally mirrored disks both nodes simply swap their roles
    if instance.disk_template in constants.DTS_INT_MIRROR:
      (old_secondary, new_secondary) = (new_primary, old_primary)
    else:
      old_secondary = new_secondary = None
    hook_env["OLD_SECONDARY"] = old_secondary
    hook_env["NEW_SECONDARY"] = new_secondary

    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair of node lists; the first holds the master and the
        instance's secondary nodes, the second additionally ends with
        the instance's primary node

    """
    instance = self._migrater.instance
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(instance.secondary_nodes)
    nodes_with_primary = nodes + [instance.primary_node]
    return (nodes, nodes_with_primary)
8009
8010
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its current primary node, its disks are
  created on the target node and filled by copying the data over, the
  configuration is switched to the target node and the instance is
  started there if it is marked as up.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance and target node names and prepare the locks.

    The expanded target node name is written back to the opcode.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    # The target node is known right away; the node level is recalculated
    # in append mode so that this entry is kept
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    @param level: the locking level currently being declared

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list (master, current primary node and target
        node) twice

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the target node differs from the instance's
    primary node, that every disk is a plain logical volume or a file
    (other layouts cannot be copied) and that the target node is
    online, not drained, VM-capable, acceptable for the instance policy
    and provides the instance's bridges. If the instance is marked as
    up, the free memory on the target node is checked as well.

    @raise errors.OpPrereqError: if the instance already lives on the
        target node or one of its disks has a layout which cannot be
        copied

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    # A single cluster lookup serves both the backend parameters and the
    # instance policy calculation below
    cluster = self.cfg.GetClusterInfo()
    bep = cluster.FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    group_info = self.cfg.GetNodeGroup(node.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the target node; the instance will be
      # started there once the data has been copied
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: function used to send feedback back to the
        caller; passed on to the configuration update
    @raise errors.OpExecError: if the shutdown fails (unless
        C{ignore_consistency} is set), if the disks cannot be created
        or copied, or if the instance cannot be started on the target

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.LogWarning("Could not shutdown instance %s on node %s."
                        " Proceeding anyway. Please make sure node"
                        " %s is down. Error details: %s",
                        instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # Release the minors and re-raise even if the removal failed
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over; the first failure stops the
    # loop, the cleanup is done below
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, (disk, instance),
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # The copy error is reported whatever the outcome of the removal
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # From here on the instance lives on the target node
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
8207
8208
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  No migration is done by this LU itself; L{Exec} returns one
  C{OpInstanceMigrate} job per instance reported by
  C{_GetNodePrimaryInstances} for the node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """Nothing to verify at the argument level.

    """

  def ExpandNames(self):
    """Expand the node name and request a shared lock on the node.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.share_locks = _ShareAll()
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    hook_env = {}
    hook_env["NODE_NAME"] = self.op.node_name
    hook_env["ALLOW_RUNTIME_CHANGES"] = self.op.allow_runtime_changes
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the list holding only the master node, twice

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)

  def CheckPrereq(self):
    """Nothing to verify; the migration jobs do their own checks.

    """

  def Exec(self, feedback_fn):
    """Prepare one migration job per instance of the node.

    @return: a L{ResultWithJobs} holding one single-opcode job per
        instance

    """
    allow_runtime_changes = self.op.allow_runtime_changes

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.
    jobs = []
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      migrate_op = \
        opcodes.OpInstanceMigrate(instance_name=inst.name,
                                  mode=self.op.mode,
                                  live=self.op.live,
                                  iallocator=self.op.iallocator,
                                  target_node=self.op.target_node,
                                  allow_runtime_changes=allow_runtime_changes,
                                  ignore_ipolicy=self.op.ignore_ipolicy)
      jobs.append([migrate_op])

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
8271
8272
8273 class TLMigrateInstance(Tasklet):
8274   """Tasklet class for instance migration.
8275
8276   @type live: boolean
8277   @ivar live: whether the migration will be done live or non-live;
8278       this variable is initialized only after CheckPrereq has run
8279   @type cleanup: boolean
8280   @ivar cleanup: Whether we cleanup from a failed migration
8281   @type iallocator: string
8282   @ivar iallocator: The iallocator used to determine target_node
8283   @type target_node: string
8284   @ivar target_node: If given, the target_node to reallocate the instance to
8285   @type failover: boolean
8286   @ivar failover: Whether operation results in failover or migration
8287   @type fallback: boolean
8288   @ivar fallback: Whether fallback to failover is allowed if migration not
8289                   possible
8290   @type ignore_consistency: boolean
8291   @ivar ignore_consistency: Whether we should ignore consistency between source
8292                             and target node
8293   @type shutdown_timeout: int
8294   @ivar shutdown_timeout: In case of failover timeout of the shutdown
8295   @type ignore_ipolicy: bool
8296   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8297
8298   """
8299
8300   # Constants
8301   _MIGRATION_POLL_INTERVAL = 1      # seconds
8302   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8303
8304   def __init__(self, lu, instance_name, cleanup, failover, fallback,
8305                ignore_consistency, allow_runtime_changes, shutdown_timeout,
8306                ignore_ipolicy):
8307     """Initializes this class.
8308
8309     """
8310     Tasklet.__init__(self, lu)
8311
8312     # Parameters
8313     self.instance_name = instance_name
8314     self.cleanup = cleanup
8315     self.live = False # will be overridden later
8316     self.failover = failover
8317     self.fallback = fallback
8318     self.ignore_consistency = ignore_consistency
8319     self.shutdown_timeout = shutdown_timeout
8320     self.ignore_ipolicy = ignore_ipolicy
8321     self.allow_runtime_changes = allow_runtime_changes
8322
8323   def CheckPrereq(self):
8324     """Check prerequisites.
8325
8326     This checks that the instance is in the cluster.
8327
8328     """
8329     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8330     instance = self.cfg.GetInstanceInfo(instance_name)
8331     assert instance is not None
8332     self.instance = instance
8333     cluster = self.cfg.GetClusterInfo()
8334
8335     if (not self.cleanup and
8336         not instance.admin_state == constants.ADMINST_UP and
8337         not self.failover and self.fallback):
8338       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8339                       " switching to failover")
8340       self.failover = True
8341
8342     if instance.disk_template not in constants.DTS_MIRRORED:
8343       if self.failover:
8344         text = "failovers"
8345       else:
8346         text = "migrations"
8347       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8348                                  " %s" % (instance.disk_template, text),
8349                                  errors.ECODE_STATE)
8350
8351     if instance.disk_template in constants.DTS_EXT_MIRROR:
8352       assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8353
8354       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8355
8356       if self.lu.op.iallocator:
8357         self._RunAllocator()
8358       else:
8359         # We set set self.target_node as it is required by
8360         # BuildHooksEnv
8361         self.target_node = self.lu.op.target_node
8362
8363       # Check that the target node is correct in terms of instance policy
8364       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8365       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8366       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8367                                                               group_info)
8368       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8369                               ignore=self.ignore_ipolicy)
8370
8371       # self.target_node is already populated, either directly or by the
8372       # iallocator run
8373       target_node = self.target_node
8374       if self.target_node == instance.primary_node:
8375         raise errors.OpPrereqError("Cannot migrate instance %s"
8376                                    " to its primary (%s)" %
8377                                    (instance.name, instance.primary_node),
8378                                    errors.ECODE_STATE)
8379
8380       if len(self.lu.tasklets) == 1:
8381         # It is safe to release locks only when we're the only tasklet
8382         # in the LU
8383         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8384                       keep=[instance.primary_node, self.target_node])
8385         _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8386
8387     else:
8388       assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8389
8390       secondary_nodes = instance.secondary_nodes
8391       if not secondary_nodes:
8392         raise errors.ConfigurationError("No secondary node but using"
8393                                         " %s disk template" %
8394                                         instance.disk_template)
8395       target_node = secondary_nodes[0]
8396       if self.lu.op.iallocator or (self.lu.op.target_node and
8397                                    self.lu.op.target_node != target_node):
8398         if self.failover:
8399           text = "failed over"
8400         else:
8401           text = "migrated"
8402         raise errors.OpPrereqError("Instances with disk template %s cannot"
8403                                    " be %s to arbitrary nodes"
8404                                    " (neither an iallocator nor a target"
8405                                    " node can be passed)" %
8406                                    (instance.disk_template, text),
8407                                    errors.ECODE_INVAL)
8408       nodeinfo = self.cfg.GetNodeInfo(target_node)
8409       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8410       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8411                                                               group_info)
8412       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8413                               ignore=self.ignore_ipolicy)
8414
8415     i_be = cluster.FillBE(instance)
8416
8417     # check memory requirements on the secondary node
8418     if (not self.cleanup and
8419          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8420       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8421                                                "migrating instance %s" %
8422                                                instance.name,
8423                                                i_be[constants.BE_MINMEM],
8424                                                instance.hypervisor)
8425     else:
8426       self.lu.LogInfo("Not checking memory on the secondary node as"
8427                       " instance will not be started")
8428
8429     # check if failover must be forced instead of migration
8430     if (not self.cleanup and not self.failover and
8431         i_be[constants.BE_ALWAYS_FAILOVER]):
8432       self.lu.LogInfo("Instance configured to always failover; fallback"
8433                       " to failover")
8434       self.failover = True
8435
8436     # check bridge existance
8437     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8438
8439     if not self.cleanup:
8440       _CheckNodeNotDrained(self.lu, target_node)
8441       if not self.failover:
8442         result = self.rpc.call_instance_migratable(instance.primary_node,
8443                                                    instance)
8444         if result.fail_msg and self.fallback:
8445           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8446                           " failover")
8447           self.failover = True
8448         else:
8449           result.Raise("Can't migrate, please use failover",
8450                        prereq=True, ecode=errors.ECODE_STATE)
8451
8452     assert not (self.failover and self.cleanup)
8453
8454     if not self.failover:
8455       if self.lu.op.live is not None and self.lu.op.mode is not None:
8456         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8457                                    " parameters are accepted",
8458                                    errors.ECODE_INVAL)
8459       if self.lu.op.live is not None:
8460         if self.lu.op.live:
8461           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8462         else:
8463           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8464         # reset the 'live' parameter to None so that repeated
8465         # invocations of CheckPrereq do not raise an exception
8466         self.lu.op.live = None
8467       elif self.lu.op.mode is None:
8468         # read the default value from the hypervisor
8469         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8470         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8471
8472       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8473     else:
8474       # Failover is never live
8475       self.live = False
8476
8477     if not (self.failover or self.cleanup):
8478       remote_info = self.rpc.call_instance_info(instance.primary_node,
8479                                                 instance.name,
8480                                                 instance.hypervisor)
8481       remote_info.Raise("Error checking instance on node %s" %
8482                         instance.primary_node)
8483       instance_running = bool(remote_info.payload)
8484       if instance_running:
8485         self.current_mem = int(remote_info.payload["memory"])
8486
8487   def _RunAllocator(self):
8488     """Run the allocator based on input opcode.
8489
8490     """
8491     assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8492
8493     # FIXME: add a self.ignore_ipolicy option
8494     req = iallocator.IAReqRelocate(name=self.instance_name,
8495                                    relocate_from=[self.instance.primary_node])
8496     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8497
8498     ial.Run(self.lu.op.iallocator)
8499
8500     if not ial.success:
8501       raise errors.OpPrereqError("Can't compute nodes using"
8502                                  " iallocator '%s': %s" %
8503                                  (self.lu.op.iallocator, ial.info),
8504                                  errors.ECODE_NORES)
8505     self.target_node = ial.result[0]
8506     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8507                     self.instance_name, self.lu.op.iallocator,
8508                     utils.CommaJoin(ial.result))
8509
8510   def _WaitUntilSync(self):
8511     """Poll with custom rpc for disk sync.
8512
8513     This uses our own step-based rpc call.
8514
8515     """
8516     self.feedback_fn("* wait until resync is done")
8517     all_done = False
8518     while not all_done:
8519       all_done = True
8520       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8521                                             self.nodes_ip,
8522                                             (self.instance.disks,
8523                                              self.instance))
8524       min_percent = 100
8525       for node, nres in result.items():
8526         nres.Raise("Cannot resync disks on node %s" % node)
8527         node_done, node_percent = nres.payload
8528         all_done = all_done and node_done
8529         if node_percent is not None:
8530           min_percent = min(min_percent, node_percent)
8531       if not all_done:
8532         if min_percent < 100:
8533           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8534         time.sleep(2)
8535
8536   def _EnsureSecondary(self, node):
8537     """Demote a node to secondary.
8538
8539     """
8540     self.feedback_fn("* switching node %s to secondary mode" % node)
8541
8542     for dev in self.instance.disks:
8543       self.cfg.SetDiskID(dev, node)
8544
8545     result = self.rpc.call_blockdev_close(node, self.instance.name,
8546                                           self.instance.disks)
8547     result.Raise("Cannot change disk to secondary on node %s" % node)
8548
8549   def _GoStandalone(self):
8550     """Disconnect from the network.
8551
8552     """
8553     self.feedback_fn("* changing into standalone mode")
8554     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8555                                                self.instance.disks)
8556     for node, nres in result.items():
8557       nres.Raise("Cannot disconnect disks node %s" % node)
8558
8559   def _GoReconnect(self, multimaster):
8560     """Reconnect to the network.
8561
8562     """
8563     if multimaster:
8564       msg = "dual-master"
8565     else:
8566       msg = "single-master"
8567     self.feedback_fn("* changing disks into %s mode" % msg)
8568     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8569                                            (self.instance.disks, self.instance),
8570                                            self.instance.name, multimaster)
8571     for node, nres in result.items():
8572       nres.Raise("Cannot change disks config on node %s" % node)
8573
8574   def _ExecCleanup(self):
8575     """Try to cleanup after a failed migration.
8576
8577     The cleanup is done by:
8578       - check that the instance is running only on one node
8579         (and update the config if needed)
8580       - change disks on its secondary node to secondary
8581       - wait until disks are fully synchronized
8582       - disconnect from the network
8583       - change disks into single-master mode
8584       - wait again until disks are fully synchronized
8585
8586     """
8587     instance = self.instance
8588     target_node = self.target_node
8589     source_node = self.source_node
8590
8591     # check running on only one node
8592     self.feedback_fn("* checking where the instance actually runs"
8593                      " (if this hangs, the hypervisor might be in"
8594                      " a bad state)")
8595     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8596     for node, result in ins_l.items():
8597       result.Raise("Can't contact node %s" % node)
8598
8599     runningon_source = instance.name in ins_l[source_node].payload
8600     runningon_target = instance.name in ins_l[target_node].payload
8601
8602     if runningon_source and runningon_target:
8603       raise errors.OpExecError("Instance seems to be running on two nodes,"
8604                                " or the hypervisor is confused; you will have"
8605                                " to ensure manually that it runs only on one"
8606                                " and restart this operation")
8607
8608     if not (runningon_source or runningon_target):
8609       raise errors.OpExecError("Instance does not seem to be running at all;"
8610                                " in this case it's safer to repair by"
8611                                " running 'gnt-instance stop' to ensure disk"
8612                                " shutdown, and then restarting it")
8613
8614     if runningon_target:
8615       # the migration has actually succeeded, we need to update the config
8616       self.feedback_fn("* instance running on secondary node (%s),"
8617                        " updating config" % target_node)
8618       instance.primary_node = target_node
8619       self.cfg.Update(instance, self.feedback_fn)
8620       demoted_node = source_node
8621     else:
8622       self.feedback_fn("* instance confirmed to be running on its"
8623                        " primary node (%s)" % source_node)
8624       demoted_node = target_node
8625
8626     if instance.disk_template in constants.DTS_INT_MIRROR:
8627       self._EnsureSecondary(demoted_node)
8628       try:
8629         self._WaitUntilSync()
8630       except errors.OpExecError:
8631         # we ignore here errors, since if the device is standalone, it
8632         # won't be able to sync
8633         pass
8634       self._GoStandalone()
8635       self._GoReconnect(False)
8636       self._WaitUntilSync()
8637
8638     self.feedback_fn("* done")
8639
8640   def _RevertDiskStatus(self):
8641     """Try to revert the disk status after a failed migration.
8642
8643     """
8644     target_node = self.target_node
8645     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8646       return
8647
8648     try:
8649       self._EnsureSecondary(target_node)
8650       self._GoStandalone()
8651       self._GoReconnect(False)
8652       self._WaitUntilSync()
8653     except errors.OpExecError, err:
8654       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8655                          " please try to recover the instance manually;"
8656                          " error '%s'" % str(err))
8657
8658   def _AbortMigration(self):
8659     """Call the hypervisor code to abort a started migration.
8660
8661     """
8662     instance = self.instance
8663     target_node = self.target_node
8664     source_node = self.source_node
8665     migration_info = self.migration_info
8666
8667     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8668                                                                  instance,
8669                                                                  migration_info,
8670                                                                  False)
8671     abort_msg = abort_result.fail_msg
8672     if abort_msg:
8673       logging.error("Aborting migration failed on target node %s: %s",
8674                     target_node, abort_msg)
8675       # Don't raise an exception here, as we stil have to try to revert the
8676       # disk status, even if this step failed.
8677
8678     abort_result = self.rpc.call_instance_finalize_migration_src(
8679       source_node, instance, False, self.live)
8680     abort_msg = abort_result.fail_msg
8681     if abort_msg:
8682       logging.error("Aborting migration failed on source node %s: %s",
8683                     source_node, abort_msg)
8684
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    The disk mode changes are skipped for disk templates listed in
    C{constants.DTS_EXT_MIRROR}.

    If preparing the target, starting the migration or the memory
    transfer fails, the migration is aborted on both nodes and the disk
    status is reverted before the error is raised.

    @raise errors.OpExecError: if a disk is not consistent on the target
        node, if there is not enough memory on the target and runtime
        changes are not allowed, or if any migration step fails

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, [self.instance.hypervisor])
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    # Only one hypervisor was requested, hence the third payload element is
    # unpacked as a one-element sequence
    (_, _, (src_info, )) = nodeinfo[source_node].payload
    (_, _, (dst_info, )) = nodeinfo[target_node].payload

    # The comparison is only possible if both nodes reported a version
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    # The instance currently uses more memory than is free on the target:
    # either shrink it to what is available (if runtime changes are allowed)
    # or refuse to migrate
    if self.current_mem > self.tgt_free_mem:
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (instance.name, target_node,
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # Also kept on self, as _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      # Clean up on both nodes and undo the disk changes before reporting
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    # Poll the source node until the migration is no longer active
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      # msg is tested first, so the payload of a failed RPC is never
      # dereferenced
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        if not msg:
          # The RPC itself worked, it is the hypervisor that reported failure
          msg = "hypervisor returned failure"
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      # Failed statuses were handled above, so any other non-active status
      # ends the polling
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      # Report progress at most once per feedback interval, and only if
      # transfer figures are available
      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    # The target node becomes the primary node in the configuration before
    # the migration is finalized on the target
    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Demote the old primary to secondary and go back to single-master mode
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    # If the instance's disk template is `rbd' and there was a successful
    # migration, unmap the device from the source node.
    if self.instance.disk_template == constants.DT_RBD:
      disks = _ExpandCheckDisks(instance, instance.disks)
      self.feedback_fn("* unmapping instance's disks from %s" % source_node)
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
        msg = result.fail_msg
        if msg:
          # Only logged: the migration itself has already succeeded
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name, source_node, msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name, source_node)

    self.feedback_fn("* done")
8868
8869   def _ExecFailover(self):
8870     """Failover an instance.
8871
8872     The failover is done by shutting it down on its present node and
8873     starting it on the secondary.
8874
8875     """
8876     instance = self.instance
8877     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8878
8879     source_node = instance.primary_node
8880     target_node = self.target_node
8881
8882     if instance.admin_state == constants.ADMINST_UP:
8883       self.feedback_fn("* checking disk consistency between source and target")
8884       for (idx, dev) in enumerate(instance.disks):
8885         # for drbd, these are drbd over lvm
8886         if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8887                                      False):
8888           if primary_node.offline:
8889             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8890                              " target node %s" %
8891                              (primary_node.name, idx, target_node))
8892           elif not self.ignore_consistency:
8893             raise errors.OpExecError("Disk %s is degraded on target node,"
8894                                      " aborting failover" % idx)
8895     else:
8896       self.feedback_fn("* not checking disk consistency as instance is not"
8897                        " running")
8898
8899     self.feedback_fn("* shutting down instance on source node")
8900     logging.info("Shutting down instance %s on node %s",
8901                  instance.name, source_node)
8902
8903     result = self.rpc.call_instance_shutdown(source_node, instance,
8904                                              self.shutdown_timeout)
8905     msg = result.fail_msg
8906     if msg:
8907       if self.ignore_consistency or primary_node.offline:
8908         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8909                            " proceeding anyway; please make sure node"
8910                            " %s is down; error details: %s",
8911                            instance.name, source_node, source_node, msg)
8912       else:
8913         raise errors.OpExecError("Could not shutdown instance %s on"
8914                                  " node %s: %s" %
8915                                  (instance.name, source_node, msg))
8916
8917     self.feedback_fn("* deactivating the instance's disks on source node")
8918     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8919       raise errors.OpExecError("Can't shut down the instance's disks")
8920
8921     instance.primary_node = target_node
8922     # distribute new instance config to the other nodes
8923     self.cfg.Update(instance, self.feedback_fn)
8924
8925     # Only start the instance if it's marked as up
8926     if instance.admin_state == constants.ADMINST_UP:
8927       self.feedback_fn("* activating the instance's disks on target node %s" %
8928                        target_node)
8929       logging.info("Starting instance %s on node %s",
8930                    instance.name, target_node)
8931
8932       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8933                                            ignore_secondaries=True)
8934       if not disks_ok:
8935         _ShutdownInstanceDisks(self.lu, instance)
8936         raise errors.OpExecError("Can't activate the instance's disks")
8937
8938       self.feedback_fn("* starting the instance on the target node %s" %
8939                        target_node)
8940       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8941                                             False)
8942       msg = result.fail_msg
8943       if msg:
8944         _ShutdownInstanceDisks(self.lu, instance)
8945         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8946                                  (instance.name, target_node, msg))
8947
8948   def Exec(self, feedback_fn):
8949     """Perform the migration.
8950
8951     """
8952     self.feedback_fn = feedback_fn
8953     self.source_node = self.instance.primary_node
8954
8955     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8956     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8957       self.target_node = self.instance.secondary_nodes[0]
8958       # Otherwise self.target_node has been populated either
8959       # directly, or through an iallocator.
8960
8961     self.all_nodes = [self.source_node, self.target_node]
8962     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8963                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8964
8965     if self.failover:
8966       feedback_fn("Failover instance %s" % self.instance.name)
8967       self._ExecFailover()
8968     else:
8969       feedback_fn("Migrating instance %s" % self.instance.name)
8970
8971       if self.cleanup:
8972         return self._ExecCleanup()
8973       else:
8974         return self._ExecMigration()
8975
8976
def _CreateBlockDev(lu, node, instance, device, force_create, info,
                    force_open):
  """Wrapper around L{_CreateBlockDevInner}.

  The root device is annotated with its disk parameters first; all
  other arguments are handed through unchanged.

  """
  annotated = _AnnotateDiskParams(instance, [device], lu.cfg)
  # A single disk went in, so exactly one annotated disk must come back
  (root_disk,) = annotated
  return _CreateBlockDevInner(lu, node, instance, root_disk, force_create,
                              info, force_open)
8987
8988
def _CreateBlockDevInner(lu, node, instance, device, force_create,
                         info, force_open):
  """Create a tree of block devices on a given node.

  The children of the device are always visited first. The device
  itself is only created if creation is forced, either by the caller
  or because the device reports that it has to be created on
  secondaries; in the latter case creation is forced for all of its
  children as well.

  @attention: The device has to be annotated already.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      CreateOnSecondary() is true
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
    must_create = True
  else:
    must_create = force_create

  # Children first, so that they exist once the parent gets created
  for child in (device.children or []):
    _CreateBlockDevInner(lu, node, instance, child, must_create,
                         info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
9031
9032
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  The children of the device are not touched, so they must have been
  created in advance. If the device has no physical ID yet, the payload
  of the creation call is stored as its physical ID.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  creation = lu.rpc.call_blockdev_create(node, device, device.size,
                                         instance.name, force_open, info)
  failure_text = ("Can't create block device %s on"
                  " node %s for instance %s" % (device, node, instance.name))
  creation.Raise(failure_text)

  if device.physical_id is None:
    device.physical_id = creation.payload
9061
9062
9063 def _GenerateUniqueNames(lu, exts):
9064   """Generate a suitable LV name.
9065
9066   This will generate a logical volume name for the given instance.
9067
9068   """
9069   results = []
9070   for val in exts:
9071     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9072     results.append("%s%s" % (new_id, val))
9073   return results
9074
9075
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  A port and a shared secret are allocated for the new device.

  @param lu: the logical unit on whose behalf we execute
  @param primary: first node of the DRBD logical ID
  @param secondary: second node of the DRBD logical ID
  @param size: size of the DRBD device and of its data volume
  @param vgnames: volume groups of the data and of the metadata volume
  @param names: LV names of the data and of the metadata volume
  @param iv_name: the C{iv_name} of the DRBD device
  @param p_minor: DRBD minor stored after the port in the logical ID
  @param s_minor: DRBD minor stored after C{p_minor} in the logical ID
  @return: the DRBD8 L{objects.Disk}, with the data and metadata
      volumes as its children

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgnames[0], names[0]),
                         params={})
  meta_lv = objects.Disk(dev_type=constants.LD_LV,
                         size=constants.DRBD_META_SIZE,
                         logical_id=(vgnames[1], names[1]),
                         params={})

  drbd_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name, params={})
9099
9100
# Disk templates for which _GenerateDiskTemplate generates unique volume
# names; the value is inserted between the unique ID and the ".disk<N>"
# suffix. For templates not listed here no names are generated.
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  }


# Logical device type used by _GenerateDiskTemplate for the disks of each
# single-node disk template (DRBD8 and diskless are handled separately there)
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  }
9114
9115
def _GenerateDiskTemplate(
  lu, template_name, instance_name, primary_node, secondary_nodes,
  disk_info, file_storage_dir, file_driver, base_index,
  feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
  _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template to generate the disks for
  @param instance_name: the instance name, used when allocating DRBD minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: the secondary nodes; exactly one is required
      for DRBD8 and none is allowed for the other templates
  @param disk_info: list of dicts describing the requested disks
  @param file_storage_dir: directory holding the disks of file-based
      templates
  @param file_driver: driver stored in the logical ID of file-based disks
  @param base_index: index of the first generated disk
  @param feedback_fn: function used to report the generated disks
  @param full_disk_params: disk parameters, used to look up the default
      DRBD metadata volume group
  @return: list of L{objects.Disk}, one per entry of C{disk_info}
      (empty for the diskless template)
  @raise errors.ProgrammerError: if the secondary nodes don't fit the
      template or if the template is unknown

  """
  default_vg = lu.cfg.GetVGName()
  num_disks = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    peer_node = secondary_nodes[0]
    # Two minors per disk: one on the primary and one on the peer node
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, peer_node] * num_disks, instance_name)

    (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
                                                       full_disk_params)
    fallback_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]

    suffixes = [".disk%d" % (base_index + i) for i in range(num_disks)]
    # Two consecutive LV names per disk: data first, metadata second
    lv_names = []
    for lv_prefix in _GenerateUniqueNames(lu, suffixes):
      lv_names.extend([lv_prefix + "_data", lv_prefix + "_meta"])

    drbd_disks = []
    for (idx, disk) in enumerate(disk_info):
      first = idx * 2
      data_vg = disk.get(constants.IDISK_VG, default_vg)
      meta_vg = disk.get(constants.IDISK_METAVG, fallback_metavg)
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, peer_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      lv_names[first:first + 2],
                                      "disk/%d" % (idx + base_index),
                                      minors[first], minors[first + 1])
      drbd_dev.mode = disk[constants.IDISK_MODE]
      drbd_disks.append(drbd_dev)
    return drbd_disks

  # All remaining templates must not have secondary nodes
  if secondary_nodes:
    raise errors.ProgrammerError("Wrong template configuration")

  if template_name == constants.DT_FILE:
    _req_file_storage()
  elif template_name == constants.DT_SHARED_FILE:
    _req_shr_file_storage()

  name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
  if name_prefix is not None:
    names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                      (name_prefix, base_index + i)
                                      for i in range(num_disks)])
  else:
    names = None

  # Select the function computing the logical ID of a disk from its
  # position in disk_info, its disk index and its description
  if template_name == constants.DT_PLAIN:

    def logical_id_fn(idx, _, disk):
      return (disk.get(constants.IDISK_VG, default_vg), names[idx])

  elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):

    def logical_id_fn(_, disk_index, disk):
      return (file_driver, "%s/disk%d" % (file_storage_dir, disk_index))

  elif template_name == constants.DT_BLOCK:

    def logical_id_fn(idx, disk_index, disk):
      return (constants.BLOCKDEV_DRIVER_MANUAL, disk[constants.IDISK_ADOPT])

  elif template_name == constants.DT_RBD:

    def logical_id_fn(idx, _, disk):
      return ("rbd", names[idx])

  else:
    raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

  dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

  disks = []
  for (idx, disk) in enumerate(disk_info):
    disk_index = idx + base_index
    size = disk[constants.IDISK_SIZE]
    feedback_fn("* disk %s, size %s" %
                (disk_index, utils.FormatUnit(size, "h")))
    disks.append(objects.Disk(dev_type=dev_type, size=size,
                              logical_id=logical_id_fn(idx, disk_index, disk),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE],
                              params={}))
  return disks
9209
9210
9211 def _GetInstanceInfoText(instance):
9212   """Compute that text that should be added to the disk's metadata.
9213
9214   """
9215   return "originstname+%s" % instance.name
9216
9217
9218 def _CalcEta(time_taken, written, total_size):
9219   """Calculates the ETA based on size written and total size.
9220
9221   @param time_taken: The time taken so far
9222   @param written: amount written so far
9223   @param total_size: The total size of data to be written
9224   @return: The remaining time in seconds
9225
9226   """
9227   avg_time = time_taken / float(written)
9228   return (total_size - written) * avg_time
9229
9230
def _WipeDisks(lu, instance, disks=None):
  """Wipes instance disks.

  Disk synchronization is paused on the instance's primary node before
  wiping and resumed afterwards, even if the wipe itself fails. The
  disks are wiped chunk by chunk, with progress reported through
  C{lu.LogInfo}.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @type disks: None or list of tuple of (number, L{objects.Disk}, number)
  @param disks: the disks to wipe, as tuples of disk index, disk object
      and start offset; if None, all disks of the instance are wiped
      from offset zero

  """
  node = instance.primary_node

  if disks is None:
    disks = [(idx, disk, 0)
             for (idx, disk) in enumerate(instance.disks)]

  for (_, device, _) in disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pausing synchronization of disks of instance '%s'",
               instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (map(compat.snd, disks),
                                                   instance),
                                                  True)
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  # A disk whose synchronization could not be paused is still wiped
  for idx, success in enumerate(result.payload):
    if not success:
      logging.warning("Pausing synchronization of disk %s of instance '%s'"
                      " failed", idx, instance.name)

  try:
    for (idx, device, offset) in disks:
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
      # For small disks the truncation would yield zero; a zero-sized chunk
      # never advances the offset and makes the ETA computation divide by
      # zero, hence the lower bound of one.
      wipe_chunk_size = \
        max(1, int(min(constants.MAX_WIPE_CHUNK,
                       device.size / 100.0 *
                       constants.MIN_WIPE_CHUNK_PERCENT)))

      size = device.size
      # Zero makes the first progress report happen right after the first
      # chunk
      last_output = 0
      start_time = time.time()

      if offset == 0:
        info_text = ""
      else:
        info_text = (" (from %s to %s)" %
                     (utils.FormatUnit(offset, "h"),
                      utils.FormatUnit(size, "h")))

      lu.LogInfo("* Wiping disk %s%s", idx, info_text)

      logging.info("Wiping disk %d for instance %s on node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      while offset < size:
        # The last chunk may be smaller than the regular chunk size
        wipe_size = min(wipe_chunk_size, size - offset)

        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)

        result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
                                           wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))

        now = time.time()
        offset += wipe_size
        # Report progress at most once a minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s",
                     offset / float(size) * 100, utils.FormatSeconds(eta))
          last_output = now
  finally:
    logging.info("Resuming synchronization of disks for instance '%s'",
                 instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                    (map(compat.snd, disks),
                                                     instance),
                                                    False)

    # Failures to resume are only reported as warnings, so that they never
    # hide an error raised while wiping
    if result.fail_msg:
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                    node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
                        " failed", idx, instance.name)
9322
9323
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based disk
  templates the storage directory is created first. Nothing is
  returned; failures are reported through the C{Raise} method of the
  RPC results.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  """
  info = _GetInstanceInfoText(instance)

  if target_node is not None:
    creation_primary = target_node
    creation_nodes = [target_node]
  else:
    creation_primary = instance.primary_node
    creation_nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    mkdir_result = lu.rpc.call_file_storage_dir_create(creation_primary,
                                                       storage_dir)
    mkdir_result.Raise("Failed to create directory '%s' on"
                       " node %s" % (storage_dir, creation_primary))

  skipped = to_skip or ()

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for (idx, device) in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    #HARDCODE
    for node in creation_nodes:
      # Creation and opening are only forced on the (possibly overridden)
      # primary node
      on_primary = (node == creation_primary)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
9366
9367
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  The ports of DRBD disks are returned to the pool if all removals
  succeeded or if failures are to be ignored. For file-based disk
  templates the storage directory is removed as well.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @type ignore_failures: boolean
  @param ignore_failures: whether to release the DRBD ports even if
      some disks could not be removed
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      if result.fail_msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, result.fail_msg)
        # A failure on an offline node other than the primary node does not
        # count against the overall result
        if not (result.offline and node != instance.primary_node):
          all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # Report the node the removal was attempted on, which is not the
      # primary node if a target node was given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
9426
9427
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  @type disk_template: string
  @param disk_template: the disk template the requirements are computed for
  @type disks: list of dicts
  @param disks: the disk definitions; each one is read through the
      L{constants.IDISK_VG} and L{constants.IDISK_SIZE} keys
  @rtype: dict
  @return: a dictionary mapping each volume group name to the total size
      required on it (empty for templates which do not use volume groups)
  @raise errors.ProgrammerError: if the disk template is not known

  """
  def _compute(disks, payload):
    """Universal algorithm.

    Sums up, per volume group, the size of every disk plus C{payload},
    the fixed overhead added for each disk.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # The running total must be looked up under the name of this disk's
      # volume group; looking it up under the IDISK_VG key name itself
      # never matches, so disks sharing a VG would overwrite each other
      # instead of adding up.
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
9458
9459
def _FilterVmNodes(lu, nodenames):
  """Drops the nodes which are not vm_capable from a list of node names.

  The relative order of the remaining names is kept.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_capable = frozenset(lu.cfg.GetNonVmCapableNodeList())

  kept = []
  for node_name in nodenames:
    if node_name in non_vm_capable:
      continue
    kept.append(node_name)

  return kept
9473
9474
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Validates hypervisor parameters on a set of nodes.

  This function abstracts the hypervisor parameter validation so that it
  can be shared by instance creation and instance modification. The given
  parameters are laid over the cluster-level ones for the hypervisor
  before being sent to the (vm-capable) nodes; results coming from
  offline nodes are ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)

  cluster_hvparams = lu.cfg.GetClusterInfo().hvparams.get(hvname, {})
  full_params = objects.FillDict(cluster_hvparams, hvparams)

  results = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname,
                                                   full_params)
  for node_name in vm_nodes:
    node_result = results[node_name]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node_name)
9503
9504
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """Validates OS parameters on a set of nodes.

  Only the vm-capable nodes among C{nodenames} are queried. A node which
  returns an empty payload is merely reported through an informational
  message.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS whose parameters are checked
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  per_node = lu.rpc.call_os_validate(vm_nodes, required, osname, checks,
                                     osparams)

  for (node_name, node_result) in per_node.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" % node_name)
    if node_result.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node_name)
9533
9534
def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
  """Builds the instance allocation request handed to the iallocator.

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics
  @param beparams: The full filled beparams
  @param node_whitelist: List of nodes which should appear as online to the
    allocator (unless the node is already marked offline)

  @returns: A filled L{iallocator.IAReqInstanceAlloc}

  """
  spindle_use = beparams[constants.BE_SPINDLE_USE]

  request_args = {
    "name": op.instance_name,
    "disk_template": op.disk_template,
    "tags": op.tags,
    "os": op.os_type,
    "vcpus": beparams[constants.BE_VCPUS],
    "memory": beparams[constants.BE_MAXMEM],
    "spindle_use": spindle_use,
    "disks": disks,
    # the allocator receives the NICs in their dictionary form
    "nics": [nic.ToDict() for nic in nics],
    "hypervisor": op.hypervisor,
    "node_whitelist": node_whitelist,
    }

  # pylint: disable=W0142
  return iallocator.IAReqInstanceAlloc(**request_args)
9560
9561
def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
  """Computes the nics.

  Each NIC definition found in C{op.nics} is validated (network, IP, mode
  and MAC address) and turned into an L{objects.NIC}. MAC addresses given
  explicitly are reserved in the configuration as a side effect.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The default ip to assign
  @param cfg: An instance of the configuration object
  @param ec_id: Execution context ID

  @returns: The built-up nics
  @raise errors.OpPrereqError: if a NIC definition is invalid or its MAC
      address is already reserved

  """
  nics = []
  for nic in op.nics:
    # keep the mode as requested apart from the effective one: only an
    # explicitly requested mode ends up in the NIC's own parameters
    nic_mode_req = nic.get(constants.INIC_MODE, None)
    nic_mode = nic_mode_req
    if nic_mode is None or nic_mode == constants.VALUE_AUTO:
      nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

    net = nic.get(constants.INIC_NETWORK, None)
    link = nic.get(constants.NIC_LINK, None)
    ip = nic.get(constants.INIC_IP, None)

    # a network excludes an explicit mode or link
    if net is None or net.lower() == constants.VALUE_NONE:
      net = None
    else:
      if nic_mode_req is not None or link is not None:
        raise errors.OpPrereqError("If network is given, no mode or link"
                                   " is allowed to be passed",
                                   errors.ECODE_INVAL)

    # ip validity checks
    if ip is None or ip.lower() == constants.VALUE_NONE:
      nic_ip = None
    elif ip.lower() == constants.VALUE_AUTO:
      # "auto" means using the default ip, which is only acceptable when
      # the name check has not been skipped
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
                                   errors.ECODE_INVAL)
      nic_ip = default_ip
    else:
      # We defer pool operations until later, so that the iallocator has
      # filled in the instance's node(s)
      if ip.lower() == constants.NIC_IP_POOL:
        if net is None:
          raise errors.OpPrereqError("if ip=pool, parameter network"
                                     " must be passed too",
                                     errors.ECODE_INVAL)

      elif not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)

      # either the pool keyword or a syntactically valid address
      nic_ip = ip

    # TODO: check the ip address for uniqueness
    if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                 errors.ECODE_INVAL)

    # MAC address verification
    mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)

      try:
        # TODO: We need to factor this out
        cfg.ReserveMAC(mac, ec_id)
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address %s already in use"
                                   " in cluster" % mac,
                                   errors.ECODE_NOTUNIQUE)

    #  Build nic parameters
    nicparams = {}
    if nic_mode_req:
      nicparams[constants.NIC_MODE] = nic_mode
    if link:
      nicparams[constants.NIC_LINK] = link

    # the syntax check is done on the parameters filled with the cluster
    # defaults, while the NIC itself only stores its own parameters
    check_params = cluster.SimpleFillNIC(nicparams)
    objects.NIC.CheckParameterSyntax(check_params)
    nics.append(objects.NIC(mac=mac, ip=nic_ip,
                            network=net, nicparams=nicparams))

  return nics
9648
9649
def _ComputeDisks(op, default_vg):
  """Validates and normalizes the disks requested by the opcode.

  @param op: The instance opcode
  @param default_vg: The default_vg to assume

  @return: The computed disks
  @raise errors.OpPrereqError: if a disk has an invalid access mode or a
      missing or invalid size

  """
  computed = []
  for spec in op.disks:
    access_mode = spec.get(constants.IDISK_MODE, constants.DISK_RDWR)
    if access_mode not in constants.DISK_ACCESS_SET:
      raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                 access_mode, errors.ECODE_INVAL)

    raw_size = spec.get(constants.IDISK_SIZE, None)
    if raw_size is None:
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
    try:
      disk_size = int(raw_size)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid disk size '%s'" % raw_size,
                                 errors.ECODE_INVAL)

    data_vg = spec.get(constants.IDISK_VG, default_vg)
    entry = {
      constants.IDISK_SIZE: disk_size,
      constants.IDISK_MODE: access_mode,
      constants.IDISK_VG: data_vg,
      }
    # these two are carried over only when the request has them
    for optional_key in (constants.IDISK_METAVG, constants.IDISK_ADOPT):
      if optional_key in spec:
        entry[optional_key] = spec[optional_key]

    computed.append(entry)

  return computed
9687
9688
def _ComputeFullBeParams(op, cluster):
  """Fills the backend parameters of the opcode with the cluster values.

  Parameters set to L{constants.VALUE_AUTO} are replaced in C{op.beparams}
  itself by the cluster defaults before the dictionary is upgraded and
  type-checked.

  @param op: The instance opcode
  @param cluster: The cluster config object

  @return: The fully filled beparams

  """
  cluster_defaults = cluster.beparams[constants.PP_DEFAULT]
  beparams = op.beparams

  auto_names = [name for (name, value) in beparams.items()
                if value == constants.VALUE_AUTO]
  for name in auto_names:
    beparams[name] = cluster_defaults[name]

  objects.UpgradeBeParams(beparams)
  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  return cluster.SimpleFillBE(beparams)
9705
9706
9707 class LUInstanceCreate(LogicalUnit):
9708   """Create an instance.
9709
9710   """
9711   HPATH = "instance-add"
9712   HTYPE = constants.HTYPE_INSTANCE
9713   REQ_BGL = False
9714
  def CheckArguments(self):
    """Check arguments.

    Validates and normalizes the opcode parameters: the instance name,
    the NIC and disk definitions (including the disk adoption rules), the
    file driver, the node/iallocator combination and the requirements
    specific to each creation mode.

    Besides modifying the opcode, this sets C{self.adopt_disks},
    C{self.check_ip} and C{self._cds}, plus C{self.hostname1} when the
    name is checked and C{self.source_x509_ca_pem},
    C{self.source_x509_ca} and C{self.source_instance_name} for remote
    imports.

    @raise errors.OpPrereqError: if any of the arguments is invalid

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      # adoption needs a suitable disk template and cannot be combined
      # with an iallocator or with an import
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      # internally mirrored templates need a secondary node, for the other
      # ones a given secondary is dropped with a warning
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        # the certificate is loaded using the cluster domain secret
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
9869
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    The lock of the new instance is registered for addition. With an
    iallocator all nodes (and the node allocation lock) are requested,
    otherwise only the given primary and secondary nodes. An import also
    needs the source node, or all nodes when the source node is unknown.
    The node resource locks are a copy of the node locks.

    @raise errors.OpPrereqError: if the instance is already part of the
        cluster, or an absolute source path is given without a source node

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

      if self.op.opportunistic_locking:
        self.opportunistic_locks[locking.LEVEL_NODE] = True
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
    else:
      # explicit nodes: expand their names and lock just them
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      # without a source path, the instance name is used as the path
      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        # the source node is not known, so all nodes are locked; the path
        # must be a relative one in this case
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        # nothing to add if all nodes are being locked already
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        # relative paths are taken below the export directory
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(pathutils.EXPORT_DIR, src_path)

    self.needed_locks[locking.LEVEL_NODE_RES] = \
      _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
9931
9932   def _RunAllocator(self):
9933     """Run the allocator based on input opcode.
9934
9935     """
9936     if self.op.opportunistic_locking:
9937       # Only consider nodes for which a lock is held
9938       node_whitelist = self.owned_locks(locking.LEVEL_NODE)
9939     else:
9940       node_whitelist = None
9941
9942     #TODO Export network to iallocator so that it chooses a pnode
9943     #     in a nodegroup that has the desired network connected to
9944     req = _CreateInstanceAllocRequest(self.op, self.disks,
9945                                       self.nics, self.be_full,
9946                                       node_whitelist)
9947     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9948
9949     ial.Run(self.op.iallocator)
9950
9951     if not ial.success:
9952       # When opportunistic locks are used only a temporary failure is generated
9953       if self.op.opportunistic_locking:
9954         ecode = errors.ECODE_TEMP_NORES
9955       else:
9956         ecode = errors.ECODE_NORES
9957
9958       raise errors.OpPrereqError("Can't compute nodes using"
9959                                  " iallocator '%s': %s" %
9960                                  (self.op.iallocator, ial.info),
9961                                  ecode)
9962
9963     self.op.pnode = ial.result[0]
9964     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9965                  self.op.instance_name, self.op.iallocator,
9966                  utils.CommaJoin(ial.result))
9967
9968     assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9969
9970     if req.RequiredNodes() == 2:
9971       self.op.snode = ial.result[1]
9972
9973   def BuildHooksEnv(self):
9974     """Build hooks env.
9975
9976     This runs on master, primary and secondary nodes of the instance.
9977
9978     """
9979     env = {
9980       "ADD_MODE": self.op.mode,
9981       }
9982     if self.op.mode == constants.INSTANCE_IMPORT:
9983       env["SRC_NODE"] = self.op.src_node
9984       env["SRC_PATH"] = self.op.src_path
9985       env["SRC_IMAGES"] = self.src_images
9986
9987     env.update(_BuildInstanceHookEnv(
9988       name=self.op.instance_name,
9989       primary_node=self.op.pnode,
9990       secondary_nodes=self.secondaries,
9991       status=self.op.start,
9992       os_type=self.op.os_type,
9993       minmem=self.be_full[constants.BE_MINMEM],
9994       maxmem=self.be_full[constants.BE_MAXMEM],
9995       vcpus=self.be_full[constants.BE_VCPUS],
9996       nics=_NICListToTuple(self, self.nics),
9997       disk_template=self.op.disk_template,
9998       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9999              for d in self.disks],
10000       bep=self.be_full,
10001       hvp=self.hv_full,
10002       hypervisor_name=self.op.hypervisor,
10003       tags=self.op.tags,
10004     ))
10005
10006     return env
10007
10008   def BuildHooksNodes(self):
10009     """Build hooks nodes.
10010
10011     """
10012     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10013     return nl, nl
10014
10015   def _ReadExportInfo(self):
10016     """Reads the export information from disk.
10017
10018     It will override the opcode source node and path with the actual
10019     information, if these two were not specified before.
10020
10021     @return: the export information
10022
10023     """
10024     assert self.op.mode == constants.INSTANCE_IMPORT
10025
10026     src_node = self.op.src_node
10027     src_path = self.op.src_path
10028
10029     if src_node is None:
10030       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10031       exp_list = self.rpc.call_export_list(locked_nodes)
10032       found = False
10033       for node in exp_list:
10034         if exp_list[node].fail_msg:
10035           continue
10036         if src_path in exp_list[node].payload:
10037           found = True
10038           self.op.src_node = src_node = node
10039           self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10040                                                        src_path)
10041           break
10042       if not found:
10043         raise errors.OpPrereqError("No export found for relative path %s" %
10044                                     src_path, errors.ECODE_INVAL)
10045
10046     _CheckNodeOnline(self, src_node)
10047     result = self.rpc.call_export_info(src_node, src_path)
10048     result.Raise("No export or invalid export found in dir %s" % src_path)
10049
10050     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10051     if not export_info.has_section(constants.INISECT_EXP):
10052       raise errors.ProgrammerError("Corrupted export config",
10053                                    errors.ECODE_ENVIRON)
10054
10055     ei_version = export_info.get(constants.INISECT_EXP, "version")
10056     if (int(ei_version) != constants.EXPORT_VERSION):
10057       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10058                                  (ei_version, constants.EXPORT_VERSION),
10059                                  errors.ECODE_ENVIRON)
10060     return export_info
10061
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    The OS type is the exception: it is always taken from the export.

    @param einfo: the export information, queried through a config parser
        like interface (C{get}, C{getint}, C{has_option}, C{has_section},
        C{items})
    @raise errors.OpPrereqError: if the disk template or the disks are
        specified neither in the opcode nor in the export, or the disk
        template of the export is not a valid one

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" %
                                     " ".join(constants.DISK_TEMPLATES),
                                     errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      # all the possible indices are tried, so a missing one does not end
      # the scan (unlike for the NICs below)
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      # the NICs are read until the first index without a MAC address
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics

    # the tags are stored as a single whitespace-separated value
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
        # Compatibility for the old "memory" be param
        if name == constants.BE_MEMORY:
          if constants.BE_MAXMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MAXMEM] = value
          if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
10150
10151   def _RevertToDefaults(self, cluster):
10152     """Revert the instance parameters to the default values.
10153
10154     """
10155     # hvparams
10156     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10157     for name in self.op.hvparams.keys():
10158       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10159         del self.op.hvparams[name]
10160     # beparams
10161     be_defs = cluster.SimpleFillBE({})
10162     for name in self.op.beparams.keys():
10163       if name in be_defs and be_defs[name] == self.op.beparams[name]:
10164         del self.op.beparams[name]
10165     # nic params
10166     nic_defs = cluster.SimpleFillNIC({})
10167     for nic in self.op.nics:
10168       for name in constants.NICS_PARAMETERS:
10169         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10170           del nic[name]
10171     # osparams
10172     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10173     for name in self.op.osparams.keys():
10174       if name in os_defs and os_defs[name] == self.op.osparams[name]:
10175         del self.op.osparams[name]
10176
10177   def _CalculateFileStorageDir(self):
10178     """Calculate final instance file storage dir.
10179
10180     """
10181     # file storage dir calculation/check
10182     self.instance_file_storage_dir = None
10183     if self.op.disk_template in constants.DTS_FILEBASED:
10184       # build the full file storage dir path
10185       joinargs = []
10186
10187       if self.op.disk_template == constants.DT_SHARED_FILE:
10188         get_fsd_fn = self.cfg.GetSharedFileStorageDir
10189       else:
10190         get_fsd_fn = self.cfg.GetFileStorageDir
10191
10192       cfg_storagedir = get_fsd_fn()
10193       if not cfg_storagedir:
10194         raise errors.OpPrereqError("Cluster file storage dir not defined",
10195                                    errors.ECODE_STATE)
10196       joinargs.append(cfg_storagedir)
10197
10198       if self.op.file_storage_dir is not None:
10199         joinargs.append(self.op.file_storage_dir)
10200
10201       joinargs.append(self.op.instance_name)
10202
10203       # pylint: disable=W0142
10204       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10205
10206   def CheckPrereq(self): # pylint: disable=R0914
10207     """Check prerequisites.
10208
10209     """
10210     self._CalculateFileStorageDir()
10211
10212     if self.op.mode == constants.INSTANCE_IMPORT:
10213       export_info = self._ReadExportInfo()
10214       self._ReadExportParams(export_info)
10215       self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10216     else:
10217       self._old_instance_name = None
10218
10219     if (not self.cfg.GetVGName() and
10220         self.op.disk_template not in constants.DTS_NOT_LVM):
10221       raise errors.OpPrereqError("Cluster does not support lvm-based"
10222                                  " instances", errors.ECODE_STATE)
10223
10224     if (self.op.hypervisor is None or
10225         self.op.hypervisor == constants.VALUE_AUTO):
10226       self.op.hypervisor = self.cfg.GetHypervisorType()
10227
10228     cluster = self.cfg.GetClusterInfo()
10229     enabled_hvs = cluster.enabled_hypervisors
10230     if self.op.hypervisor not in enabled_hvs:
10231       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10232                                  " cluster (%s)" %
10233                                  (self.op.hypervisor, ",".join(enabled_hvs)),
10234                                  errors.ECODE_STATE)
10235
10236     # Check tag validity
10237     for tag in self.op.tags:
10238       objects.TaggableObject.ValidateTag(tag)
10239
10240     # check hypervisor parameter syntax (locally)
10241     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10242     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10243                                       self.op.hvparams)
10244     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10245     hv_type.CheckParameterSyntax(filled_hvp)
10246     self.hv_full = filled_hvp
10247     # check that we don't specify global parameters on an instance
10248     _CheckGlobalHvParams(self.op.hvparams)
10249
10250     # fill and remember the beparams dict
10251     self.be_full = _ComputeFullBeParams(self.op, cluster)
10252
10253     # build os parameters
10254     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10255
10256     # now that hvp/bep are in final format, let's reset to defaults,
10257     # if told to do so
10258     if self.op.identify_defaults:
10259       self._RevertToDefaults(cluster)
10260
10261     # NIC buildup
10262     self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10263                              self.proc.GetECId())
10264
10265     # disk checks/pre-build
10266     default_vg = self.cfg.GetVGName()
10267     self.disks = _ComputeDisks(self.op, default_vg)
10268
10269     if self.op.mode == constants.INSTANCE_IMPORT:
10270       disk_images = []
10271       for idx in range(len(self.disks)):
10272         option = "disk%d_dump" % idx
10273         if export_info.has_option(constants.INISECT_INS, option):
10274           # FIXME: are the old os-es, disk sizes, etc. useful?
10275           export_name = export_info.get(constants.INISECT_INS, option)
10276           image = utils.PathJoin(self.op.src_path, export_name)
10277           disk_images.append(image)
10278         else:
10279           disk_images.append(False)
10280
10281       self.src_images = disk_images
10282
10283       if self.op.instance_name == self._old_instance_name:
10284         for idx, nic in enumerate(self.nics):
10285           if nic.mac == constants.VALUE_AUTO:
10286             nic_mac_ini = "nic%d_mac" % idx
10287             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10288
10289     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10290
10291     # ip ping checks (we use the same ip that was resolved in ExpandNames)
10292     if self.op.ip_check:
10293       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10294         raise errors.OpPrereqError("IP %s of instance %s already in use" %
10295                                    (self.check_ip, self.op.instance_name),
10296                                    errors.ECODE_NOTUNIQUE)
10297
10298     #### mac address generation
10299     # By generating here the mac address both the allocator and the hooks get
10300     # the real final mac address rather than the 'auto' or 'generate' value.
10301     # There is a race condition between the generation and the instance object
10302     # creation, which means that we know the mac is valid now, but we're not
10303     # sure it will be when we actually add the instance. If things go bad
10304     # adding the instance will abort because of a duplicate mac, and the
10305     # creation job will fail.
10306     for nic in self.nics:
10307       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10308         nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10309
10310     #### allocator run
10311
10312     if self.op.iallocator is not None:
10313       self._RunAllocator()
10314
10315     # Release all unneeded node locks
10316     keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10317     _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10318     _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10319     _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10320
10321     assert (self.owned_locks(locking.LEVEL_NODE) ==
10322             self.owned_locks(locking.LEVEL_NODE_RES)), \
10323       "Node locks differ from node resource locks"
10324
10325     #### node related checks
10326
10327     # check primary node
10328     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10329     assert self.pnode is not None, \
10330       "Cannot retrieve locked node %s" % self.op.pnode
10331     if pnode.offline:
10332       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10333                                  pnode.name, errors.ECODE_STATE)
10334     if pnode.drained:
10335       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10336                                  pnode.name, errors.ECODE_STATE)
10337     if not pnode.vm_capable:
10338       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10339                                  " '%s'" % pnode.name, errors.ECODE_STATE)
10340
10341     self.secondaries = []
10342
10343     # Fill in any IPs from IP pools. This must happen here, because we need to
10344     # know the nic's primary node, as specified by the iallocator
10345     for idx, nic in enumerate(self.nics):
10346       net = nic.network
10347       if net is not None:
10348         netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10349         if netparams is None:
10350           raise errors.OpPrereqError("No netparams found for network"
10351                                      " %s. Propably not connected to"
10352                                      " node's %s nodegroup" %
10353                                      (net, self.pnode.name),
10354                                      errors.ECODE_INVAL)
10355         self.LogInfo("NIC/%d inherits netparams %s" %
10356                      (idx, netparams.values()))
10357         nic.nicparams = dict(netparams)
10358         if nic.ip is not None:
10359           if nic.ip.lower() == constants.NIC_IP_POOL:
10360             try:
10361               nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10362             except errors.ReservationError:
10363               raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10364                                          " from the address pool" % idx,
10365                                          errors.ECODE_STATE)
10366             self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10367           else:
10368             try:
10369               self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10370             except errors.ReservationError:
10371               raise errors.OpPrereqError("IP address %s already in use"
10372                                          " or does not belong to network %s" %
10373                                          (nic.ip, net),
10374                                          errors.ECODE_NOTUNIQUE)
10375       else:
10376         # net is None, ip None or given
10377         if self.op.conflicts_check:
10378           _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10379
10380     # mirror node verification
10381     if self.op.disk_template in constants.DTS_INT_MIRROR:
10382       if self.op.snode == pnode.name:
10383         raise errors.OpPrereqError("The secondary node cannot be the"
10384                                    " primary node", errors.ECODE_INVAL)
10385       _CheckNodeOnline(self, self.op.snode)
10386       _CheckNodeNotDrained(self, self.op.snode)
10387       _CheckNodeVmCapable(self, self.op.snode)
10388       self.secondaries.append(self.op.snode)
10389
10390       snode = self.cfg.GetNodeInfo(self.op.snode)
10391       if pnode.group != snode.group:
10392         self.LogWarning("The primary and secondary nodes are in two"
10393                         " different node groups; the disk parameters"
10394                         " from the first disk's node group will be"
10395                         " used")
10396
10397     nodenames = [pnode.name] + self.secondaries
10398
10399     # Verify instance specs
10400     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10401     ispec = {
10402       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10403       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10404       constants.ISPEC_DISK_COUNT: len(self.disks),
10405       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10406       constants.ISPEC_NIC_COUNT: len(self.nics),
10407       constants.ISPEC_SPINDLE_USE: spindle_use,
10408       }
10409
10410     group_info = self.cfg.GetNodeGroup(pnode.group)
10411     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10412     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10413     if not self.op.ignore_ipolicy and res:
10414       msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10415              (pnode.group, group_info.name, utils.CommaJoin(res)))
10416       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10417
10418     if not self.adopt_disks:
10419       if self.op.disk_template == constants.DT_RBD:
10420         # _CheckRADOSFreeSpace() is just a placeholder.
10421         # Any function that checks prerequisites can be placed here.
10422         # Check if there is enough space on the RADOS cluster.
10423         _CheckRADOSFreeSpace()
10424       else:
10425         # Check lv size requirements, if not adopting
10426         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10427         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10428
10429     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10430       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10431                                 disk[constants.IDISK_ADOPT])
10432                      for disk in self.disks])
10433       if len(all_lvs) != len(self.disks):
10434         raise errors.OpPrereqError("Duplicate volume names given for adoption",
10435                                    errors.ECODE_INVAL)
10436       for lv_name in all_lvs:
10437         try:
10438           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10439           # to ReserveLV uses the same syntax
10440           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10441         except errors.ReservationError:
10442           raise errors.OpPrereqError("LV named %s used by another instance" %
10443                                      lv_name, errors.ECODE_NOTUNIQUE)
10444
10445       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10446       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10447
10448       node_lvs = self.rpc.call_lv_list([pnode.name],
10449                                        vg_names.payload.keys())[pnode.name]
10450       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10451       node_lvs = node_lvs.payload
10452
10453       delta = all_lvs.difference(node_lvs.keys())
10454       if delta:
10455         raise errors.OpPrereqError("Missing logical volume(s): %s" %
10456                                    utils.CommaJoin(delta),
10457                                    errors.ECODE_INVAL)
10458       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10459       if online_lvs:
10460         raise errors.OpPrereqError("Online logical volumes found, cannot"
10461                                    " adopt: %s" % utils.CommaJoin(online_lvs),
10462                                    errors.ECODE_STATE)
10463       # update the size of disk based on what is found
10464       for dsk in self.disks:
10465         dsk[constants.IDISK_SIZE] = \
10466           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10467                                         dsk[constants.IDISK_ADOPT])][0]))
10468
10469     elif self.op.disk_template == constants.DT_BLOCK:
10470       # Normalize and de-duplicate device paths
10471       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10472                        for disk in self.disks])
10473       if len(all_disks) != len(self.disks):
10474         raise errors.OpPrereqError("Duplicate disk names given for adoption",
10475                                    errors.ECODE_INVAL)
10476       baddisks = [d for d in all_disks
10477                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10478       if baddisks:
10479         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10480                                    " cannot be adopted" %
10481                                    (utils.CommaJoin(baddisks),
10482                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
10483                                    errors.ECODE_INVAL)
10484
10485       node_disks = self.rpc.call_bdev_sizes([pnode.name],
10486                                             list(all_disks))[pnode.name]
10487       node_disks.Raise("Cannot get block device information from node %s" %
10488                        pnode.name)
10489       node_disks = node_disks.payload
10490       delta = all_disks.difference(node_disks.keys())
10491       if delta:
10492         raise errors.OpPrereqError("Missing block device(s): %s" %
10493                                    utils.CommaJoin(delta),
10494                                    errors.ECODE_INVAL)
10495       for dsk in self.disks:
10496         dsk[constants.IDISK_SIZE] = \
10497           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10498
10499     # Verify instance specs
10500     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10501     ispec = {
10502       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10503       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10504       constants.ISPEC_DISK_COUNT: len(self.disks),
10505       constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10506                                   for disk in self.disks],
10507       constants.ISPEC_NIC_COUNT: len(self.nics),
10508       constants.ISPEC_SPINDLE_USE: spindle_use,
10509       }
10510
10511     group_info = self.cfg.GetNodeGroup(pnode.group)
10512     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10513     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10514     if not self.op.ignore_ipolicy and res:
10515       raise errors.OpPrereqError(("Instance allocation to group %s violates"
10516                                   " policy: %s") % (pnode.group,
10517                                                     utils.CommaJoin(res)),
10518                                   errors.ECODE_INVAL)
10519
10520     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10521
10522     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10523     # check OS parameters (remotely)
10524     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10525
10526     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10527
10528     # memory check on primary node
10529     #TODO(dynmem): use MINMEM for checking
10530     if self.op.start:
10531       _CheckNodeFreeMemory(self, self.pnode.name,
10532                            "creating instance %s" % self.op.instance_name,
10533                            self.be_full[constants.BE_MAXMEM],
10534                            self.op.hypervisor)
10535
10536     self.dry_run_result = list(nodenames)
10537
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    The steps are, in order: generate the disk objects, build the
    instance object (administratively down), create the disks or rename
    the adopted LVs, add the instance to the configuration, release the
    node locks, optionally wipe the disks and wait for or check their
    sync status, release the node resource locks, run the OS
    create/import/rename scripts and finally, if requested, start the
    instance.

    @param feedback_fn: function called with progress messages
    @return: list of the nodes of the new instance (C{iobj.all_nodes})
    @raise errors.OpExecError: if the disks cannot be created or are found
        degraded; other failures are raised via the C{Raise} method of the
        RPC results

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # every owned node resource lock must also be owned as a node lock, and
    # the node allocation lock must have been released already
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"
    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

    # a port is allocated only for hypervisors listed in HTS_REQ_PORT
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    # This is ugly but we got a chicken-egg problem here
    # We can only take the group disk parameters, as the instance
    # has no disks yet (we are generating them right here).
    node = self.cfg.GetNodeInfo(pnode_name)
    nodegroup = self.cfg.GetNodeGroup(node.group)
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn,
                                  self.cfg.GetGroupDiskParams(nodegroup))

    # the instance is created as administratively down; the state is only
    # changed to up at the very end, if the opcode asks for a start
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      # of the adoptable templates only plain (LVM) needs work here
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        # try to remove whatever was created; the DRBD minors are released
        # in any case before the error is re-raised
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    # disk_abort becomes True as soon as wiping or syncing the disks fails
    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # undo the creation: remove the disks and the configuration entry
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      # we need to set the disks ID to the primary node, since the
      # preceding code might or might have not done it, depending on
      # disk template and other options
      for disk in iobj.disks:
        self.cfg.SetDiskID(disk, pnode_name)
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          # the sync is paused during the OS installation only for
          # internally mirrored templates and only if the opcode did not
          # ask to wait for the sync
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), True)
            # failures to pause are only logged, the installation goes on
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          # the result of the OS installation is evaluated only after the
          # resume of the sync has been attempted
          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      else:
        # both import modes set rename_from, used by the rename script below
        if self.op.mode == constants.INSTANCE_IMPORT:
          feedback_fn("* running the instance OS import scripts...")

          transfers = []

          for idx, image in enumerate(self.src_images):
            # disks without a dump in the export have a false entry
            if not image:
              continue

            # FIXME: pass debug option from opcode to backend
            dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                               constants.IEIO_FILE, (image, ),
                                               constants.IEIO_SCRIPT,
                                               (iobj.disks[idx], idx),
                                               None)
            transfers.append(dt)

          import_result = \
            masterd.instance.TransferInstanceData(self, feedback_fn,
                                                  self.op.src_node, pnode_name,
                                                  self.pnode.secondary_ip,
                                                  iobj, transfers)
          # failed transfers only cause a warning
          if not compat.all(import_result):
            self.LogWarning("Some disks for instance %s on node %s were not"
                            " imported successfully" % (instance, pnode_name))

          rename_from = self._old_instance_name

        elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
          feedback_fn("* preparing remote import...")
          # The source cluster will stop the instance before attempting to make
          # a connection. In some cases stopping an instance can take a long
          # time, hence the shutdown timeout is added to the connection
          # timeout.
          connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                             self.op.source_shutdown_timeout)
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          assert iobj.primary_node == self.pnode.name
          disk_results = \
            masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                          self.source_x509_ca,
                                          self._cds, timeouts)
          if not compat.all(disk_results):
            # TODO: Should the instance still be started, even if some disks
            # failed to import (valid for local imports, too)?
            self.LogWarning("Some disks for instance %s on node %s were not"
                            " imported successfully" % (instance, pnode_name))

          rename_from = self.source_instance_name

        else:
          # also checked in the prereq part
          raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                       % self.op.mode)

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   rename_from,
                                                   self.op.debug_level)
        # a failed rename script only causes a warning
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      # record the new admin state in the configuration before starting
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
10778
10779
class LUInstanceMultiAlloc(NoHooksLU):
  """Allocates multiple instances at the same time.

  The nodes for the new instances are either computed by an iallocator for
  all instances at once or, when no iallocator is in use, taken from the
  instance opcodes themselves. The result contains one job per instance
  that can be created.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    Ensures that no instance opcode carries its own iallocator, that either
    all or none of the instance opcodes name their node(s), that there is a
    way to place the instances (nodes or an iallocator) and that instance
    names are unique.

    @raise errors.OpPrereqError: if one of the checks fails

    """
    nodes = []
    for inst in self.op.instances:
      if inst.iallocator is not None:
        raise errors.OpPrereqError("iallocator are not allowed to be set on"
                                   " instance objects", errors.ECODE_INVAL)
      nodes.append(bool(inst.pnode))
      if inst.disk_template in constants.DTS_INT_MIRROR:
        nodes.append(bool(inst.snode))

    has_nodes = compat.any(nodes)
    if compat.all(nodes) ^ has_nodes:
      raise errors.OpPrereqError("There are instance objects providing"
                                 " pnode/snode while others do not",
                                 errors.ECODE_INVAL)

    # The cluster-wide default iallocator is only a fallback for requests
    # naming no nodes at all; nodes given on the instances must be honoured
    # instead of being silently replaced by an iallocator run
    if not has_nodes and self.op.iallocator is None:
      default_iallocator = self.cfg.GetDefaultIAllocator()
      if default_iallocator:
        self.op.iallocator = default_iallocator
      else:
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
                                   " given and no cluster-wide default"
                                   " iallocator found; please specify either"
                                   " an iallocator or nodes on the instances"
                                   " or set a cluster-wide default iallocator",
                                   errors.ECODE_INVAL)

    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
    if dups:
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
                                 utils.CommaJoin(dups), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Calculate the locks.

    With an iallocator all nodes (and their resources) are locked,
    optionally opportunistically; without one only the nodes named by the
    instance opcodes are locked.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {
      # iallocator will select nodes and even if no iallocator is used,
      # collisions with LUInstanceCreate should be avoided
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET

      if self.op.opportunistic_locking:
        self.opportunistic_locks[locking.LEVEL_NODE] = True
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
    else:
      nodeslist = []
      for inst in self.op.instances:
        inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
        nodeslist.append(inst.pnode)
        if inst.snode is not None:
          inst.snode = _ExpandNodeName(self.cfg, inst.snode)
          nodeslist.append(inst.snode)

      self.needed_locks[locking.LEVEL_NODE] = nodeslist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)

  def CheckPrereq(self):
    """Check prerequisite.

    When an iallocator is in use it is run for all instances at once and
    its result is stored in C{self.ia_result}; otherwise the nodes named by
    the instance opcodes are used and nothing has to be computed. For dry
    runs the partial result is prepared as well.

    @raise errors.OpPrereqError: if the iallocator run is not successful

    """
    if self.op.iallocator:
      cluster = self.cfg.GetClusterInfo()
      default_vg = self.cfg.GetVGName()
      ec_id = self.proc.GetECId()

      if self.op.opportunistic_locking:
        # Only consider nodes for which a lock is held
        node_whitelist = self.owned_locks(locking.LEVEL_NODE)
      else:
        node_whitelist = None

      insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                           _ComputeNics(op, cluster, None,
                                                        self.cfg, ec_id),
                                           _ComputeFullBeParams(op, cluster),
                                           node_whitelist)
               for op in self.op.instances]

      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute nodes using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      self.ia_result = ial.result

    if self.op.dry_run:
      self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
        constants.JOB_IDS_KEY: [],
        })

  def _ConstructPartialResult(self):
    """Constructs the partial result.

    @rtype: dict
    @return: dictionary with the names of the allocatable instances and the
        instances for which the allocation failed; without an iallocator
        all instances count as allocatable

    """
    if self.op.iallocator:
      (allocatable, failed) = self.ia_result
      allocatable_names = map(compat.fst, allocatable)
    else:
      # Nodes were given by the caller, there is no allocator verdict
      allocatable_names = [op.instance_name for op in self.op.instances]
      failed = []

    return {
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: allocatable_names,
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
      }

  def Exec(self, feedback_fn):
    """Executes the opcode.

    Builds one job per instance to be created. With an iallocator the
    computed nodes are written into the instance opcodes first; without one
    the opcodes are submitted with the nodes they already carry.

    @return: L{ResultWithJobs} holding the jobs and the partial result

    """
    jobs = []

    if self.op.iallocator:
      op2inst = dict((op.instance_name, op) for op in self.op.instances)
      (allocatable, failed) = self.ia_result

      for (name, nodes) in allocatable:
        op = op2inst.pop(name)

        if len(nodes) > 1:
          (op.pnode, op.snode) = nodes
        else:
          (op.pnode,) = nodes

        jobs.append([op])

      # Every instance must be reported as either allocatable or failed
      missing = set(op2inst.keys()) - set(failed)
      assert not missing, \
        "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
    else:
      jobs.extend([op] for op in self.op.instances)

    return ResultWithJobs(jobs, **self._ConstructPartialResult())
10927
10928
10929 def _CheckRADOSFreeSpace():
10930   """Compute disk size requirements inside the RADOS cluster.
10931
10932   """
10933   # For the RADOS cluster we assume there is always enough space.
10934   pass
10935
10936
class LUInstanceConsole(NoHooksLU):
  """Provide the information needed to reach an instance's console.

  This logical unit is somewhat special: instead of changing anything it
  returns the console description computed by L{_GetInstanceConsole}, which
  the caller then uses to connect to the console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks: the instance, with the sharing from L{_ShareAll}.

    """
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be part of the cluster and its primary node is
    checked via L{_CheckNodeOnline}.

    """
    inst_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % inst_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Return the console information of a running instance.

    @raise errors.OpExecError: if the instance is not reported as running
        by its primary node

    """
    instance = self.instance
    pnode = instance.primary_node

    listing = self.rpc.call_instance_list([pnode], [instance.hypervisor])
    running = listing[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if instance.name not in running.payload:
      # Report the state matching the configured admin state
      state = {
        constants.ADMINST_UP: constants.INSTST_ERRORDOWN,
        constants.ADMINST_DOWN: constants.INSTST_ADMINDOWN,
        }.get(instance.admin_state, constants.INSTST_ADMINOFFLINE)
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, pnode)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10986
10987
def _GetInstanceConsole(cluster, instance):
  """Build the console description of an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object used to fill in the instance parameters
  @type instance: L{objects.Instance}
  @param instance: instance whose console information is wanted
  @rtype: dict
  @return: the console object provided by the hypervisor, as dictionary

  """
  hv_impl = hypervisor.GetHypervisor(instance.hypervisor)

  # The filled hvparams and beparams are handed over separately rather than
  # written into the instance, so that the defaults are never saved in the
  # instance itself.
  console = hv_impl.GetInstanceConsole(instance,
                                       cluster.FillHV(instance),
                                       cluster.FillBE(instance))

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
11007
11008
class LUInstanceReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  The replacement itself is done by a L{TLReplaceDisks} tasklet; this class
  validates the options, declares the locks and builds the hooks data.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the combination of mode, iallocator and new node.

    @raise errors.OpPrereqError: if the secondary is to be changed but
        neither an iallocator nor a new node is given, or if either of them
        is given for any other mode

    """
    have_node = self.op.remote_node is not None
    have_ialloc = self.op.iallocator is not None

    if self.op.mode == constants.REPLACE_DISK_CHG:
      if not (have_node or have_ialloc):
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)
      _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
      return

    if have_node or have_ialloc:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declare the initial locks and create the tasklet.

    """
    self._ExpandAndLockInstance()

    # None of these levels may have been declared up to now
    for unset_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES,
                        locking.LEVEL_NODEGROUP):
      assert unset_level not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is None:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given locking level.

    @param level: locking level for which the locks are being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is None:
        assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

        self._LockInstancesNodes()
      else:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)

        # Lock member nodes of all locked groups
        member_nodes = []
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          member_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)
        self.needed_locks[locking.LEVEL_NODE] = member_nodes

    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = self.op.remote_node
    env["OLD_SECONDARY"] = instance.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list (master node, primary node and, if given, the
        new secondary node) twice

    """
    instance = self.replacer.instance

    hook_nodes = [self.cfg.GetMasterNode(), instance.primary_node]
    if self.op.remote_node is not None:
      hook_nodes.append(self.op.remote_node)

    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically acquired node group locks before handing
    over to the generic implementation.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    locked_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if locked_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, locked_groups)

    return LogicalUnit.CheckPrereq(self)
11148
11149
11150 class TLReplaceDisks(Tasklet):
11151   """Replaces disks for an instance.
11152
11153   Note: Locking is not within the scope of this class.
11154
11155   """
11156   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11157                disks, early_release, ignore_ipolicy):
11158     """Initializes this class.
11159
11160     """
11161     Tasklet.__init__(self, lu)
11162
11163     # Parameters
11164     self.instance_name = instance_name
11165     self.mode = mode
11166     self.iallocator_name = iallocator_name
11167     self.remote_node = remote_node
11168     self.disks = disks
11169     self.early_release = early_release
11170     self.ignore_ipolicy = ignore_ipolicy
11171
11172     # Runtime data
11173     self.instance = None
11174     self.new_node = None
11175     self.target_node = None
11176     self.other_node = None
11177     self.remote_node_info = None
11178     self.node_secondary_ip = None
11179
11180   @staticmethod
11181   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11182     """Compute a new secondary node using an IAllocator.
11183
11184     """
11185     req = iallocator.IAReqRelocate(name=instance_name,
11186                                    relocate_from=list(relocate_from))
11187     ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11188
11189     ial.Run(iallocator_name)
11190
11191     if not ial.success:
11192       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11193                                  " %s" % (iallocator_name, ial.info),
11194                                  errors.ECODE_NORES)
11195
11196     remote_node_name = ial.result[0]
11197
11198     lu.LogInfo("Selected new secondary for instance '%s': %s",
11199                instance_name, remote_node_name)
11200
11201     return remote_node_name
11202
11203   def _FindFaultyDisks(self, node_name):
11204     """Wrapper for L{_FindFaultyInstanceDisks}.
11205
11206     """
11207     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11208                                     node_name, True)
11209
11210   def _CheckDisksActivated(self, instance):
11211     """Checks if the instance disks are activated.
11212
11213     @param instance: The instance to check disks
11214     @return: True if they are activated, False otherwise
11215
11216     """
11217     nodes = instance.all_nodes
11218
11219     for idx, dev in enumerate(instance.disks):
11220       for node in nodes:
11221         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11222         self.cfg.SetDiskID(dev, node)
11223
11224         result = _BlockdevFind(self, node, dev, instance)
11225
11226         if result.offline:
11227           continue
11228         elif result.fail_msg or not result.payload:
11229           return False
11230
11231     return True
11232
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, is DRBD8-based and has
    exactly one secondary node. It then determines the new secondary (given
    explicitly or computed by the iallocator), the disks to replace and the
    target/other/new nodes according to the replacement mode, verifies the
    involved nodes, releases all locks which are not needed anymore and
    records the secondary IP addresses of the nodes touched.

    @raise errors.OpPrereqError: if one of the checks fails
    @raise errors.ProgrammerError: if the replacement mode is not handled

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either given by the caller or chosen by the
    # iallocator; it stays None if neither was specified
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    # An explicit list of disks is only accepted when replacing on the
    # primary or on the current secondary
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Automatic mode: the disks to replace are the faulty ones, which may
      # only be found on one of the two nodes
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty: no disks to replace and no nodes to check
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        # The old secondary (target_node) is not part of check_nodes and is
        # hence not required to be online
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between internal
    # submitted opcode and external one. We should fix that.
    if self.remote_node_info:
      # We change the node, lets verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      cluster = self.cfg.GetClusterInfo()
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              new_group_info)
      # NOTE(review): the tasklet itself (not self.lu) is passed as first
      # argument here - confirm the helper only uses attributes a tasklet has
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Only the nodes actually selected above are kept locked
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    # Release any owned node group
    _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
11385
11386   def Exec(self, feedback_fn):
11387     """Execute disk replacement.
11388
11389     This dispatches the disk replacement to the appropriate handler.
11390
11391     """
11392     if __debug__:
11393       # Verify owned locks before starting operation
11394       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11395       assert set(owned_nodes) == set(self.node_secondary_ip), \
11396           ("Incorrect node locks, owning %s, expected %s" %
11397            (owned_nodes, self.node_secondary_ip.keys()))
11398       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11399               self.lu.owned_locks(locking.LEVEL_NODE_RES))
11400       assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11401
11402       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11403       assert list(owned_instances) == [self.instance_name], \
11404           "Instance '%s' not locked" % self.instance_name
11405
11406       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11407           "Should not own any node group lock at this point"
11408
11409     if not self.disks:
11410       feedback_fn("No disks need replacement for instance '%s'" %
11411                   self.instance.name)
11412       return
11413
11414     feedback_fn("Replacing disk(s) %s for instance '%s'" %
11415                 (utils.CommaJoin(self.disks), self.instance.name))
11416     feedback_fn("Current primary node: %s" % self.instance.primary_node)
11417     feedback_fn("Current seconary node: %s" %
11418                 utils.CommaJoin(self.instance.secondary_nodes))
11419
11420     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11421
11422     # Activate the instance disks if we're replacing them on a down instance
11423     if activate_disks:
11424       _StartInstanceDisks(self.lu, self.instance, True)
11425
11426     try:
11427       # Should we replace the secondary node?
11428       if self.new_node is not None:
11429         fn = self._ExecDrbd8Secondary
11430       else:
11431         fn = self._ExecDrbd8DiskOnly
11432
11433       result = fn(feedback_fn)
11434     finally:
11435       # Deactivate the instance disks if we're replacing them on a
11436       # down instance
11437       if activate_disks:
11438         _SafeShutdownInstanceDisks(self.lu, self.instance)
11439
11440     assert not self.lu.owned_locks(locking.LEVEL_NODE)
11441
11442     if __debug__:
11443       # Verify owned locks
11444       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11445       nodes = frozenset(self.node_secondary_ip)
11446       assert ((self.early_release and not owned_nodes) or
11447               (not self.early_release and not (set(owned_nodes) - nodes))), \
11448         ("Not owning the correct locks, early_release=%s, owned=%r,"
11449          " nodes=%r" % (self.early_release, owned_nodes, nodes))
11450
11451     return result
11452
11453   def _CheckVolumeGroup(self, nodes):
11454     self.lu.LogInfo("Checking volume groups")
11455
11456     vgname = self.cfg.GetVGName()
11457
11458     # Make sure volume group exists on all involved nodes
11459     results = self.rpc.call_vg_list(nodes)
11460     if not results:
11461       raise errors.OpExecError("Can't list volume groups on the nodes")
11462
11463     for node in nodes:
11464       res = results[node]
11465       res.Raise("Error checking node %s" % node)
11466       if vgname not in res.payload:
11467         raise errors.OpExecError("Volume group '%s' not found on node %s" %
11468                                  (vgname, node))
11469
11470   def _CheckDisksExistence(self, nodes):
11471     # Check disk existence
11472     for idx, dev in enumerate(self.instance.disks):
11473       if idx not in self.disks:
11474         continue
11475
11476       for node in nodes:
11477         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11478         self.cfg.SetDiskID(dev, node)
11479
11480         result = _BlockdevFind(self, node, dev, self.instance)
11481
11482         msg = result.fail_msg
11483         if msg or not result.payload:
11484           if not msg:
11485             msg = "disk not found"
11486           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11487                                    (idx, node, msg))
11488
11489   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11490     for idx, dev in enumerate(self.instance.disks):
11491       if idx not in self.disks:
11492         continue
11493
11494       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11495                       (idx, node_name))
11496
11497       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11498                                    on_primary, ldisk=ldisk):
11499         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11500                                  " replace disks for instance %s" %
11501                                  (node_name, self.instance.name))
11502
11503   def _CreateNewStorage(self, node_name):
11504     """Create new storage on the primary or secondary node.
11505
11506     This is only used for same-node replaces, not for changing the
11507     secondary node, hence we don't want to modify the existing disk.
11508
11509     """
11510     iv_names = {}
11511
11512     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11513     for idx, dev in enumerate(disks):
11514       if idx not in self.disks:
11515         continue
11516
11517       self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11518
11519       self.cfg.SetDiskID(dev, node_name)
11520
11521       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11522       names = _GenerateUniqueNames(self.lu, lv_names)
11523
11524       (data_disk, meta_disk) = dev.children
11525       vg_data = data_disk.logical_id[0]
11526       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11527                              logical_id=(vg_data, names[0]),
11528                              params=data_disk.params)
11529       vg_meta = meta_disk.logical_id[0]
11530       lv_meta = objects.Disk(dev_type=constants.LD_LV,
11531                              size=constants.DRBD_META_SIZE,
11532                              logical_id=(vg_meta, names[1]),
11533                              params=meta_disk.params)
11534
11535       new_lvs = [lv_data, lv_meta]
11536       old_lvs = [child.Copy() for child in dev.children]
11537       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11538
11539       # we pass force_create=True to force the LVM creation
11540       for new_lv in new_lvs:
11541         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11542                              _GetInstanceInfoText(self.instance), False)
11543
11544     return iv_names
11545
11546   def _CheckDevices(self, node_name, iv_names):
11547     for name, (dev, _, _) in iv_names.iteritems():
11548       self.cfg.SetDiskID(dev, node_name)
11549
11550       result = _BlockdevFind(self, node_name, dev, self.instance)
11551
11552       msg = result.fail_msg
11553       if msg or not result.payload:
11554         if not msg:
11555           msg = "disk not found"
11556         raise errors.OpExecError("Can't find DRBD device %s: %s" %
11557                                  (name, msg))
11558
11559       if result.payload.is_degraded:
11560         raise errors.OpExecError("DRBD device %s is degraded!" % name)
11561
11562   def _RemoveOldStorage(self, node_name, iv_names):
11563     for name, (_, old_lvs, _) in iv_names.iteritems():
11564       self.lu.LogInfo("Remove logical volumes for %s", name)
11565
11566       for lv in old_lvs:
11567         self.cfg.SetDiskID(lv, node_name)
11568
11569         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11570         if msg:
11571           self.lu.LogWarning("Can't remove old LV: %s", msg,
11572                              hint="remove unused LVs manually")
11573
11574   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11575     """Replace a disk on the primary or secondary for DRBD 8.
11576
11577     The algorithm for replace is quite complicated:
11578
11579       1. for each disk to be replaced:
11580
11581         1. create new LVs on the target node with unique names
11582         1. detach old LVs from the drbd device
11583         1. rename old LVs to name_replaced.<time_t>
11584         1. rename new LVs to old LVs
11585         1. attach the new LVs (with the old names now) to the drbd device
11586
11587       1. wait for sync across all devices
11588
11589       1. for each modified disk:
11590
11591         1. remove old LVs (which have the name name_replaces.<time_t>)
11592
11593     Failures are not very well handled.
11594
11595     """
11596     steps_total = 6
11597
11598     # Step: check device activation
11599     self.lu.LogStep(1, steps_total, "Check device existence")
11600     self._CheckDisksExistence([self.other_node, self.target_node])
11601     self._CheckVolumeGroup([self.target_node, self.other_node])
11602
11603     # Step: check other node consistency
11604     self.lu.LogStep(2, steps_total, "Check peer consistency")
11605     self._CheckDisksConsistency(self.other_node,
11606                                 self.other_node == self.instance.primary_node,
11607                                 False)
11608
11609     # Step: create new storage
11610     self.lu.LogStep(3, steps_total, "Allocate new storage")
11611     iv_names = self._CreateNewStorage(self.target_node)
11612
11613     # Step: for each lv, detach+rename*2+attach
11614     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11615     for dev, old_lvs, new_lvs in iv_names.itervalues():
11616       self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11617
11618       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11619                                                      old_lvs)
11620       result.Raise("Can't detach drbd from local storage on node"
11621                    " %s for device %s" % (self.target_node, dev.iv_name))
11622       #dev.children = []
11623       #cfg.Update(instance)
11624
11625       # ok, we created the new LVs, so now we know we have the needed
11626       # storage; as such, we proceed on the target node to rename
11627       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11628       # using the assumption that logical_id == physical_id (which in
11629       # turn is the unique_id on that node)
11630
11631       # FIXME(iustin): use a better name for the replaced LVs
11632       temp_suffix = int(time.time())
11633       ren_fn = lambda d, suff: (d.physical_id[0],
11634                                 d.physical_id[1] + "_replaced-%s" % suff)
11635
11636       # Build the rename list based on what LVs exist on the node
11637       rename_old_to_new = []
11638       for to_ren in old_lvs:
11639         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11640         if not result.fail_msg and result.payload:
11641           # device exists
11642           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11643
11644       self.lu.LogInfo("Renaming the old LVs on the target node")
11645       result = self.rpc.call_blockdev_rename(self.target_node,
11646                                              rename_old_to_new)
11647       result.Raise("Can't rename old LVs on node %s" % self.target_node)
11648
11649       # Now we rename the new LVs to the old LVs
11650       self.lu.LogInfo("Renaming the new LVs on the target node")
11651       rename_new_to_old = [(new, old.physical_id)
11652                            for old, new in zip(old_lvs, new_lvs)]
11653       result = self.rpc.call_blockdev_rename(self.target_node,
11654                                              rename_new_to_old)
11655       result.Raise("Can't rename new LVs on node %s" % self.target_node)
11656
11657       # Intermediate steps of in memory modifications
11658       for old, new in zip(old_lvs, new_lvs):
11659         new.logical_id = old.logical_id
11660         self.cfg.SetDiskID(new, self.target_node)
11661
11662       # We need to modify old_lvs so that removal later removes the
11663       # right LVs, not the newly added ones; note that old_lvs is a
11664       # copy here
11665       for disk in old_lvs:
11666         disk.logical_id = ren_fn(disk, temp_suffix)
11667         self.cfg.SetDiskID(disk, self.target_node)
11668
11669       # Now that the new lvs have the old name, we can add them to the device
11670       self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11671       result = self.rpc.call_blockdev_addchildren(self.target_node,
11672                                                   (dev, self.instance), new_lvs)
11673       msg = result.fail_msg
11674       if msg:
11675         for new_lv in new_lvs:
11676           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11677                                                new_lv).fail_msg
11678           if msg2:
11679             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11680                                hint=("cleanup manually the unused logical"
11681                                      "volumes"))
11682         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11683
11684     cstep = itertools.count(5)
11685
11686     if self.early_release:
11687       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11688       self._RemoveOldStorage(self.target_node, iv_names)
11689       # TODO: Check if releasing locks early still makes sense
11690       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11691     else:
11692       # Release all resource locks except those used by the instance
11693       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11694                     keep=self.node_secondary_ip.keys())
11695
11696     # Release all node locks while waiting for sync
11697     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11698
11699     # TODO: Can the instance lock be downgraded here? Take the optional disk
11700     # shutdown in the caller into consideration.
11701
11702     # Wait for sync
11703     # This can fail as the old devices are degraded and _WaitForSync
11704     # does a combined result over all disks, so we don't check its return value
11705     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11706     _WaitForSync(self.lu, self.instance)
11707
11708     # Check all devices manually
11709     self._CheckDevices(self.instance.primary_node, iv_names)
11710
11711     # Step: remove old storage
11712     if not self.early_release:
11713       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11714       self._RemoveOldStorage(self.target_node, iv_names)
11715
11716   def _ExecDrbd8Secondary(self, feedback_fn):
11717     """Replace the secondary node for DRBD 8.
11718
11719     The algorithm for replace is quite complicated:
11720       - for all disks of the instance:
11721         - create new LVs on the new node with same names
11722         - shutdown the drbd device on the old secondary
11723         - disconnect the drbd network on the primary
11724         - create the drbd device on the new secondary
11725         - network attach the drbd on the primary, using an artifice:
11726           the drbd code for Attach() will connect to the network if it
11727           finds a device which is connected to the good local disks but
11728           not network enabled
11729       - wait for sync across all devices
11730       - remove all disks from the old secondary
11731
11732     Failures are not very well handled.
11733
11734     """
11735     steps_total = 6
11736
11737     pnode = self.instance.primary_node
11738
11739     # Step: check device activation
11740     self.lu.LogStep(1, steps_total, "Check device existence")
11741     self._CheckDisksExistence([self.instance.primary_node])
11742     self._CheckVolumeGroup([self.instance.primary_node])
11743
11744     # Step: check other node consistency
11745     self.lu.LogStep(2, steps_total, "Check peer consistency")
11746     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11747
11748     # Step: create new storage
11749     self.lu.LogStep(3, steps_total, "Allocate new storage")
11750     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11751     for idx, dev in enumerate(disks):
11752       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11753                       (self.new_node, idx))
11754       # we pass force_create=True to force LVM creation
11755       for new_lv in dev.children:
11756         _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11757                              True, _GetInstanceInfoText(self.instance), False)
11758
11759     # Step 4: dbrd minors and drbd setups changes
11760     # after this, we must manually remove the drbd minors on both the
11761     # error and the success paths
11762     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11763     minors = self.cfg.AllocateDRBDMinor([self.new_node
11764                                          for dev in self.instance.disks],
11765                                         self.instance.name)
11766     logging.debug("Allocated minors %r", minors)
11767
11768     iv_names = {}
11769     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11770       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11771                       (self.new_node, idx))
11772       # create new devices on new_node; note that we create two IDs:
11773       # one without port, so the drbd will be activated without
11774       # networking information on the new node at this stage, and one
11775       # with network, for the latter activation in step 4
11776       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11777       if self.instance.primary_node == o_node1:
11778         p_minor = o_minor1
11779       else:
11780         assert self.instance.primary_node == o_node2, "Three-node instance?"
11781         p_minor = o_minor2
11782
11783       new_alone_id = (self.instance.primary_node, self.new_node, None,
11784                       p_minor, new_minor, o_secret)
11785       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11786                     p_minor, new_minor, o_secret)
11787
11788       iv_names[idx] = (dev, dev.children, new_net_id)
11789       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11790                     new_net_id)
11791       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11792                               logical_id=new_alone_id,
11793                               children=dev.children,
11794                               size=dev.size,
11795                               params={})
11796       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11797                                              self.cfg)
11798       try:
11799         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11800                               anno_new_drbd,
11801                               _GetInstanceInfoText(self.instance), False)
11802       except errors.GenericError:
11803         self.cfg.ReleaseDRBDMinors(self.instance.name)
11804         raise
11805
11806     # We have new devices, shutdown the drbd on the old secondary
11807     for idx, dev in enumerate(self.instance.disks):
11808       self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
11809       self.cfg.SetDiskID(dev, self.target_node)
11810       msg = self.rpc.call_blockdev_shutdown(self.target_node,
11811                                             (dev, self.instance)).fail_msg
11812       if msg:
11813         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11814                            "node: %s" % (idx, msg),
11815                            hint=("Please cleanup this device manually as"
11816                                  " soon as possible"))
11817
11818     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11819     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11820                                                self.instance.disks)[pnode]
11821
11822     msg = result.fail_msg
11823     if msg:
11824       # detaches didn't succeed (unlikely)
11825       self.cfg.ReleaseDRBDMinors(self.instance.name)
11826       raise errors.OpExecError("Can't detach the disks from the network on"
11827                                " old node: %s" % (msg,))
11828
11829     # if we managed to detach at least one, we update all the disks of
11830     # the instance to point to the new secondary
11831     self.lu.LogInfo("Updating instance configuration")
11832     for dev, _, new_logical_id in iv_names.itervalues():
11833       dev.logical_id = new_logical_id
11834       self.cfg.SetDiskID(dev, self.instance.primary_node)
11835
11836     self.cfg.Update(self.instance, feedback_fn)
11837
11838     # Release all node locks (the configuration has been updated)
11839     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11840
11841     # and now perform the drbd attach
11842     self.lu.LogInfo("Attaching primary drbds to new secondary"
11843                     " (standalone => connected)")
11844     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11845                                             self.new_node],
11846                                            self.node_secondary_ip,
11847                                            (self.instance.disks, self.instance),
11848                                            self.instance.name,
11849                                            False)
11850     for to_node, to_result in result.items():
11851       msg = to_result.fail_msg
11852       if msg:
11853         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11854                            to_node, msg,
11855                            hint=("please do a gnt-instance info to see the"
11856                                  " status of disks"))
11857
11858     cstep = itertools.count(5)
11859
11860     if self.early_release:
11861       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11862       self._RemoveOldStorage(self.target_node, iv_names)
11863       # TODO: Check if releasing locks early still makes sense
11864       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11865     else:
11866       # Release all resource locks except those used by the instance
11867       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11868                     keep=self.node_secondary_ip.keys())
11869
11870     # TODO: Can the instance lock be downgraded here? Take the optional disk
11871     # shutdown in the caller into consideration.
11872
11873     # Wait for sync
11874     # This can fail as the old devices are degraded and _WaitForSync
11875     # does a combined result over all disks, so we don't check its return value
11876     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11877     _WaitForSync(self.lu, self.instance)
11878
11879     # Check all devices manually
11880     self._CheckDevices(self.instance.primary_node, iv_names)
11881
11882     # Step: remove old storage
11883     if not self.early_release:
11884       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11885       self._RemoveOldStorage(self.target_node, iv_names)
11886
11887
11888 class LURepairNodeStorage(NoHooksLU):
11889   """Repairs the volume group on a node.
11890
11891   """
11892   REQ_BGL = False
11893
11894   def CheckArguments(self):
11895     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11896
11897     storage_type = self.op.storage_type
11898
11899     if (constants.SO_FIX_CONSISTENCY not in
11900         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11901       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11902                                  " repaired" % storage_type,
11903                                  errors.ECODE_INVAL)
11904
11905   def ExpandNames(self):
11906     self.needed_locks = {
11907       locking.LEVEL_NODE: [self.op.node_name],
11908       }
11909
11910   def _CheckFaultyDisks(self, instance, node_name):
11911     """Ensure faulty disks abort the opcode or at least warn."""
11912     try:
11913       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11914                                   node_name, True):
11915         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11916                                    " node '%s'" % (instance.name, node_name),
11917                                    errors.ECODE_STATE)
11918     except errors.OpPrereqError, err:
11919       if self.op.ignore_consistency:
11920         self.LogWarning(str(err.args[0]))
11921       else:
11922         raise
11923
11924   def CheckPrereq(self):
11925     """Check prerequisites.
11926
11927     """
11928     # Check whether any instance on this node has faulty disks
11929     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11930       if inst.admin_state != constants.ADMINST_UP:
11931         continue
11932       check_nodes = set(inst.all_nodes)
11933       check_nodes.discard(self.op.node_name)
11934       for inst_node_name in check_nodes:
11935         self._CheckFaultyDisks(inst, inst_node_name)
11936
11937   def Exec(self, feedback_fn):
11938     feedback_fn("Repairing storage unit '%s' on %s ..." %
11939                 (self.op.name, self.op.node_name))
11940
11941     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11942     result = self.rpc.call_storage_execute(self.op.node_name,
11943                                            self.op.storage_type, st_args,
11944                                            self.op.name,
11945                                            constants.SO_FIX_CONSISTENCY)
11946     result.Raise("Failed to repair storage unit '%s' on %s" %
11947                  (self.op.name, self.op.node_name))
11948
11949
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  The LU does not move any instance itself; it computes the jobs needed
  for the evacuation and returns them for submission.

  """
  REQ_BGL = False

  # Maps the opcode's evacuation mode to the iallocator's evacuation mode
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    """Checks the iallocator/remote node arguments.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expands node names and declares the (shared) locks.

    @raise errors.OpPrereqError: if the remote node is the evacuated node
        itself, or if a remote node is given for a mode other than
        secondary evacuation

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    @return: set containing the evacuated node plus either the given
        remote node or all members of the evacuated node's group

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    @raise errors.OpPrereqError: if all instances (primary and secondary)
        are to be evacuated at once, which is not supported

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    """Fills in the locks needed at the given locking level.

    """
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    """Verifies the optimistically acquired locks and the target node.

    @raise errors.OpPrereqError: if the needed nodes are not all locked,
        or if the remote node is the primary node of an instance to be
        evacuated
    @raise errors.OpExecError: if the node groups or the instances
        changed since the locks were acquired

    """
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Computes the jobs evacuating the instances.

    @return: L{ResultWithJobs} holding one job (a list of opcodes) per
        entry; empty if there is nothing to evacuate
    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      evac_mode = self._MODE2IALLOCATOR[self.op.mode]
      req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
                                     instances=list(self.instance_names))
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # One replace-disks job per instance, all moving to the given node
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
12147
12148
12149 def _SetOpEarlyRelease(early_release, op):
12150   """Sets C{early_release} flag on opcodes if available.
12151
12152   """
12153   try:
12154     op.early_release = early_release
12155   except AttributeError:
12156     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12157
12158   return op
12159
12160
12161 def _NodeEvacDest(use_nodes, group, nodes):
12162   """Returns group or nodes depending on caller's choice.
12163
12164   """
12165   if use_nodes:
12166     return utils.CommaJoin(nodes)
12167   else:
12168     return group
12169
12170
12171 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12172   """Unpacks the result of change-group and node-evacuate iallocator requests.
12173
12174   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12175   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12176
12177   @type lu: L{LogicalUnit}
12178   @param lu: Logical unit instance
12179   @type alloc_result: tuple/list
12180   @param alloc_result: Result from iallocator
12181   @type early_release: bool
12182   @param early_release: Whether to release locks early if possible
12183   @type use_nodes: bool
12184   @param use_nodes: Whether to display node names instead of groups
12185
12186   """
12187   (moved, failed, jobs) = alloc_result
12188
12189   if failed:
12190     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12191                                  for (name, reason) in failed)
12192     lu.LogWarning("Unable to evacuate instances %s", failreason)
12193     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12194
12195   if moved:
12196     lu.LogInfo("Instances to be moved: %s",
12197                utils.CommaJoin("%s (to %s)" %
12198                                (name, _NodeEvacDest(use_nodes, group, nodes))
12199                                for (name, group, nodes) in moved))
12200
12201   return [map(compat.partial(_SetOpEarlyRelease, early_release),
12202               map(opcodes.OpCode.LoadOpCode, ops))
12203           for ops in jobs]
12204
12205
12206 def _DiskSizeInBytesToMebibytes(lu, size):
12207   """Converts a disk size in bytes to mebibytes.
12208
12209   Warns and rounds up if the size isn't an even multiple of 1 MiB.
12210
12211   """
12212   (mib, remainder) = divmod(size, 1024 * 1024)
12213
12214   if remainder != 0:
12215     lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12216                   " to not overwrite existing data (%s bytes will not be"
12217                   " wiped)", (1024 * 1024) - remainder)
12218     mib += 1
12219
12220   return mib
12221
12222
12223 class LUInstanceGrowDisk(LogicalUnit):
12224   """Grow a disk of an instance.
12225
12226   """
12227   HPATH = "disk-grow"
12228   HTYPE = constants.HTYPE_INSTANCE
12229   REQ_BGL = False
12230
12231   def ExpandNames(self):
12232     self._ExpandAndLockInstance()
12233     self.needed_locks[locking.LEVEL_NODE] = []
12234     self.needed_locks[locking.LEVEL_NODE_RES] = []
12235     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12236     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12237
12238   def DeclareLocks(self, level):
12239     if level == locking.LEVEL_NODE:
12240       self._LockInstancesNodes()
12241     elif level == locking.LEVEL_NODE_RES:
12242       # Copy node locks
12243       self.needed_locks[locking.LEVEL_NODE_RES] = \
12244         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12245
12246   def BuildHooksEnv(self):
12247     """Build hooks env.
12248
12249     This runs on the master, the primary and all the secondaries.
12250
12251     """
12252     env = {
12253       "DISK": self.op.disk,
12254       "AMOUNT": self.op.amount,
12255       "ABSOLUTE": self.op.absolute,
12256       }
12257     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12258     return env
12259
12260   def BuildHooksNodes(self):
12261     """Build hooks nodes.
12262
12263     """
12264     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12265     return (nl, nl)
12266
12267   def CheckPrereq(self):
12268     """Check prerequisites.
12269
12270     This checks that the instance is in the cluster.
12271
12272     """
12273     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12274     assert instance is not None, \
12275       "Cannot retrieve locked instance %s" % self.op.instance_name
12276     nodenames = list(instance.all_nodes)
12277     for node in nodenames:
12278       _CheckNodeOnline(self, node)
12279
12280     self.instance = instance
12281
12282     if instance.disk_template not in constants.DTS_GROWABLE:
12283       raise errors.OpPrereqError("Instance's disk layout does not support"
12284                                  " growing", errors.ECODE_INVAL)
12285
12286     self.disk = instance.FindDisk(self.op.disk)
12287
12288     if self.op.absolute:
12289       self.target = self.op.amount
12290       self.delta = self.target - self.disk.size
12291       if self.delta < 0:
12292         raise errors.OpPrereqError("Requested size (%s) is smaller than "
12293                                    "current disk size (%s)" %
12294                                    (utils.FormatUnit(self.target, "h"),
12295                                     utils.FormatUnit(self.disk.size, "h")),
12296                                    errors.ECODE_STATE)
12297     else:
12298       self.delta = self.op.amount
12299       self.target = self.disk.size + self.delta
12300       if self.delta < 0:
12301         raise errors.OpPrereqError("Requested increment (%s) is negative" %
12302                                    utils.FormatUnit(self.delta, "h"),
12303                                    errors.ECODE_INVAL)
12304
12305     if instance.disk_template not in (constants.DT_FILE,
12306                                       constants.DT_SHARED_FILE,
12307                                       constants.DT_RBD):
12308       # TODO: check the free disk space for file, when that feature will be
12309       # supported
12310       _CheckNodesFreeDiskPerVG(self, nodenames,
12311                                self.disk.ComputeGrowth(self.delta))
12312
  def Exec(self, feedback_fn):
    """Execute disk grow.

    The disk is activated, the grow request is first sent to all nodes of the
    instance in dry-run mode and then for real; afterwards the new size is
    recorded in the configuration. If the cluster has C{prealloc_wipe_disks}
    set, the disk is wiped via C{_WipeDisks} using the size retrieved before
    growing, and if the opcode asks for it, the disk sync is waited for.

    @param feedback_fn: function used to send feedback back to the caller
    @raise errors.OpExecError: if the disk cannot be activated or its current
      size cannot be retrieved from the primary node

    """
    instance = self.instance
    disk = self.disk

    # Exactly the lock of this instance must be held, and the node and node
    # resource lock sets must be identical
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    # Only the disk being grown is assembled
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    # NOTE(review): the two trailing booleans look like (dry-run,
    # backing-store) flags, judging from the comments below -- confirm against
    # the RPC definition
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)

    if wipe_disks:
      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      # A single disk was queried, hence a single-element payload is expected
      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      # The size seen on the node must not be below the configured size
      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
    else:
      old_disk_size = None

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)

    # Record the new size on the disk object and write the instance back to
    # the configuration
    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    # The old size is known if and only if wiping was requested
    assert wipe_disks ^ (old_disk_size is None)

    if wipe_disks:
      assert instance.disks[self.op.disk] == disk

      # Wipe newly added disk space
      _WipeDisks(self, instance,
                 disks=[(self.op.disk, disk, old_disk_size)])

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.LogWarning("Disk syncing has not returned a good status; check"
                        " the instance")
      # The disk was assembled above; shut it down again unless the instance
      # is marked as up
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.LogWarning("Not shutting down the disk even if the instance is"
                      " not supposed to be running because no wait for"
                      " sync mode was requested")

    # Node resource locks are still held at this point (only the node locks
    # were released above)
    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12409
12410
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  The result is a dictionary indexed by instance name, holding configuration
  data and, unless only static information was requested, run state and
  block device status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance names and declare the needed locks.

    """
    self.needed_locks = {}

    # Locking is enforced whenever non-static information is wanted
    if not self.op.static and not self.op.use_locking:
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if not self.op.instances and self.op.use_locking:
      # The names are taken from the acquired locks later on
      self.wanted_names = None
    else:
      # The names are expanded right away
      self.wanted_names = _GetWantedInstances(self, self.op.instances)

    if not self.op.use_locking:
      return

    self.share_locks = _ShareAll()

    if self.wanted_names is None:
      instance_locks = locking.ALL_SET
    else:
      instance_locks = self.wanted_names
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks

    # Group and node locks are filled in by DeclareLocks
    for lock_level in (locking.LEVEL_NODEGROUP, locking.LEVEL_NODE):
      self.needed_locks[lock_level] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare node group and node locks when locking is in use.

    @param level: the locking level being processed

    """
    if not self.op.use_locking:
      return

    if level == locking.LEVEL_NODEGROUP:
      # Lock all groups used by instances optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      group_uuids = set()
      for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
        group_uuids.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = frozenset(group_uuids)

    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    inst_locks = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    group_locks = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    node_locks = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = inst_locks

    inst_map = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if not self.op.use_locking:
      # Without locking nothing may be owned
      assert not (inst_locks or group_locks or node_locks)
    else:
      _CheckInstancesNodeGroups(self.cfg, inst_map, group_locks, node_locks,
                                None)

    self.wanted_instances = inst_map.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    @return: C{None} for static queries, when no node is given, when the node
      is offline or when no status is reported; otherwise a tuple of device
      path, major, minor, sync percent, estimated time, degraded flag and
      local disk status

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    rpc_result = self.rpc.call_blockdev_find(node, dev)
    if rpc_result.offline:
      return None

    rpc_result.Raise("Can't compute disk status for %s" % instance.name)

    bdev_status = rpc_result.payload
    if bdev_status is None:
      return None

    status_fields = ("dev_path", "major", "minor", "sync_percent",
                     "estimated_time", "is_degraded", "ldisk_status")
    return tuple(getattr(bdev_status, field) for field in status_fields)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    The device is annotated via C{_AnnotateDiskParams} before the status is
    computed.

    """
    annotated = _AnnotateDiskParams(instance, [dev], self.cfg)
    (anno_dev,) = annotated

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # For DRBD the secondary is the node of the logical ID which is not the
      # primary (otherwise the node passed in is used)
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance, dev)

    children_status = []
    if dev.children:
      children_status = [self._ComputeDiskStatusInner(instance, snode, child)
                         for child in dev.children]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    data = {}

    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    def _GroupName(uuid):
      """Map a node group UUID to the name of the group."""
      return groups[uuid].name

    for inst in self.wanted_instances:
      pnode = nodes[inst.primary_node]

      if self.op.static or pnode.offline:
        # No runtime data is collected in this case
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, inst.name))
      else:
        info_result = self.rpc.call_instance_info(inst.primary_node,
                                                  inst.name,
                                                  inst.hypervisor)
        info_result.Raise("Error checking node %s" % inst.primary_node)
        info = info_result.payload
        if info and "state" in info:
          remote_state = "up"
        elif inst.admin_state == constants.ADMINST_UP:
          remote_state = "down"
        else:
          remote_state = inst.admin_state

      disks = [self._ComputeDiskStatus(inst, None, dev)
               for dev in inst.disks]

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in inst.secondary_nodes]

      data[inst.name] = {
        "name": inst.name,
        "config_state": inst.admin_state,
        "run_state": remote_state,
        "pnode": inst.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": _GroupName(pnode.group),
        "snodes": inst.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": [_GroupName(uuid)
                               for uuid in snodes_group_uuids],
        "os": inst.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, inst.nics),
        "disk_template": inst.disk_template,
        "disks": disks,
        "hypervisor": inst.hypervisor,
        "network_port": inst.network_port,
        "hv_instance": inst.hvparams,
        "hv_actual": cluster.FillHV(inst, skip_globals=True),
        "be_instance": inst.beparams,
        "be_actual": cluster.FillBE(inst),
        "os_instance": inst.osparams,
        "os_actual": cluster.SimpleFillOS(inst.os, inst.osparams),
        "serial_no": inst.serial_no,
        "mtime": inst.mtime,
        "ctime": inst.ctime,
        "uuid": inst.uuid,
        }

    return data
12624
12625
def PrepareContainerMods(mods, private_fn):
  """Extends container modifications with a private data field.

  Every C{(operation, index, parameters)} tuple is turned into a four-element
  tuple whose last item is either C{None} (no C{private_fn} given) or the
  result of a separate call to C{private_fn}.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification
  @rtype: list

  """
  prepared = []

  for (op, idx, params) in mods:
    if private_fn is None:
      private = None
    else:
      # One private object per modification
      private = private_fn()

    prepared.append((op, idx, params, private))

  return prepared
12643
12644
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks; every entry is described as a two-item sequence whose first
#: item is a non-empty string
_TApplyContModsCbChanges = ht.TMaybeListOf(
  ht.TAnd(ht.TIsLength(2),
          ht.TItems([ht.TNonEmptyString, ht.TAny])))
12652
12653
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  Modifications are processed in order; each one adds, modifies or removes a
  single item. An index of -1 refers to the end of the container (append for
  additions, last item otherwise).

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}
  @raise IndexError: for negative indices other than -1 and for indices not
    usable with the container
  @raise errors.ProgrammerError: for unknown operations

  """
  for (op, idx, params, private) in mods:
    at_end = (idx == -1)

    if at_end:
      # Last existing item (only meaningful for modify/remove)
      absidx = len(container) - 1
    else:
      if idx < 0:
        raise IndexError("Not accepting negative indices other than -1")
      if idx > len(container):
        raise IndexError("Got %s index %s, but there are only %s" %
                         (kind, idx, len(container)))
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Position the new item is going to have
      if at_end:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is not None:
        (item, changes) = create_fn(addidx, params, private)
      else:
        item = params

      if at_end:
        container.append(item)
      else:
        assert idx >= 0
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # All other operations work on an existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op != constants.DDM_MODIFY:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)
      elif modify_fn is not None:
        changes = modify_fn(absidx, item, params, private)

    assert _TApplyContModsCbChanges(changes)

    if chgdesc is not None and changes is not None:
      chgdesc.extend(changes)
12741
12742
12743 def _UpdateIvNames(base_index, disks):
12744   """Updates the C{iv_name} attribute of disks.
12745
12746   @type disks: list of L{objects.Disk}
12747
12748   """
12749   for (idx, disk) in enumerate(disks):
12750     disk.iv_name = "disk/%s" % (base_index + idx, )
12751
12752
12753 class _InstNicModPrivate:
12754   """Data structure for network interface modifications.
12755
12756   Used by L{LUInstanceSetParams}.
12757
12758   """
12759   def __init__(self):
12760     self.params = None
12761     self.filled = None
12762
12763
12764 class LUInstanceSetParams(LogicalUnit):
12765   """Modifies an instance's parameters.
12766
12767   """
12768   HPATH = "instance-modify"
12769   HTYPE = constants.HTYPE_INSTANCE
12770   REQ_BGL = False
12771
12772   @staticmethod
12773   def _UpgradeDiskNicMods(kind, mods, verify_fn):
12774     assert ht.TList(mods)
12775     assert not mods or len(mods[0]) in (2, 3)
12776
12777     if mods and len(mods[0]) == 2:
12778       result = []
12779
12780       addremove = 0
12781       for op, params in mods:
12782         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12783           result.append((op, -1, params))
12784           addremove += 1
12785
12786           if addremove > 1:
12787             raise errors.OpPrereqError("Only one %s add or remove operation is"
12788                                        " supported at a time" % kind,
12789                                        errors.ECODE_INVAL)
12790         else:
12791           result.append((constants.DDM_MODIFY, op, params))
12792
12793       assert verify_fn(result)
12794     else:
12795       result = mods
12796
12797     return result
12798
12799   @staticmethod
12800   def _CheckMods(kind, mods, key_types, item_fn):
12801     """Ensures requested disk/NIC modifications are valid.
12802
12803     """
12804     for (op, _, params) in mods:
12805       assert ht.TDict(params)
12806
12807       utils.ForceDictType(params, key_types)
12808
12809       if op == constants.DDM_REMOVE:
12810         if params:
12811           raise errors.OpPrereqError("No settings should be passed when"
12812                                      " removing a %s" % kind,
12813                                      errors.ECODE_INVAL)
12814       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12815         item_fn(op, params)
12816       else:
12817         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12818
12819   @staticmethod
12820   def _VerifyDiskModification(op, params):
12821     """Verifies a disk modification.
12822
12823     """
12824     if op == constants.DDM_ADD:
12825       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12826       if mode not in constants.DISK_ACCESS_SET:
12827         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12828                                    errors.ECODE_INVAL)
12829
12830       size = params.get(constants.IDISK_SIZE, None)
12831       if size is None:
12832         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12833                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12834
12835       try:
12836         size = int(size)
12837       except (TypeError, ValueError), err:
12838         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12839                                    errors.ECODE_INVAL)
12840
12841       params[constants.IDISK_SIZE] = size
12842
12843     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12844       raise errors.OpPrereqError("Disk size change not possible, use"
12845                                  " grow-disk", errors.ECODE_INVAL)
12846
12847   @staticmethod
12848   def _VerifyNicModification(op, params):
12849     """Verifies a network interface modification.
12850
12851     """
12852     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12853       ip = params.get(constants.INIC_IP, None)
12854       req_net = params.get(constants.INIC_NETWORK, None)
12855       link = params.get(constants.NIC_LINK, None)
12856       mode = params.get(constants.NIC_MODE, None)
12857       if req_net is not None:
12858         if req_net.lower() == constants.VALUE_NONE:
12859           params[constants.INIC_NETWORK] = None
12860           req_net = None
12861         elif link is not None or mode is not None:
12862           raise errors.OpPrereqError("If network is given"
12863                                      " mode or link should not",
12864                                      errors.ECODE_INVAL)
12865
12866       if op == constants.DDM_ADD:
12867         macaddr = params.get(constants.INIC_MAC, None)
12868         if macaddr is None:
12869           params[constants.INIC_MAC] = constants.VALUE_AUTO
12870
12871       if ip is not None:
12872         if ip.lower() == constants.VALUE_NONE:
12873           params[constants.INIC_IP] = None
12874         else:
12875           if ip.lower() == constants.NIC_IP_POOL:
12876             if op == constants.DDM_ADD and req_net is None:
12877               raise errors.OpPrereqError("If ip=pool, parameter network"
12878                                          " cannot be none",
12879                                          errors.ECODE_INVAL)
12880           else:
12881             if not netutils.IPAddress.IsValid(ip):
12882               raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12883                                          errors.ECODE_INVAL)
12884
12885       if constants.INIC_MAC in params:
12886         macaddr = params[constants.INIC_MAC]
12887         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12888           macaddr = utils.NormalizeAndValidateMac(macaddr)
12889
12890         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12891           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12892                                      " modifying an existing NIC",
12893                                      errors.ECODE_INVAL)
12894
12895   def CheckArguments(self):
12896     if not (self.op.nics or self.op.disks or self.op.disk_template or
12897             self.op.hvparams or self.op.beparams or self.op.os_name or
12898             self.op.offline is not None or self.op.runtime_mem):
12899       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12900
12901     if self.op.hvparams:
12902       _CheckGlobalHvParams(self.op.hvparams)
12903
12904     self.op.disks = self._UpgradeDiskNicMods(
12905       "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12906     self.op.nics = self._UpgradeDiskNicMods(
12907       "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12908
12909     # Check disk modifications
12910     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12911                     self._VerifyDiskModification)
12912
12913     if self.op.disks and self.op.disk_template is not None:
12914       raise errors.OpPrereqError("Disk template conversion and other disk"
12915                                  " changes not supported at the same time",
12916                                  errors.ECODE_INVAL)
12917
12918     if (self.op.disk_template and
12919         self.op.disk_template in constants.DTS_INT_MIRROR and
12920         self.op.remote_node is None):
12921       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12922                                  " one requires specifying a secondary node",
12923                                  errors.ECODE_INVAL)
12924
12925     # Check NIC modifications
12926     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12927                     self._VerifyNicModification)
12928
12929   def ExpandNames(self):
12930     self._ExpandAndLockInstance()
12931     self.needed_locks[locking.LEVEL_NODEGROUP] = []
12932     # Can't even acquire node locks in shared mode as upcoming changes in
12933     # Ganeti 2.6 will start to modify the node object on disk conversion
12934     self.needed_locks[locking.LEVEL_NODE] = []
12935     self.needed_locks[locking.LEVEL_NODE_RES] = []
12936     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12937     # Look node group to look up the ipolicy
12938     self.share_locks[locking.LEVEL_NODEGROUP] = 1
12939
12940   def DeclareLocks(self, level):
12941     if level == locking.LEVEL_NODEGROUP:
12942       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12943       # Acquire locks for the instance's nodegroups optimistically. Needs
12944       # to be verified in CheckPrereq
12945       self.needed_locks[locking.LEVEL_NODEGROUP] = \
12946         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12947     elif level == locking.LEVEL_NODE:
12948       self._LockInstancesNodes()
12949       if self.op.disk_template and self.op.remote_node:
12950         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12951         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12952     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12953       # Copy node locks
12954       self.needed_locks[locking.LEVEL_NODE_RES] = \
12955         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12956
12957   def BuildHooksEnv(self):
12958     """Build hooks env.
12959
12960     This runs on the master, primary and secondaries.
12961
12962     """
12963     args = {}
12964     if constants.BE_MINMEM in self.be_new:
12965       args["minmem"] = self.be_new[constants.BE_MINMEM]
12966     if constants.BE_MAXMEM in self.be_new:
12967       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12968     if constants.BE_VCPUS in self.be_new:
12969       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12970     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12971     # information at all.
12972
12973     if self._new_nics is not None:
12974       nics = []
12975
12976       for nic in self._new_nics:
12977         n = copy.deepcopy(nic)
12978         nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12979         n.nicparams = nicparams
12980         nics.append(_NICToTuple(self, n))
12981
12982       args["nics"] = nics
12983
12984     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12985     if self.op.disk_template:
12986       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12987     if self.op.runtime_mem:
12988       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12989
12990     return env
12991
12992   def BuildHooksNodes(self):
12993     """Build hooks nodes.
12994
12995     """
12996     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12997     return (nl, nl)
12998
12999   def _PrepareNicModification(self, params, private, old_ip, old_net,
13000                               old_params, cluster, pnode):
13001
13002     update_params_dict = dict([(key, params[key])
13003                                for key in constants.NICS_PARAMETERS
13004                                if key in params])
13005
13006     req_link = update_params_dict.get(constants.NIC_LINK, None)
13007     req_mode = update_params_dict.get(constants.NIC_MODE, None)
13008
13009     new_net = params.get(constants.INIC_NETWORK, old_net)
13010     if new_net is not None:
13011       netparams = self.cfg.GetGroupNetParams(new_net, pnode)
13012       if netparams is None:
13013         raise errors.OpPrereqError("No netparams found for the network"
13014                                    " %s, probably not connected" % new_net,
13015                                    errors.ECODE_INVAL)
13016       new_params = dict(netparams)
13017     else:
13018       new_params = _GetUpdatedParams(old_params, update_params_dict)
13019
13020     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13021
13022     new_filled_params = cluster.SimpleFillNIC(new_params)
13023     objects.NIC.CheckParameterSyntax(new_filled_params)
13024
13025     new_mode = new_filled_params[constants.NIC_MODE]
13026     if new_mode == constants.NIC_MODE_BRIDGED:
13027       bridge = new_filled_params[constants.NIC_LINK]
13028       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13029       if msg:
13030         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13031         if self.op.force:
13032           self.warn.append(msg)
13033         else:
13034           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13035
13036     elif new_mode == constants.NIC_MODE_ROUTED:
13037       ip = params.get(constants.INIC_IP, old_ip)
13038       if ip is None:
13039         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13040                                    " on a routed NIC", errors.ECODE_INVAL)
13041
13042     elif new_mode == constants.NIC_MODE_OVS:
13043       # TODO: check OVS link
13044       self.LogInfo("OVS links are currently not checked for correctness")
13045
13046     if constants.INIC_MAC in params:
13047       mac = params[constants.INIC_MAC]
13048       if mac is None:
13049         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13050                                    errors.ECODE_INVAL)
13051       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13052         # otherwise generate the MAC address
13053         params[constants.INIC_MAC] = \
13054           self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13055       else:
13056         # or validate/reserve the current one
13057         try:
13058           self.cfg.ReserveMAC(mac, self.proc.GetECId())
13059         except errors.ReservationError:
13060           raise errors.OpPrereqError("MAC address '%s' already in use"
13061                                      " in cluster" % mac,
13062                                      errors.ECODE_NOTUNIQUE)
13063     elif new_net != old_net:
13064
13065       def get_net_prefix(net):
13066         if net:
13067           uuid = self.cfg.LookupNetwork(net)
13068           if uuid:
13069             nobj = self.cfg.GetNetwork(uuid)
13070             return nobj.mac_prefix
13071         return None
13072
13073       new_prefix = get_net_prefix(new_net)
13074       old_prefix = get_net_prefix(old_net)
13075       if old_prefix != new_prefix:
13076         params[constants.INIC_MAC] = \
13077           self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13078
13079     #if there is a change in nic-network configuration
13080     new_ip = params.get(constants.INIC_IP, old_ip)
13081     if (new_ip, new_net) != (old_ip, old_net):
13082       if new_ip:
13083         if new_net:
13084           if new_ip.lower() == constants.NIC_IP_POOL:
13085             try:
13086               new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
13087             except errors.ReservationError:
13088               raise errors.OpPrereqError("Unable to get a free IP"
13089                                          " from the address pool",
13090                                          errors.ECODE_STATE)
13091             self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
13092             params[constants.INIC_IP] = new_ip
13093           elif new_ip != old_ip or new_net != old_net:
13094             try:
13095               self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
13096               self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
13097             except errors.ReservationError:
13098               raise errors.OpPrereqError("IP %s not available in network %s" %
13099                                          (new_ip, new_net),
13100                                          errors.ECODE_NOTUNIQUE)
13101         elif new_ip.lower() == constants.NIC_IP_POOL:
13102           raise errors.OpPrereqError("ip=pool, but no network found",
13103                                      errors.ECODE_INVAL)
13104         else:
13105           # new net is None
13106           if self.op.conflicts_check:
13107             _CheckForConflictingIp(self, new_ip, pnode)
13108
13109       if old_ip:
13110         if old_net:
13111           try:
13112             self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13113           except errors.AddressPoolError:
13114             logging.warning("Release IP %s not contained in network %s",
13115                             old_ip, old_net)
13116
13117     # there are no changes in (net, ip) tuple
13118     elif (old_net is not None and
13119           (req_link is not None or req_mode is not None)):
13120       raise errors.OpPrereqError("Not allowed to change link or mode of"
13121                                  " a NIC that is connected to a network",
13122                                  errors.ECODE_INVAL)
13123
13124     private.params = new_params
13125     private.filled = new_filled_params
13126
13127   def CheckPrereq(self):
13128     """Check prerequisites.
13129
13130     This only checks the instance list against the existing names.
13131
13132     """
13133     assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13134     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13135
13136     cluster = self.cluster = self.cfg.GetClusterInfo()
13137     assert self.instance is not None, \
13138       "Cannot retrieve locked instance %s" % self.op.instance_name
13139
13140     pnode = instance.primary_node
13141     assert pnode in self.owned_locks(locking.LEVEL_NODE)
13142     nodelist = list(instance.all_nodes)
13143     pnode_info = self.cfg.GetNodeInfo(pnode)
13144     self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13145
13146     #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13147     assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13148     group_info = self.cfg.GetNodeGroup(pnode_info.group)
13149
13150     # dictionary with instance information after the modification
13151     ispec = {}
13152
13153     # Prepare disk/NIC modifications
13154     self.diskmod = PrepareContainerMods(self.op.disks, None)
13155     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13156
13157     # OS change
13158     if self.op.os_name and not self.op.force:
13159       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13160                       self.op.force_variant)
13161       instance_os = self.op.os_name
13162     else:
13163       instance_os = instance.os
13164
13165     assert not (self.op.disk_template and self.op.disks), \
13166       "Can't modify disk template and apply disk changes at the same time"
13167
13168     if self.op.disk_template:
13169       if instance.disk_template == self.op.disk_template:
13170         raise errors.OpPrereqError("Instance already has disk template %s" %
13171                                    instance.disk_template, errors.ECODE_INVAL)
13172
13173       if (instance.disk_template,
13174           self.op.disk_template) not in self._DISK_CONVERSIONS:
13175         raise errors.OpPrereqError("Unsupported disk template conversion from"
13176                                    " %s to %s" % (instance.disk_template,
13177                                                   self.op.disk_template),
13178                                    errors.ECODE_INVAL)
13179       _CheckInstanceState(self, instance, INSTANCE_DOWN,
13180                           msg="cannot change disk template")
13181       if self.op.disk_template in constants.DTS_INT_MIRROR:
13182         if self.op.remote_node == pnode:
13183           raise errors.OpPrereqError("Given new secondary node %s is the same"
13184                                      " as the primary node of the instance" %
13185                                      self.op.remote_node, errors.ECODE_STATE)
13186         _CheckNodeOnline(self, self.op.remote_node)
13187         _CheckNodeNotDrained(self, self.op.remote_node)
13188         # FIXME: here we assume that the old instance type is DT_PLAIN
13189         assert instance.disk_template == constants.DT_PLAIN
13190         disks = [{constants.IDISK_SIZE: d.size,
13191                   constants.IDISK_VG: d.logical_id[0]}
13192                  for d in instance.disks]
13193         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13194         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13195
13196         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13197         snode_group = self.cfg.GetNodeGroup(snode_info.group)
13198         ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13199                                                                 snode_group)
13200         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13201                                 ignore=self.op.ignore_ipolicy)
13202         if pnode_info.group != snode_info.group:
13203           self.LogWarning("The primary and secondary nodes are in two"
13204                           " different node groups; the disk parameters"
13205                           " from the first disk's node group will be"
13206                           " used")
13207
13208     # hvparams processing
13209     if self.op.hvparams:
13210       hv_type = instance.hypervisor
13211       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13212       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13213       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13214
13215       # local check
13216       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
13217       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13218       self.hv_proposed = self.hv_new = hv_new # the new actual values
13219       self.hv_inst = i_hvdict # the new dict (without defaults)
13220     else:
13221       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13222                                               instance.hvparams)
13223       self.hv_new = self.hv_inst = {}
13224
13225     # beparams processing
13226     if self.op.beparams:
13227       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13228                                    use_none=True)
13229       objects.UpgradeBeParams(i_bedict)
13230       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13231       be_new = cluster.SimpleFillBE(i_bedict)
13232       self.be_proposed = self.be_new = be_new # the new actual values
13233       self.be_inst = i_bedict # the new dict (without defaults)
13234     else:
13235       self.be_new = self.be_inst = {}
13236       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13237     be_old = cluster.FillBE(instance)
13238
13239     # CPU param validation -- checking every time a parameter is
13240     # changed to cover all cases where either CPU mask or vcpus have
13241     # changed
13242     if (constants.BE_VCPUS in self.be_proposed and
13243         constants.HV_CPU_MASK in self.hv_proposed):
13244       cpu_list = \
13245         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13246       # Verify mask is consistent with number of vCPUs. Can skip this
13247       # test if only 1 entry in the CPU mask, which means same mask
13248       # is applied to all vCPUs.
13249       if (len(cpu_list) > 1 and
13250           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13251         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13252                                    " CPU mask [%s]" %
13253                                    (self.be_proposed[constants.BE_VCPUS],
13254                                     self.hv_proposed[constants.HV_CPU_MASK]),
13255                                    errors.ECODE_INVAL)
13256
13257       # Only perform this test if a new CPU mask is given
13258       if constants.HV_CPU_MASK in self.hv_new:
13259         # Calculate the largest CPU number requested
13260         max_requested_cpu = max(map(max, cpu_list))
13261         # Check that all of the instance's nodes have enough physical CPUs to
13262         # satisfy the requested CPU mask
13263         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13264                                 max_requested_cpu + 1, instance.hypervisor)
13265
13266     # osparams processing
13267     if self.op.osparams:
13268       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13269       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13270       self.os_inst = i_osdict # the new dict (without defaults)
13271     else:
13272       self.os_inst = {}
13273
13274     self.warn = []
13275
13276     #TODO(dynmem): do the appropriate check involving MINMEM
13277     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13278         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13279       mem_check_list = [pnode]
13280       if be_new[constants.BE_AUTO_BALANCE]:
13281         # either we changed auto_balance to yes or it was from before
13282         mem_check_list.extend(instance.secondary_nodes)
13283       instance_info = self.rpc.call_instance_info(pnode, instance.name,
13284                                                   instance.hypervisor)
13285       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13286                                          [instance.hypervisor])
13287       pninfo = nodeinfo[pnode]
13288       msg = pninfo.fail_msg
13289       if msg:
13290         # Assume the primary node is unreachable and go ahead
13291         self.warn.append("Can't get info from primary node %s: %s" %
13292                          (pnode, msg))
13293       else:
13294         (_, _, (pnhvinfo, )) = pninfo.payload
13295         if not isinstance(pnhvinfo.get("memory_free", None), int):
13296           self.warn.append("Node data from primary node %s doesn't contain"
13297                            " free memory information" % pnode)
13298         elif instance_info.fail_msg:
13299           self.warn.append("Can't get instance runtime information: %s" %
13300                            instance_info.fail_msg)
13301         else:
13302           if instance_info.payload:
13303             current_mem = int(instance_info.payload["memory"])
13304           else:
13305             # Assume instance not running
13306             # (there is a slight race condition here, but it's not very
13307             # probable, and we have no other way to check)
13308             # TODO: Describe race condition
13309             current_mem = 0
13310           #TODO(dynmem): do the appropriate check involving MINMEM
13311           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13312                       pnhvinfo["memory_free"])
13313           if miss_mem > 0:
13314             raise errors.OpPrereqError("This change will prevent the instance"
13315                                        " from starting, due to %d MB of memory"
13316                                        " missing on its primary node" %
13317                                        miss_mem, errors.ECODE_NORES)
13318
13319       if be_new[constants.BE_AUTO_BALANCE]:
13320         for node, nres in nodeinfo.items():
13321           if node not in instance.secondary_nodes:
13322             continue
13323           nres.Raise("Can't get info from secondary node %s" % node,
13324                      prereq=True, ecode=errors.ECODE_STATE)
13325           (_, _, (nhvinfo, )) = nres.payload
13326           if not isinstance(nhvinfo.get("memory_free", None), int):
13327             raise errors.OpPrereqError("Secondary node %s didn't return free"
13328                                        " memory information" % node,
13329                                        errors.ECODE_STATE)
13330           #TODO(dynmem): do the appropriate check involving MINMEM
13331           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13332             raise errors.OpPrereqError("This change will prevent the instance"
13333                                        " from failover to its secondary node"
13334                                        " %s, due to not enough memory" % node,
13335                                        errors.ECODE_STATE)
13336
13337     if self.op.runtime_mem:
13338       remote_info = self.rpc.call_instance_info(instance.primary_node,
13339                                                 instance.name,
13340                                                 instance.hypervisor)
13341       remote_info.Raise("Error checking node %s" % instance.primary_node)
13342       if not remote_info.payload: # not running already
13343         raise errors.OpPrereqError("Instance %s is not running" %
13344                                    instance.name, errors.ECODE_STATE)
13345
13346       current_memory = remote_info.payload["memory"]
13347       if (not self.op.force and
13348            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13349             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13350         raise errors.OpPrereqError("Instance %s must have memory between %d"
13351                                    " and %d MB of memory unless --force is"
13352                                    " given" %
13353                                    (instance.name,
13354                                     self.be_proposed[constants.BE_MINMEM],
13355                                     self.be_proposed[constants.BE_MAXMEM]),
13356                                    errors.ECODE_INVAL)
13357
13358       delta = self.op.runtime_mem - current_memory
13359       if delta > 0:
13360         _CheckNodeFreeMemory(self, instance.primary_node,
13361                              "ballooning memory for instance %s" %
13362                              instance.name, delta, instance.hypervisor)
13363
13364     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13365       raise errors.OpPrereqError("Disk operations not supported for"
13366                                  " diskless instances", errors.ECODE_INVAL)
13367
13368     def _PrepareNicCreate(_, params, private):
13369       self._PrepareNicModification(params, private, None, None,
13370                                    {}, cluster, pnode)
13371       return (None, None)
13372
13373     def _PrepareNicMod(_, nic, params, private):
13374       self._PrepareNicModification(params, private, nic.ip, nic.network,
13375                                    nic.nicparams, cluster, pnode)
13376       return None
13377
13378     def _PrepareNicRemove(_, params, __):
13379       ip = params.ip
13380       net = params.network
13381       if net is not None and ip is not None:
13382         self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13383
13384     # Verify NIC changes (operating on copy)
13385     nics = instance.nics[:]
13386     ApplyContainerMods("NIC", nics, None, self.nicmod,
13387                        _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13388     if len(nics) > constants.MAX_NICS:
13389       raise errors.OpPrereqError("Instance has too many network interfaces"
13390                                  " (%d), cannot add more" % constants.MAX_NICS,
13391                                  errors.ECODE_STATE)
13392
13393     # Verify disk changes (operating on a copy)
13394     disks = instance.disks[:]
13395     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13396     if len(disks) > constants.MAX_DISKS:
13397       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13398                                  " more" % constants.MAX_DISKS,
13399                                  errors.ECODE_STATE)
13400     disk_sizes = [disk.size for disk in instance.disks]
13401     disk_sizes.extend(params["size"] for (op, idx, params, private) in
13402                       self.diskmod if op == constants.DDM_ADD)
13403     ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13404     ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13405
13406     if self.op.offline is not None and self.op.offline:
13407       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13408                           msg="can't change to offline")
13409
13410     # Pre-compute NIC changes (necessary to use result in hooks)
13411     self._nic_chgdesc = []
13412     if self.nicmod:
13413       # Operate on copies as this is still in prereq
13414       nics = [nic.Copy() for nic in instance.nics]
13415       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13416                          self._CreateNewNic, self._ApplyNicMods, None)
13417       self._new_nics = nics
13418       ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13419     else:
13420       self._new_nics = None
13421       ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13422
13423     if not self.op.ignore_ipolicy:
13424       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13425                                                               group_info)
13426
13427       # Fill ispec with backend parameters
13428       ispec[constants.ISPEC_SPINDLE_USE] = \
13429         self.be_new.get(constants.BE_SPINDLE_USE, None)
13430       ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13431                                                          None)
13432
13433       # Copy ispec to verify parameters with min/max values separately
13434       ispec_max = ispec.copy()
13435       ispec_max[constants.ISPEC_MEM_SIZE] = \
13436         self.be_new.get(constants.BE_MAXMEM, None)
13437       res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13438       ispec_min = ispec.copy()
13439       ispec_min[constants.ISPEC_MEM_SIZE] = \
13440         self.be_new.get(constants.BE_MINMEM, None)
13441       res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13442
13443       if (res_max or res_min):
13444         # FIXME: Improve error message by including information about whether
13445         # the upper or lower limit of the parameter fails the ipolicy.
13446         msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13447                (group_info, group_info.name,
13448                 utils.CommaJoin(set(res_max + res_min))))
13449         raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13450
13451   def _ConvertPlainToDrbd(self, feedback_fn):
13452     """Converts an instance from plain to drbd.
13453
13454     """
13455     feedback_fn("Converting template to drbd")
13456     instance = self.instance
13457     pnode = instance.primary_node
13458     snode = self.op.remote_node
13459
13460     assert instance.disk_template == constants.DT_PLAIN
13461
13462     # create a fake disk info for _GenerateDiskTemplate
13463     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13464                   constants.IDISK_VG: d.logical_id[0]}
13465                  for d in instance.disks]
13466     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13467                                       instance.name, pnode, [snode],
13468                                       disk_info, None, None, 0, feedback_fn,
13469                                       self.diskparams)
13470     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13471                                         self.diskparams)
13472     info = _GetInstanceInfoText(instance)
13473     feedback_fn("Creating additional volumes...")
13474     # first, create the missing data and meta devices
13475     for disk in anno_disks:
13476       # unfortunately this is... not too nice
13477       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13478                             info, True)
13479       for child in disk.children:
13480         _CreateSingleBlockDev(self, snode, instance, child, info, True)
13481     # at this stage, all new LVs have been created, we can rename the
13482     # old ones
13483     feedback_fn("Renaming original volumes...")
13484     rename_list = [(o, n.children[0].logical_id)
13485                    for (o, n) in zip(instance.disks, new_disks)]
13486     result = self.rpc.call_blockdev_rename(pnode, rename_list)
13487     result.Raise("Failed to rename original LVs")
13488
13489     feedback_fn("Initializing DRBD devices...")
13490     # all child devices are in place, we can now create the DRBD devices
13491     for disk in anno_disks:
13492       for node in [pnode, snode]:
13493         f_create = node == pnode
13494         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13495
13496     # at this point, the instance has been modified
13497     instance.disk_template = constants.DT_DRBD8
13498     instance.disks = new_disks
13499     self.cfg.Update(instance, feedback_fn)
13500
13501     # Release node locks while waiting for sync
13502     _ReleaseLocks(self, locking.LEVEL_NODE)
13503
13504     # disks are created, waiting for sync
13505     disk_abort = not _WaitForSync(self, instance,
13506                                   oneshot=not self.op.wait_for_sync)
13507     if disk_abort:
13508       raise errors.OpExecError("There are some degraded disks for"
13509                                " this instance, please cleanup manually")
13510
13511     # Node resource locks will be released by caller
13512
13513   def _ConvertDrbdToPlain(self, feedback_fn):
13514     """Converts an instance from drbd to plain.
13515
13516     """
13517     instance = self.instance
13518
13519     assert len(instance.secondary_nodes) == 1
13520     assert instance.disk_template == constants.DT_DRBD8
13521
13522     pnode = instance.primary_node
13523     snode = instance.secondary_nodes[0]
13524     feedback_fn("Converting template to plain")
13525
13526     old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13527     new_disks = [d.children[0] for d in instance.disks]
13528
13529     # copy over size and mode
13530     for parent, child in zip(old_disks, new_disks):
13531       child.size = parent.size
13532       child.mode = parent.mode
13533
13534     # this is a DRBD disk, return its port to the pool
13535     # NOTE: this must be done right before the call to cfg.Update!
13536     for disk in old_disks:
13537       tcp_port = disk.logical_id[2]
13538       self.cfg.AddTcpUdpPort(tcp_port)
13539
13540     # update instance structure
13541     instance.disks = new_disks
13542     instance.disk_template = constants.DT_PLAIN
13543     self.cfg.Update(instance, feedback_fn)
13544
13545     # Release locks in case removing disks takes a while
13546     _ReleaseLocks(self, locking.LEVEL_NODE)
13547
13548     feedback_fn("Removing volumes on the secondary node...")
13549     for disk in old_disks:
13550       self.cfg.SetDiskID(disk, snode)
13551       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13552       if msg:
13553         self.LogWarning("Could not remove block device %s on node %s,"
13554                         " continuing anyway: %s", disk.iv_name, snode, msg)
13555
13556     feedback_fn("Removing unneeded volumes on the primary node...")
13557     for idx, disk in enumerate(old_disks):
13558       meta = disk.children[1]
13559       self.cfg.SetDiskID(meta, pnode)
13560       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13561       if msg:
13562         self.LogWarning("Could not remove metadata for disk %d on node %s,"
13563                         " continuing anyway: %s", idx, pnode, msg)
13564
13565   def _CreateNewDisk(self, idx, params, _):
13566     """Creates a new disk.
13567
13568     """
13569     instance = self.instance
13570
13571     # add a new disk
13572     if instance.disk_template in constants.DTS_FILEBASED:
13573       (file_driver, file_path) = instance.disks[0].logical_id
13574       file_path = os.path.dirname(file_path)
13575     else:
13576       file_driver = file_path = None
13577
13578     disk = \
13579       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13580                             instance.primary_node, instance.secondary_nodes,
13581                             [params], file_path, file_driver, idx,
13582                             self.Log, self.diskparams)[0]
13583
13584     info = _GetInstanceInfoText(instance)
13585
13586     logging.info("Creating volume %s for instance %s",
13587                  disk.iv_name, instance.name)
13588     # Note: this needs to be kept in sync with _CreateDisks
13589     #HARDCODE
13590     for node in instance.all_nodes:
13591       f_create = (node == instance.primary_node)
13592       try:
13593         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13594       except errors.OpExecError, err:
13595         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13596                         disk.iv_name, disk, node, err)
13597
13598     return (disk, [
13599       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13600       ])
13601
13602   @staticmethod
13603   def _ModifyDisk(idx, disk, params, _):
13604     """Modifies a disk.
13605
13606     """
13607     disk.mode = params[constants.IDISK_MODE]
13608
13609     return [
13610       ("disk.mode/%d" % idx, disk.mode),
13611       ]
13612
13613   def _RemoveDisk(self, idx, root, _):
13614     """Removes a disk.
13615
13616     """
13617     (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13618     for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13619       self.cfg.SetDiskID(disk, node)
13620       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13621       if msg:
13622         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13623                         " continuing anyway", idx, node, msg)
13624
13625     # if this is a DRBD disk, return its port to the pool
13626     if root.dev_type in constants.LDS_DRBD:
13627       self.cfg.AddTcpUdpPort(root.logical_id[2])
13628
13629   @staticmethod
13630   def _CreateNewNic(idx, params, private):
13631     """Creates data structure for a new network interface.
13632
13633     """
13634     mac = params[constants.INIC_MAC]
13635     ip = params.get(constants.INIC_IP, None)
13636     net = params.get(constants.INIC_NETWORK, None)
13637     #TODO: not private.filled?? can a nic have no nicparams??
13638     nicparams = private.filled
13639
13640     return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13641       ("nic.%d" % idx,
13642        "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13643        (mac, ip, private.filled[constants.NIC_MODE],
13644        private.filled[constants.NIC_LINK],
13645        net)),
13646       ])
13647
13648   @staticmethod
13649   def _ApplyNicMods(idx, nic, params, private):
13650     """Modifies a network interface.
13651
13652     """
13653     changes = []
13654
13655     for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13656       if key in params:
13657         changes.append(("nic.%s/%d" % (key, idx), params[key]))
13658         setattr(nic, key, params[key])
13659
13660     if private.filled:
13661       nic.nicparams = private.filled
13662
13663       for (key, val) in nic.nicparams.items():
13664         changes.append(("nic.%s/%d" % (key, idx), val))
13665
13666     return changes
13667
  def Exec(self, feedback_fn):
    """Modifies an instance.

    Most parameters take effect only at the next restart of the instance;
    a runtime memory change is sent to the primary node right away.

    The changes are applied in this order: runtime memory, disk
    modifications, disk template conversion, NICs, hypervisor parameters,
    backend parameters, OS name, OS parameters and the offline flag. The
    instance object is written to the configuration at the very end.

    @param feedback_fn: function used to send feedback to the caller
    @rtype: list
    @return: C{(parameter, new value)} tuples describing the changes recorded
        by this method; the same list is also handed to
        L{ApplyContainerMods} for the disk modifications
    @raise errors.OpExecError: if the instance's disks can't be shut down
        before a disk template conversion

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    # Node resource locks must be held if, and only if, a disk template
    # conversion was requested
    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      # Lock sanity check; the whole block is skipped when Python runs with
      # optimizations enabled
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      # The conversion function is selected by the (current, requested)
      # template pair; an unsupported pair ends in a KeyError here
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # Bare except on purpose: whatever went wrong, the DRBD minors are
        # released for this instance before the error is re-raised
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change (note: no entry is added to the result list for it)
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # The offline flag is tri-state: None leaves the admin state untouched
    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    # Write the modified instance object back to the configuration
    self.cfg.Update(instance, feedback_fn, self.proc.GetECId())

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
13778
  # Dispatch table for disk template conversions, keyed by the pair
  # (current disk template, requested disk template). Exec looks the pair up
  # and calls the stored function with (self, feedback_fn); only the two
  # directions listed here are available.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
13783
13784
class LUInstanceChangeGroup(LogicalUnit):
  """Logical unit for changing the node group of an instance.

  The solution is computed by an iallocator; this LU only returns the jobs
  built from the allocator's result.

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Sets up the lock declarations and resolves the target groups.

    """
    self.share_locks = _ShareAll()

    # Node group and node locks are filled in by DeclareLocks
    self.needed_locks = {
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      locking.LEVEL_NODE: [],
      locking.LEVEL_NODEGROUP: [],
      }

    self._ExpandAndLockInstance()

    if not self.op.target_groups:
      self.req_target_uuids = None
    else:
      self.req_target_uuids = [self.cfg.LookupNodeGroup(group_name)
                               for group_name in self.op.target_groups]

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    """Computes the node group and node locks for the given level.

    """
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if not self.req_target_uuids:
        # No target groups, need to lock all of them
        wanted_groups = locking.ALL_SET
      else:
        wanted_groups = set(self.req_target_uuids)

        # The groups used by the instance are locked optimistically: they
        # are looked up via nodes that aren't locked yet, so the result has
        # to be verified later on
        wanted_groups.update(
          self.cfg.GetInstanceNodeGroups(self.op.instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups

    elif level == locking.LEVEL_NODE:
      if not self.req_target_uuids:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      else:
        # Lock all nodes used by the instance
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        held_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
        current_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)

        target_nodes = []
        for group_uuid in held_groups - current_groups:
          target_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)

        self.needed_locks[locking.LEVEL_NODE].extend(target_nodes)

  def CheckPrereq(self):
    """Verifies the owned locks and determines the target groups.

    @raise errors.OpPrereqError: if a requested target group is already used
        by the instance or if no target group is left

    """
    locked_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    locked_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            locked_groups.issuperset(self.req_target_uuids))
    assert locked_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # The instance's nodes must still be covered by the node locks
    assert locked_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    current_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                              locked_groups)

    if not self.req_target_uuids:
      # All groups except those used by the instance are potential targets
      self.target_uuids = locked_groups - current_groups
    else:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)

    overlap = self.target_uuids & current_groups
    if overlap:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(overlap),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Exports the target group UUIDs as C{TARGET_GROUPS} next to the
    instance's own hook variables.

    """
    assert self.target_uuids

    hooks_env = {}
    hooks_env["TARGET_GROUPS"] = " ".join(self.target_uuids)
    hooks_env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Only the master node is used, for both lists.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Runs the iallocator and returns the resulting jobs.

    @raise errors.OpPrereqError: if the iallocator finds no solution

    """
    locked_names = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert locked_names == [self.op.instance_name], "Instance not locked"

    request = iallocator.IAReqGroupChange(instances=locked_names,
                                          target_groups=list(self.target_uuids))
    allocator = iallocator.IAllocator(self.cfg, self.rpc, request)

    allocator.Run(self.op.iallocator)

    if not allocator.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  allocator.info), errors.ECODE_NORES)

    evac_jobs = _LoadNodeEvacResult(self, allocator.result,
                                    self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(evac_jobs), self.op.instance_name)

    return ResultWithJobs(evac_jobs)
13926
13927
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the export query restricted to the requested nodes.

    """
    node_filter = qlang.MakeSimpleFilter("node", self.op.nodes)
    fields = ["node", "export"]
    self.expq = _ExportQuery(node_filter, fields, self.op.use_locking)

  def ExpandNames(self):
    """Delegates to the export query.

    """
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates to the export query.

    """
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Groups the query rows by node.

    @rtype: dict
    @return: node name mapped to the list of its export names, or to
        C{False} for a node whose row carries no export name

    """
    exports_by_node = {}

    for (node_name, export_name) in self.expq.OldStyleQuery(self):
      if export_name is not None:
        exports_by_node.setdefault(node_name, []).append(export_name)
        continue

      # A missing export name marks a node without a usable answer
      exports_by_node[node_name] = False

    return exports_by_node
13954
13955
class _ExportQuery(_QueryBase):
  """Query implementation listing exports per node.

  """
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    """Determines the wanted nodes and, with locking, the needed locks.

    """
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = self.use_locking

    if not self.do_locking:
      return

    lu.share_locks = _ShareAll()

    node_locks = {
      locking.LEVEL_NODE: self.wanted,
      }

    # Without explicit names the node allocation lock is requested as well
    if not self.names:
      node_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    lu.needed_locks = node_locks

  def DeclareLocks(self, lu, level):
    """Nothing to declare at this point.

    """
    pass

  def _GetQueryData(self, lu):
    """Collects the exports found on the queried nodes.

    @rtype: list
    @return: C{(node, export name)} tuples; a node whose RPC call failed
        contributes a single tuple with C{None} as export name

    """
    # Locking is not used
    # TODO
    assert (not compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) and
            not self.do_locking and not self.use_locking)

    node_names = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    rows = []

    for (node, node_res) in lu.rpc.call_export_list(node_names).items():
      if node_res.fail_msg:
        rows.append((node, None))
        continue

      for export_name in node_res.payload:
        rows.append((node, export_name))

    return rows
14007
14008
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Delegates to L{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance, runs the online check on its primary node and
    fetches the cluster domain secret.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @return: for remote exports a dict with the keys C{handshake},
        C{x509_key_name} and C{x509_ca}; C{None} for any other mode

    """
    inst = self.instance

    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % inst.primary_node)
    cert_result = self.rpc.call_x509_cert_create(inst.primary_node,
                                                 constants.RIE_CERT_VALIDITY)
    cert_result.Raise("Can't create X509 key and certificate on %s" %
                      cert_result.node)

    (key_name, cert_pem) = cert_result.payload

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # The key name and the certificate are tied to the cluster domain secret
    # using the freshly generated salt
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_hmac, salt),
      "x509_ca": signed_ca,
      }
14058
14059
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  Two modes are handled: local exports, which go to another node of the
  cluster (C{self.op.target_node} is a node name), and remote exports, for
  which C{self.op.target_node} carries one destination description per disk
  and an X509 key name plus destination CA have to be supplied.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    @raise errors.OpPrereqError: if a remote export lacks the X509 key name
        or the destination X509 CA

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locks the instance and, for local exports, all nodes.

    """
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know
      # where the previous export might be, and in this LU we search for it
      # and remove it from its current node. In the future we could fix this
      # by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

      # Allocations should be stopped while this LU runs with node locks, but
      # it doesn't have to be exclusive
      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master and the instance's primary node are always included; the
    target node is added for local exports only.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid. For remote
    exports it additionally verifies the HMAC of the X509 key name, loads
    and verifies the destination CA and checks the per-disk target
    information, all against the cluster domain secret.

    @raise errors.OpPrereqError: if any of the checks fails
    @raise errors.ProgrammerError: if the export mode is not handled

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    # Removing a running instance is only allowed if it gets shut down first
    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      # Attributes only meaningful for remote exports
      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      # In remote mode target_node is a sequence with one entry per disk
      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    Only valid for local exports. Nodes that can't be queried are skipped
    silently; a failed removal is reported as a warning only.

    @param feedback_fn: function used to send feedback to the caller

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @param feedback_fn: function used to send feedback to the caller
    @rtype: tuple
    @return: C{(fin_resu, dresults)}: the finalization result and the list
        of per-disk boolean results, as returned by the export helper
    @raise errors.OpExecError: if the instance can't be restarted after the
        snapshots were taken, or if the finalization or any disk export
        failed

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    # Outer try/finally: disks activated above are deactivated again no
    # matter how the export ends
    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      # Inner try/finally: the helper is always cleaned up once the
      # snapshots exist
      try:
        # With the snapshots taken, an instance that was only shut down for
        # the export (and is not going to be removed) is started again
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # Collect everything that failed into a single error message
    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    # Older exports on other nodes are only dropped for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
14361
14362
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares locks on all nodes and on node allocations.

    """
    wanted_locks = {}

    # We need all nodes to be locked in order for RemoveExport to work, but
    # we don't need to lock the instance itself, as nothing will happen to it
    # (and we can remove exports also for a removed instance)
    wanted_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # Removing backups is quick, so blocking allocations is justified
    wanted_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    self.needed_locks = wanted_locks

    # Allocations should be stopped while this LU runs with node locks, but it
    # doesn't have to be exclusive
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1

  def Exec(self, feedback_fn):
    """Remove any export.

    Every locked node is asked for its exports and the one belonging to the
    instance is removed wherever it is found. Nodes that can't be queried
    are skipped with a warning; removal failures are logged only.

    @param feedback_fn: function used to send feedback to the caller

    """
    export_name = self.cfg.ExpandInstanceName(self.op.instance_name)

    # If the instance was not found we'll try with the name that was passed
    # in. This will only work if it was an FQDN, though.
    name_unresolved = not export_name
    if name_unresolved:
      export_name = self.op.instance_name

    node_names = self.owned_locks(locking.LEVEL_NODE)
    exports_by_node = self.rpc.call_export_list(node_names)

    seen_export = False
    for node in exports_by_node:
      node_res = exports_by_node[node]

      query_err = node_res.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s", node,
                        query_err)
        continue

      if export_name not in node_res.payload:
        continue

      seen_export = True
      remove_err = self.rpc.call_export_remove(node, export_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", export_name, node, remove_err)

    if name_unresolved and not seen_export:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
14416
14417
14418 class LUGroupAdd(LogicalUnit):
14419   """Logical unit for creating node groups.
14420
14421   """
14422   HPATH = "group-add"
14423   HTYPE = constants.HTYPE_GROUP
14424   REQ_BGL = False
14425
14426   def ExpandNames(self):
14427     # We need the new group's UUID here so that we can create and acquire the
14428     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14429     # that it should not check whether the UUID exists in the configuration.
14430     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14431     self.needed_locks = {}
14432     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14433
14434   def CheckPrereq(self):
14435     """Check prerequisites.
14436
14437     This checks that the given group name is not an existing node group
14438     already.
14439
14440     """
14441     try:
14442       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14443     except errors.OpPrereqError:
14444       pass
14445     else:
14446       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14447                                  " node group (UUID: %s)" %
14448                                  (self.op.group_name, existing_uuid),
14449                                  errors.ECODE_EXISTS)
14450
14451     if self.op.ndparams:
14452       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14453
14454     if self.op.hv_state:
14455       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14456     else:
14457       self.new_hv_state = None
14458
14459     if self.op.disk_state:
14460       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14461     else:
14462       self.new_disk_state = None
14463
14464     if self.op.diskparams:
14465       for templ in constants.DISK_TEMPLATES:
14466         if templ in self.op.diskparams:
14467           utils.ForceDictType(self.op.diskparams[templ],
14468                               constants.DISK_DT_TYPES)
14469       self.new_diskparams = self.op.diskparams
14470       try:
14471         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14472       except errors.OpPrereqError, err:
14473         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14474                                    errors.ECODE_INVAL)
14475     else:
14476       self.new_diskparams = {}
14477
14478     if self.op.ipolicy:
14479       cluster = self.cfg.GetClusterInfo()
14480       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14481       try:
14482         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14483       except errors.ConfigurationError, err:
14484         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14485                                    errors.ECODE_INVAL)
14486
14487   def BuildHooksEnv(self):
14488     """Build hooks env.
14489
14490     """
14491     return {
14492       "GROUP_NAME": self.op.group_name,
14493       }
14494
14495   def BuildHooksNodes(self):
14496     """Build hooks nodes.
14497
14498     """
14499     mn = self.cfg.GetMasterNode()
14500     return ([mn], [mn])
14501
def Exec(self, feedback_fn):
  """Add the node group to the cluster.

  The L{objects.NodeGroup} is built from the opcode parameters and from the
  values stored on this LU, and is then handed to the configuration.

  """
  op = self.op
  new_group = objects.NodeGroup(uuid=self.group_uuid,
                                name=op.group_name,
                                members=[],
                                alloc_policy=op.alloc_policy,
                                ndparams=op.ndparams,
                                ipolicy=op.ipolicy,
                                diskparams=self.new_diskparams,
                                hv_state_static=self.new_hv_state,
                                disk_state_static=self.new_disk_state)

  ec_id = self.proc.GetECId()
  self.cfg.AddNodeGroup(new_group, ec_id, check_uuid=False)

  # AddNodeGroup went through, hence the pending entry for the node group
  # level is dropped from remove_locks
  del self.remove_locks[locking.LEVEL_NODEGROUP]
14517
14518
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the target group and the nodes and declare the first locks.

    """
    # Both lookups raise errors.OpPrereqError on their own
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # Every affected node and group has to be locked. The node list and the
    # *destination* group are already known here; the "source" groups can
    # only be collected later on, once node information is fetched.
    locks = {}
    locks[locking.LEVEL_NODEGROUP] = set([self.group_uuid])
    locks[locking.LEVEL_NODE] = self.op.nodes
    self.needed_locks = locks

  def DeclareLocks(self, level):
    """Add the groups the nodes currently belong to to the group locks.

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    group_locks = self.needed_locks[locking.LEVEL_NODEGROUP]
    assert len(group_locks) == 1

    # Neither the group nor the node locks are held at this point, so the
    # result of this lookup has to be verified later in the code flow
    group_locks.update(self.cfg.GetNodeGroupsFromNodes(self.op.nodes))

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the owned node group locks still match the groups of the
    nodes, and checks whether the assignment splits instances across groups.
    Newly split instances are only accepted if the opcode's C{force} flag is
    set, in which case warnings are logged instead.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.op.nodes) ==
            frozenset(self.owned_locks(locking.LEVEL_NODE)))

    current_groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
    expected_locks = set([self.group_uuid]) | current_groups
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)

    if actual_locks != expected_locks:
      msg = ("Nodes changed groups since locks were acquired,"
             " current groups are '%s', used to be '%s'")
      raise errors.OpExecError(msg % (utils.CommaJoin(expected_locks),
                                      utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node, self.group_uuid) for node in self.op.nodes]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            instance_data)

    if not new_splits:
      # Nothing gets newly split, there is nothing to report
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    target_uuid = self.group_uuid
    self.cfg.AssignGroupNodes([(node_name, target_uuid)
                               for node_name in self.op.nodes])

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    The whole list of node assignments is treated as one atomic operation;
    the returned information describes the situation after all of the
    changes have been applied.

    An instance counts as split if its primary and secondary nodes do not
    all belong to the same group. Only instances whose disk template is
    listed in constants.DTS_INT_MIRROR are looked at.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that are not split now but become split
      through this change, and a list of instances that are split now and
      remain split after the change.

    """
    # Only assignments which actually move a node are of interest
    changed_nodes = {}
    for (node, group) in changes:
      if node_data[node].group != group:
        changed_nodes[node] = group

    split_before = set()
    split_after = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      inst_nodes = [inst.primary_node]
      inst_nodes.extend(inst.secondary_nodes)

      groups_before = set(node_data[node].group for node in inst_nodes)
      if len(groups_before) > 1:
        split_before.add(inst.name)

      groups_after = set(changed_nodes.get(node, node_data[node].group)
                         for node in inst_nodes)
      if len(groups_after) > 1:
        split_after.add(inst.name)

    newly_split = split_after - split_before
    still_split = split_before & split_after

    return (list(newly_split), list(still_split))
14650
14651
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Resolve the requested names into a list of group UUIDs.

    No locks are declared. Without requested names all groups are selected,
    ordered by name through L{utils.NiceSort}; otherwise every requested
    entry may be either a group UUID or a group name.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if a requested group does not exist

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()

    name_to_uuid = {}
    for group in self._all_groups.values():
      name_to_uuid[group.name] = group.uuid

    if not self.names:
      sorted_names = utils.NiceSort(name_to_uuid.keys())
      self.wanted = [name_to_uuid[name] for name in sorted_names]
      return

    known_uuids = frozenset(self._all_groups.keys())
    unknown = []
    self.wanted = []

    # Each entry is accepted either as UUID or as name; UUIDs are tried first
    for name in self.names:
      if name in known_uuids:
        self.wanted.append(name)
      elif name in name_to_uuid:
        self.wanted.append(name_to_uuid[name])
      else:
        unknown.append(name)

    if unknown:
      raise errors.OpPrereqError("Some groups do not exist: %s" %
                                 utils.CommaJoin(unknown),
                                 errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Does nothing.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    nodes_by_group = None
    instances_by_group = None

    # GQ_NODE needs a group->[nodes] mapping and GQ_INST a group->[instances]
    # one. Node data alone is enough for the former; the latter can not be
    # built from instance data alone, as instances are only linked to groups
    # via their nodes. The nodes are hence processed in both cases.
    if want_nodes or want_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      nodes_by_group = dict((uuid, []) for uuid in self.wanted)
      group_of_node = {}

      for node in all_nodes.values():
        members = nodes_by_group.get(node.group)
        if members is not None:
          members.append(node.name)
          group_of_node[node.name] = node.group

    if want_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      instances_by_group = dict((uuid, []) for uuid in self.wanted)

      for instance in all_instances.values():
        pnode = instance.primary_node
        if pnode in group_of_node:
          instances_by_group[group_of_node[pnode]].append(instance.name)

    if not want_nodes:
      # Node information is only passed on if it was asked for
      nodes_by_group = None

    groups = [self._all_groups[uuid] for uuid in self.wanted]
    want_diskparams = query.GQ_DISKPARAMS in self.requested_data

    return query.GroupQueryData(self._cluster, groups, nodes_by_group,
                                instances_by_group, want_diskparams)
14730
14731
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  All the work is delegated to a L{_GroupQuery} object which is created from
  the opcode's names and output fields.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query helper from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query helper expand the names.

    """
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query helper declare the locks for the given level.

    """
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query in old-style mode and return its result.

    """
    return self.gq.OldStyleQuery(self)
14750
14751
14752 class LUGroupSetParams(LogicalUnit):
14753   """Modifies the parameters of a node group.
14754
14755   """
14756   HPATH = "group-modify"
14757   HTYPE = constants.HTYPE_GROUP
14758   REQ_BGL = False
14759
14760   def CheckArguments(self):
14761     all_changes = [
14762       self.op.ndparams,
14763       self.op.diskparams,
14764       self.op.alloc_policy,
14765       self.op.hv_state,
14766       self.op.disk_state,
14767       self.op.ipolicy,
14768       ]
14769
14770     if all_changes.count(None) == len(all_changes):
14771       raise errors.OpPrereqError("Please pass at least one modification",
14772                                  errors.ECODE_INVAL)
14773
14774   def ExpandNames(self):
14775     # This raises errors.OpPrereqError on its own:
14776     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14777
14778     self.needed_locks = {
14779       locking.LEVEL_INSTANCE: [],
14780       locking.LEVEL_NODEGROUP: [self.group_uuid],
14781       }
14782
14783     self.share_locks[locking.LEVEL_INSTANCE] = 1
14784
14785   def DeclareLocks(self, level):
14786     if level == locking.LEVEL_INSTANCE:
14787       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14788
14789       # Lock instances optimistically, needs verification once group lock has
14790       # been acquired
14791       self.needed_locks[locking.LEVEL_INSTANCE] = \
14792           self.cfg.GetNodeGroupInstances(self.group_uuid)
14793
14794   @staticmethod
14795   def _UpdateAndVerifyDiskParams(old, new):
14796     """Updates and verifies disk parameters.
14797
14798     """
14799     new_params = _GetUpdatedParams(old, new)
14800     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14801     return new_params
14802
14803   def CheckPrereq(self):
14804     """Check prerequisites.
14805
14806     """
14807     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14808
14809     # Check if locked instances are still correct
14810     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14811
14812     self.group = self.cfg.GetNodeGroup(self.group_uuid)
14813     cluster = self.cfg.GetClusterInfo()
14814
14815     if self.group is None:
14816       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14817                                (self.op.group_name, self.group_uuid))
14818
14819     if self.op.ndparams:
14820       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14821       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14822       self.new_ndparams = new_ndparams
14823
14824     if self.op.diskparams:
14825       diskparams = self.group.diskparams
14826       uavdp = self._UpdateAndVerifyDiskParams
14827       # For each disktemplate subdict update and verify the values
14828       new_diskparams = dict((dt,
14829                              uavdp(diskparams.get(dt, {}),
14830                                    self.op.diskparams[dt]))
14831                             for dt in constants.DISK_TEMPLATES
14832                             if dt in self.op.diskparams)
14833       # As we've all subdicts of diskparams ready, lets merge the actual
14834       # dict with all updated subdicts
14835       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14836       try:
14837         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14838       except errors.OpPrereqError, err:
14839         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14840                                    errors.ECODE_INVAL)
14841
14842     if self.op.hv_state:
14843       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14844                                                  self.group.hv_state_static)
14845
14846     if self.op.disk_state:
14847       self.new_disk_state = \
14848         _MergeAndVerifyDiskState(self.op.disk_state,
14849                                  self.group.disk_state_static)
14850
14851     if self.op.ipolicy:
14852       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14853                                             self.op.ipolicy,
14854                                             group_policy=True)
14855
14856       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14857       inst_filter = lambda inst: inst.name in owned_instances
14858       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14859       gmi = ganeti.masterd.instance
14860       violations = \
14861           _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14862                                                                   self.group),
14863                                         new_ipolicy, instances)
14864
14865       if violations:
14866         self.LogWarning("After the ipolicy change the following instances"
14867                         " violate them: %s",
14868                         utils.CommaJoin(violations))
14869
14870   def BuildHooksEnv(self):
14871     """Build hooks env.
14872
14873     """
14874     return {
14875       "GROUP_NAME": self.op.group_name,
14876       "NEW_ALLOC_POLICY": self.op.alloc_policy,
14877       }
14878
14879   def BuildHooksNodes(self):
14880     """Build hooks nodes.
14881
14882     """
14883     mn = self.cfg.GetMasterNode()
14884     return ([mn], [mn])
14885
14886   def Exec(self, feedback_fn):
14887     """Modifies the node group.
14888
14889     """
14890     result = []
14891
14892     if self.op.ndparams:
14893       self.group.ndparams = self.new_ndparams
14894       result.append(("ndparams", str(self.group.ndparams)))
14895
14896     if self.op.diskparams:
14897       self.group.diskparams = self.new_diskparams
14898       result.append(("diskparams", str(self.group.diskparams)))
14899
14900     if self.op.alloc_policy:
14901       self.group.alloc_policy = self.op.alloc_policy
14902
14903     if self.op.hv_state:
14904       self.group.hv_state_static = self.new_hv_state
14905
14906     if self.op.disk_state:
14907       self.group.disk_state_static = self.new_disk_state
14908
14909     if self.op.ipolicy:
14910       self.group.ipolicy = self.new_ipolicy
14911
14912     self.cfg.Update(self.group, feedback_fn)
14913     return result
14914
14915
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing a node group.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group and declare the lock on it.

    """
    # The lookup raises errors.OpPrereqError on its own
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    locks = {}
    locks[locking.LEVEL_NODEGROUP] = [self.group_uuid]
    self.needed_locks = locks

  def CheckPrereq(self):
    """Check prerequisites.

    The group must not contain any node, and it must not be the only group
    of the cluster. (Whether the name refers to an existing node group is
    already checked while expanding the names.)

    """
    # The group has to be empty
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(members))),
                                 errors.ECODE_STATE)

    # The cluster must not end up without any group
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
                                 " removed" % self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    @raise errors.OpExecError: if the configuration reports an error while
        removing the group

    """
    uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = uuid
14980
14981
class LUGroupRename(LogicalUnit):
  """Logical unit for renaming a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group and declare the lock on it.

    """
    # The lookup raises errors.OpPrereqError on its own
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    locks = {}
    locks[locking.LEVEL_NODEGROUP] = [self.group_uuid]
    self.needed_locks = locks

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      clashing_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The lookup failed, hence the new name is still free
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, clashing_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["OLD_NAME"] = self.op.group_name
    env["NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list twice, made of the master node followed by the
        names of the other nodes which belong to the group

    """
    master = self.cfg.GetMasterNode()

    others = self.cfg.GetAllNodesInfo()
    # The master is always the first entry; it is dropped here so that it
    # is not added a second time below
    others.pop(master, None)

    run_nodes = [master]
    for node in others.values():
      if node.group == self.group_uuid:
        run_nodes.append(node.name)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @return: the new name of the group

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    new_name = self.op.new_name
    group.name = new_name
    self.cfg.Update(group, feedback_fn)

    return new_name
15049
15050
class LUGroupEvacuate(LogicalUnit):
  """Logical unit for evacuating the instances of a node group.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the source and target groups and prepare shared locks.

    @raise errors.OpPrereqError: if the group to be evacuated is also one
        of the requested target groups

    """
    lookup = self.cfg.LookupNodeGroup

    # The lookup raises errors.OpPrereqError on its own
    self.group_uuid = lookup(self.op.group_name)

    requested = self.op.target_groups
    if requested:
      self.req_target_uuids = [lookup(name) for name in requested]
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()

    # The actual lock names are filled in level by level in DeclareLocks
    locks = {}
    locks[locking.LEVEL_INSTANCE] = []
    locks[locking.LEVEL_NODEGROUP] = []
    locks[locking.LEVEL_NODE] = []
    self.needed_locks = locks

  def DeclareLocks(self, level):
    """Compute the locks needed at the given level.

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # The instances are locked optimistically; the list needs to be
      # verified once the node and group locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if not self.req_target_uuids:
        # Without requested target groups all groups have to be locked
        lock_groups = locking.ALL_SET
      else:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Optimistically add all groups used by the instances; the lookup
        # goes via nodes which are not locked yet, so this needs to be
        # verified later on
        for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
          lock_groups.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This only locks those nodes of the group to be evacuated which
      # actually contain instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # In addition, lock all member nodes of every owned group, i.e. of
      # the group to be evacuated and of the target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups

      member_nodes = []
      for group in owned_groups:
        for node_name in self.cfg.GetNodeGroup(group).members:
          member_nodes.append(node_name)

      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically acquired locks and determines the target
    groups, which are stored in C{self.target_uuids}.

    @raise errors.OpPrereqError: if no target groups were requested and no
        group other than the evacuated one is owned

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Make sure the locked instances are still the right ones
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Fetch the instance objects
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Make sure the node groups of the locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # Specific target groups were requested by the user
      self.target_uuids = self.req_target_uuids
    else:
      # Every group but the one being evacuated is a potential target
      candidates = []
      for group_uuid in owned_groups:
        if group_uuid != self.group_uuid:
          candidates.append(group_uuid)
      self.target_uuids = candidates

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    env["TARGET_GROUPS"] = " ".join(self.target_uuids)
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list twice, made of the master node followed by the
        members of the group to be evacuated

    """
    master = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    group = self.cfg.GetNodeGroup(self.group_uuid)
    run_nodes = [master] + group.members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Ask the iallocator for an evacuation plan and return it as jobs.

    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    request = iallocator.IAReqGroupChange(instances=instances,
                                          target_groups=self.target_uuids)
    ial = iallocator.IAllocator(self.cfg, self.rpc, request)
    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
15198
15199
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    """Resolve the name of the tagged object and declare its lock.

    Nodes and instances are locked by their expanded name, node groups and
    networks by their UUID. For any other kind no lock is declared. The
    lock is also skipped if the opcode has a false C{use_locking} attribute.

    """
    self.group_uuid = None
    self.needed_locks = {}

    kind = self.op.kind
    lock_level = None
    lock_name = None

    if kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      (lock_level, lock_name) = (locking.LEVEL_NODE, self.op.name)
    elif kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      (lock_level, lock_name) = (locking.LEVEL_INSTANCE, self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      (lock_level, lock_name) = (locking.LEVEL_NODEGROUP, self.group_uuid)
    elif kind == constants.TAG_NETWORK:
      self.network_uuid = self.cfg.LookupNetwork(self.op.name)
      (lock_level, lock_name) = (locking.LEVEL_NETWORK, self.network_uuid)

    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Fetches the object the tags belong to and stores it in C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is not a known one

    """
    cfg = self.cfg
    kind = self.op.kind

    if kind == constants.TAG_CLUSTER:
      target = cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = cfg.GetInstanceInfo(self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      target = cfg.GetNodeGroup(self.group_uuid)
    elif kind == constants.TAG_NETWORK:
      target = cfg.GetNetwork(self.network_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)

    self.target = target
15253
15254
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names as in L{TagsLU}, with all locks shared.

    """
    TagsLU.ExpandNames(self)

    # Reading tags does not modify anything, shared locks are enough
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of the target object

    """
    tags = self.target.GetTags()
    return list(tags)
15272
15273
15274 class LUTagsSearch(NoHooksLU):
15275   """Searches the tags for a given pattern.
15276
15277   """
15278   REQ_BGL = False
15279
15280   def ExpandNames(self):
15281     self.needed_locks = {}
15282
15283   def CheckPrereq(self):
15284     """Check prerequisites.
15285
15286     This checks the pattern passed for validity by compiling it.
15287
15288     """
15289     try:
15290       self.re = re.compile(self.op.pattern)
15291     except re.error, err:
15292       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15293                                  (self.op.pattern, err), errors.ECODE_INVAL)
15294
15295   def Exec(self, feedback_fn):
15296     """Returns the tag list.
15297
15298     """
15299     cfg = self.cfg
15300     tgts = [("/cluster", cfg.GetClusterInfo())]
15301     ilist = cfg.GetAllInstancesInfo().values()
15302     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15303     nlist = cfg.GetAllNodesInfo().values()
15304     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15305     tgts.extend(("/nodegroup/%s" % n.name, n)
15306                 for n in cfg.GetAllNodeGroupsInfo().values())
15307     results = []
15308     for path, target in tgts:
15309       for tag in target.GetTags():
15310         if self.re.search(tag):
15311           results.append((path, tag))
15312     return results
15313
15314
15315 class LUTagsSet(TagsLU):
15316   """Sets a tag on a given object.
15317
15318   """
15319   REQ_BGL = False
15320
15321   def CheckPrereq(self):
15322     """Check prerequisites.
15323
15324     This checks the type and length of the tag name and value.
15325
15326     """
15327     TagsLU.CheckPrereq(self)
15328     for tag in self.op.tags:
15329       objects.TaggableObject.ValidateTag(tag)
15330
15331   def Exec(self, feedback_fn):
15332     """Sets the tag.
15333
15334     """
15335     try:
15336       for tag in self.op.tags:
15337         self.target.AddTag(tag)
15338     except errors.TagError, err:
15339       raise errors.OpExecError("Error while setting tag: %s" % str(err))
15340     self.cfg.Update(self.target, feedback_fn)
15341
15342
class LUTagsDel(TagsLU):
  """Removes a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Validates the given tags and verifies that all of them are currently
    set on the target object.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # Tags requested for removal which the target does not carry
    missing = frozenset(self.op.tags) - self.target.GetTags()
    if not missing:
      return

    quoted = ("'%s'" % name for name in sorted(missing))
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Removes the tags from the target and updates the configuration.

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)
    self.cfg.Update(target, feedback_fn)
15375
15376
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  The delay is run on the master and/or on a list of nodes, optionally
  several times in a row.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    If nodes were given, their names are expanded and node locks are
    requested for them.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Runs a single delay on the master and/or the nodes.

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for (node, nres) in result.items():
        nres.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero runs the delay exactly once, without logging the
    iteration number.

    """
    repeat = self.op.repeat
    if repeat == 0:
      self._TestDelay()
      return

    last_idx = repeat - 1
    for idx in range(repeat):
      self.LogInfo("Test delay iteration %d/%d", idx, last_idx)
      self._TestDelay()
15423
15424
class LURestrictedCommand(NoHooksLU):
  """Logical unit for executing restricted commands.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expands the node names and declares the node locks.

    The node locks are shared unless the opcode asks for locking.

    """
    if self.op.nodes:
      self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    self.needed_locks = {locking.LEVEL_NODE: self.op.nodes}
    self.share_locks = {locking.LEVEL_NODE: not self.op.use_locking}

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Execute restricted command and return output.

    @rtype: list
    @return: one C{(success, data)} tuple per requested node, in the order
        of the opcode's node list; C{data} is the RPC payload on success
        and an error message otherwise

    """
    nodes = self.op.nodes
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    # Check if correct locks are held
    assert set(nodes).issubset(owned_nodes)

    rpcres = self.rpc.call_restricted_command(nodes, self.op.command)

    result = []
    for node_name in nodes:
      nres = rpcres[node_name]
      if not nres.fail_msg:
        result.append((True, nres.payload))
        continue

      msg = ("Command '%s' on node '%s' failed: %s" %
             (self.op.command, node_name, nres.fail_msg))
      result.append((False, msg))

    return result
15470
15471
15472 class LUTestJqueue(NoHooksLU):
15473   """Utility LU to test some aspects of the job queue.
15474
15475   """
15476   REQ_BGL = False
15477
15478   # Must be lower than default timeout for WaitForJobChange to see whether it
15479   # notices changed jobs
15480   _CLIENT_CONNECT_TIMEOUT = 20.0
15481   _CLIENT_CONFIRM_TIMEOUT = 60.0
15482
15483   @classmethod
15484   def _NotifyUsingSocket(cls, cb, errcls):
15485     """Opens a Unix socket and waits for another program to connect.
15486
15487     @type cb: callable
15488     @param cb: Callback to send socket name to client
15489     @type errcls: class
15490     @param errcls: Exception class to use for errors
15491
15492     """
15493     # Using a temporary directory as there's no easy way to create temporary
15494     # sockets without writing a custom loop around tempfile.mktemp and
15495     # socket.bind
15496     tmpdir = tempfile.mkdtemp()
15497     try:
15498       tmpsock = utils.PathJoin(tmpdir, "sock")
15499
15500       logging.debug("Creating temporary socket at %s", tmpsock)
15501       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15502       try:
15503         sock.bind(tmpsock)
15504         sock.listen(1)
15505
15506         # Send details to client
15507         cb(tmpsock)
15508
15509         # Wait for client to connect before continuing
15510         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15511         try:
15512           (conn, _) = sock.accept()
15513         except socket.error, err:
15514           raise errcls("Client didn't connect in time (%s)" % err)
15515       finally:
15516         sock.close()
15517     finally:
15518       # Remove as soon as client is connected
15519       shutil.rmtree(tmpdir)
15520
15521     # Wait for client to close
15522     try:
15523       try:
15524         # pylint: disable=E1101
15525         # Instance of '_socketobject' has no ... member
15526         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15527         conn.recv(1)
15528       except socket.error, err:
15529         raise errcls("Client failed to confirm notification (%s)" % err)
15530     finally:
15531       conn.close()
15532
15533   def _SendNotification(self, test, arg, sockname):
15534     """Sends a notification to the client.
15535
15536     @type test: string
15537     @param test: Test name
15538     @param arg: Test argument (depends on test)
15539     @type sockname: string
15540     @param sockname: Socket path
15541
15542     """
15543     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15544
15545   def _Notify(self, prereq, test, arg):
15546     """Notifies the client of a test.
15547
15548     @type prereq: bool
15549     @param prereq: Whether this is a prereq-phase test
15550     @type test: string
15551     @param test: Test name
15552     @param arg: Test argument (depends on test)
15553
15554     """
15555     if prereq:
15556       errcls = errors.OpPrereqError
15557     else:
15558       errcls = errors.OpExecError
15559
15560     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15561                                                   test, arg),
15562                                    errcls)
15563
15564   def CheckArguments(self):
15565     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15566     self.expandnames_calls = 0
15567
15568   def ExpandNames(self):
15569     checkargs_calls = getattr(self, "checkargs_calls", 0)
15570     if checkargs_calls < 1:
15571       raise errors.ProgrammerError("CheckArguments was not called")
15572
15573     self.expandnames_calls += 1
15574
15575     if self.op.notify_waitlock:
15576       self._Notify(True, constants.JQT_EXPANDNAMES, None)
15577
15578     self.LogInfo("Expanding names")
15579
15580     # Get lock on master node (just to get a lock, not for a particular reason)
15581     self.needed_locks = {
15582       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15583       }
15584
15585   def Exec(self, feedback_fn):
15586     if self.expandnames_calls < 1:
15587       raise errors.ProgrammerError("ExpandNames was not called")
15588
15589     if self.op.notify_exec:
15590       self._Notify(False, constants.JQT_EXEC, None)
15591
15592     self.LogInfo("Executing")
15593
15594     if self.op.log_messages:
15595       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15596       for idx, msg in enumerate(self.op.log_messages):
15597         self.LogInfo("Sending log message %s", idx + 1)
15598         feedback_fn(constants.JQT_MSGPREFIX + msg)
15599         # Report how many test messages have been sent
15600         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15601
15602     if self.op.fail:
15603       raise errors.OpExecError("Opcode failure was requested")
15604
15605     return True
15606
15607
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU builds an iallocator request from the opcode parameters and
  either returns the generated allocator input or runs the allocator and
  returns its output.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # Allocation tests need a name which is not used by an instance yet
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # Every disk must be a dict with an integer size and a valid mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.iallocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def _BuildAllocRequest(self, name):
    """Builds a single-instance allocation request from the opcode.

    @type name: string
    @param name: name of the instance the allocation is requested for
    @return: the L{iallocator.IAReqInstanceAlloc} request

    """
    return iallocator.IAReqInstanceAlloc(name=name,
                                         memory=self.op.memory,
                                         disks=self.op.disks,
                                         disk_template=self.op.disk_template,
                                         os=self.op.os,
                                         tags=self.op.tags,
                                         nics=self.op.nics,
                                         vcpus=self.op.vcpus,
                                         spindle_use=self.op.spindle_use,
                                         hypervisor=self.op.hypervisor)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @return: the allocator input text for the
        L{constants.IALLOCATOR_DIR_IN} direction, otherwise the output
        text of the allocator run
    @raise errors.ProgrammerError: if the mode is not handled here

    """
    mode = self.op.mode
    if mode == constants.IALLOCATOR_MODE_ALLOC:
      req = self._BuildAllocRequest(self.op.name)
    elif mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      # One request per instance, named after the base name plus an index
      insts = [self._BuildAllocRequest("%s%s" % (self.op.name, idx))
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      # The mode has to be interpolated into the message; passing it as a
      # second argument would leave the placeholder unformatted
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.iallocator, validate=False)
      result = ial.out_text
    return result
15718
15719
15720 class LUNetworkAdd(LogicalUnit):
15721   """Logical unit for creating networks.
15722
15723   """
15724   HPATH = "network-add"
15725   HTYPE = constants.HTYPE_NETWORK
15726   REQ_BGL = False
15727
15728   def BuildHooksNodes(self):
15729     """Build hooks nodes.
15730
15731     """
15732     mn = self.cfg.GetMasterNode()
15733     return ([mn], [mn])
15734
15735   def CheckArguments(self):
15736     if self.op.mac_prefix:
15737       self.op.mac_prefix = \
15738         utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15739
15740   def ExpandNames(self):
15741     self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15742
15743     if self.op.conflicts_check:
15744       self.share_locks[locking.LEVEL_NODE] = 1
15745       self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
15746       self.needed_locks = {
15747         locking.LEVEL_NODE: locking.ALL_SET,
15748         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
15749         }
15750     else:
15751       self.needed_locks = {}
15752
15753     self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15754
15755   def CheckPrereq(self):
15756     if self.op.network is None:
15757       raise errors.OpPrereqError("Network must be given",
15758                                  errors.ECODE_INVAL)
15759
15760     uuid = self.cfg.LookupNetwork(self.op.network_name)
15761
15762     if uuid:
15763       raise errors.OpPrereqError("Network '%s' already defined" %
15764                                  self.op.network, errors.ECODE_EXISTS)
15765
15766     # Check tag validity
15767     for tag in self.op.tags:
15768       objects.TaggableObject.ValidateTag(tag)
15769
15770   def BuildHooksEnv(self):
15771     """Build hooks env.
15772
15773     """
15774     args = {
15775       "name": self.op.network_name,
15776       "subnet": self.op.network,
15777       "gateway": self.op.gateway,
15778       "network6": self.op.network6,
15779       "gateway6": self.op.gateway6,
15780       "mac_prefix": self.op.mac_prefix,
15781       "network_type": self.op.network_type,
15782       "tags": self.op.tags,
15783       }
15784     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15785
15786   def Exec(self, feedback_fn):
15787     """Add the ip pool to the cluster.
15788
15789     """
15790     nobj = objects.Network(name=self.op.network_name,
15791                            network=self.op.network,
15792                            gateway=self.op.gateway,
15793                            network6=self.op.network6,
15794                            gateway6=self.op.gateway6,
15795                            mac_prefix=self.op.mac_prefix,
15796                            network_type=self.op.network_type,
15797                            uuid=self.network_uuid,
15798                            family=constants.IP4_VERSION)
15799     # Initialize the associated address pool
15800     try:
15801       pool = network.AddressPool.InitializeNetwork(nobj)
15802     except errors.AddressPoolError, e:
15803       raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
15804
15805     # Check if we need to reserve the nodes and the cluster master IP
15806     # These may not be allocated to any instances in routed mode, as
15807     # they wouldn't function anyway.
15808     if self.op.conflicts_check:
15809       for node in self.cfg.GetAllNodesInfo().values():
15810         for ip in [node.primary_ip, node.secondary_ip]:
15811           try:
15812             if pool.Contains(ip):
15813               pool.Reserve(ip)
15814               self.LogInfo("Reserved IP address of node '%s' (%s)",
15815                            node.name, ip)
15816           except errors.AddressPoolError:
15817             self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
15818                             node.name, ip)
15819
15820       master_ip = self.cfg.GetClusterInfo().master_ip
15821       try:
15822         if pool.Contains(master_ip):
15823           pool.Reserve(master_ip)
15824           self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
15825       except errors.AddressPoolError:
15826         self.LogWarning("Cannot reserve cluster master IP address (%s)",
15827                         master_ip)
15828
15829     if self.op.add_reserved_ips:
15830       for ip in self.op.add_reserved_ips:
15831         try:
15832           pool.Reserve(ip, external=True)
15833         except errors.AddressPoolError, e:
15834           raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15835
15836     if self.op.tags:
15837       for tag in self.op.tags:
15838         nobj.AddTag(tag)
15839
15840     self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15841     del self.remove_locks[locking.LEVEL_NETWORK]
15842
15843
class LUNetworkRemove(LogicalUnit):
  """Logical unit for removing networks.

  """
  HPATH = "network-remove"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    """Looks up the network and declares the needed locks.

    The network itself is locked, together with all node groups (the
    latter in shared mode).

    """
    uuid = self.cfg.LookupNetwork(self.op.network_name)
    self.network_uuid = uuid

    if not uuid:
      raise errors.OpPrereqError(("Network '%s' not found" %
                                  self.op.network_name), errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [uuid],
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODEGROUP] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the network is not connected to any node group
    anymore.

    """
    # Names of all node groups the network is still connected to
    connected = [group.name
                 for group in self.cfg.GetAllNodeGroupsInfo().values()
                 if self.network_uuid in group.networks]

    if not connected:
      return

    self.LogWarning("Network '%s' is connected to the following"
                    " node groups: %s" %
                    (self.op.network_name,
                     utils.CommaJoin(utils.NiceSort(connected))))
    raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["NETWORK_NAME"] = self.op.network_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node is used for both lists of hook nodes.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the network.

    """
    uuid = self.network_uuid
    try:
      self.cfg.RemoveNetwork(uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
                               (self.op.network_name, uuid))
15906
15907
15908 class LUNetworkSetParams(LogicalUnit):
15909   """Modifies the parameters of a network.
15910
15911   """
15912   HPATH = "network-modify"
15913   HTYPE = constants.HTYPE_NETWORK
15914   REQ_BGL = False
15915
15916   def CheckArguments(self):
15917     if (self.op.gateway and
15918         (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15919       raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15920                                  " at once", errors.ECODE_INVAL)
15921
15922   def ExpandNames(self):
15923     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15924     if self.network_uuid is None:
15925       raise errors.OpPrereqError(("Network '%s' not found" %
15926                                   self.op.network_name), errors.ECODE_NOENT)
15927
15928     self.needed_locks = {
15929       locking.LEVEL_NETWORK: [self.network_uuid],
15930       }
15931
15932   def CheckPrereq(self):
15933     """Check prerequisites.
15934
15935     """
15936     self.network = self.cfg.GetNetwork(self.network_uuid)
15937     self.gateway = self.network.gateway
15938     self.network_type = self.network.network_type
15939     self.mac_prefix = self.network.mac_prefix
15940     self.network6 = self.network.network6
15941     self.gateway6 = self.network.gateway6
15942     self.tags = self.network.tags
15943
15944     self.pool = network.AddressPool(self.network)
15945
15946     if self.op.gateway:
15947       if self.op.gateway == constants.VALUE_NONE:
15948         self.gateway = None
15949       else:
15950         self.gateway = self.op.gateway
15951         if self.pool.IsReserved(self.gateway):
15952           raise errors.OpPrereqError("%s is already reserved" %
15953                                      self.gateway, errors.ECODE_STATE)
15954
15955     if self.op.network_type:
15956       if self.op.network_type == constants.VALUE_NONE:
15957         self.network_type = None
15958       else:
15959         self.network_type = self.op.network_type
15960
15961     if self.op.mac_prefix:
15962       if self.op.mac_prefix == constants.VALUE_NONE:
15963         self.mac_prefix = None
15964       else:
15965         self.mac_prefix = \
15966           utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15967
15968     if self.op.gateway6:
15969       if self.op.gateway6 == constants.VALUE_NONE:
15970         self.gateway6 = None
15971       else:
15972         self.gateway6 = self.op.gateway6
15973
15974     if self.op.network6:
15975       if self.op.network6 == constants.VALUE_NONE:
15976         self.network6 = None
15977       else:
15978         self.network6 = self.op.network6
15979
15980   def BuildHooksEnv(self):
15981     """Build hooks env.
15982
15983     """
15984     args = {
15985       "name": self.op.network_name,
15986       "subnet": self.network.network,
15987       "gateway": self.gateway,
15988       "network6": self.network6,
15989       "gateway6": self.gateway6,
15990       "mac_prefix": self.mac_prefix,
15991       "network_type": self.network_type,
15992       "tags": self.tags,
15993       }
15994     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15995
15996   def BuildHooksNodes(self):
15997     """Build hooks nodes.
15998
15999     """
16000     mn = self.cfg.GetMasterNode()
16001     return ([mn], [mn])
16002
16003   def Exec(self, feedback_fn):
16004     """Modifies the network.
16005
16006     """
16007     #TODO: reserve/release via temporary reservation manager
16008     #      extend cfg.ReserveIp/ReleaseIp with the external flag
16009     if self.op.gateway:
16010       if self.gateway == self.network.gateway:
16011         self.LogWarning("Gateway is already %s", self.gateway)
16012       else:
16013         if self.gateway:
16014           self.pool.Reserve(self.gateway, external=True)
16015         if self.network.gateway:
16016           self.pool.Release(self.network.gateway, external=True)
16017         self.network.gateway = self.gateway
16018
16019     if self.op.add_reserved_ips:
16020       for ip in self.op.add_reserved_ips:
16021         try:
16022           if self.pool.IsReserved(ip):
16023             self.LogWarning("IP address %s is already reserved", ip)
16024           else:
16025             self.pool.Reserve(ip, external=True)
16026         except errors.AddressPoolError, err:
16027           self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16028
16029     if self.op.remove_reserved_ips:
16030       for ip in self.op.remove_reserved_ips:
16031         if ip == self.network.gateway:
16032           self.LogWarning("Cannot unreserve Gateway's IP")
16033           continue
16034         try:
16035           if not self.pool.IsReserved(ip):
16036             self.LogWarning("IP address %s is already unreserved", ip)
16037           else:
16038             self.pool.Release(ip, external=True)
16039         except errors.AddressPoolError, err:
16040           self.LogWarning("Cannot release IP address %s: %s", ip, err)
16041
16042     if self.op.mac_prefix:
16043       self.network.mac_prefix = self.mac_prefix
16044
16045     if self.op.network6:
16046       self.network.network6 = self.network6
16047
16048     if self.op.gateway6:
16049       self.network.gateway6 = self.gateway6
16050
16051     if self.op.network_type:
16052       self.network.network_type = self.network_type
16053
16054     self.pool.Validate()
16055
16056     self.cfg.Update(self.network, feedback_fn)
16057
16058
class _NetworkQuery(_QueryBase):
  """Query implementation for networks.

  """
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    """Computes the UUIDs of the wanted networks.

    Without requested names all networks are selected, sorted by name.
    Otherwise every requested entry may be either a UUID or a name.

    @param lu: the logical unit on whose behalf the query is run
    @raise errors.OpPrereqError: if a requested network does not exist

    """
    lu.needed_locks = {}

    self._all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_networks.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Does nothing, as no locks are declared for this query.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    @param lu: the logical unit on whose behalf the query is run
    @return: a L{query.NetworkQueryData} object for the wanted networks;
        the group and instance mappings and the statistics are C{None}
        unless the respective data was requested

    """
    do_instances = query.NETQ_INST in self.requested_data
    do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)

    network_to_groups = None
    network_to_instances = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in self.wanted)

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        all_nodes = lu.cfg.GetAllNodesInfo()
        network_to_instances = dict((uuid, []) for uuid in self.wanted)

      for group in all_groups.values():
        if do_instances:
          # A set keeps the per-instance membership test below cheap
          group_nodes = frozenset(node.name for node in all_nodes.values()
                                  if node.group == group.uuid)
          group_instances = [inst for inst in all_instances.values()
                             if inst.primary_node in group_nodes]

        for (net_uuid, netparams) in group.networks.items():
          if net_uuid not in network_to_groups:
            # Network is connected to the group, but was not asked for
            continue

          mode = netparams[constants.NIC_MODE]
          link = netparams[constants.NIC_LINK]
          info = "%s(%s, %s)" % (group.name, mode, link)
          network_to_groups[net_uuid].append(info)

          if do_instances:
            net_name = self._all_networks[net_uuid].name
            for inst in group_instances:
              # List every instance once, even with several NICs in the
              # network
              for nic in inst.nics:
                if nic.network == net_name:
                  network_to_instances[net_uuid].append(inst.name)
                  break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(self._all_networks[uuid])))
             for uuid in self.wanted)
    else:
      stats = None

    return query.NetworkQueryData([self._all_networks[uuid]
                                   for uuid in self.wanted],
                                   network_to_groups,
                                   network_to_instances,
                                   stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    @param pool: the address pool to describe
    @rtype: dict
    @return: free and reserved counts, the pool's map and the
        comma-separated external reservations

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
16160
16161
class LUNetworkQuery(NoHooksLU):
  """Logical unit answering network queries.

  All the work is delegated to a L{_NetworkQuery} helper stored in
  C{self.nq}.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Builds the query helper from the opcode's names and output fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NetworkQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Lets the query helper expand names on behalf of this LU.

    """
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns the helper's old-style result.

    """
    return self.nq.OldStyleQuery(self)
16177
16178
class LUNetworkConnect(LogicalUnit):
  """Logical unit connecting a network to a node group.

  The connection is stored as an entry in the group's C{networks}
  dictionary, keyed by the network UUID and holding the NIC mode and link
  given in the opcode.

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the network and group names and declares the locks.

    @raise errors.OpPrereqError: if the network or the node group cannot
      be found in the configuration

    """
    opcode = self.op
    self.network_name = opcode.network_name
    self.group_name = opcode.group_name
    self.network_mode = opcode.network_mode
    self.network_link = opcode.network_link

    net_uuid = self.cfg.LookupNetwork(self.network_name)
    self.network_uuid = net_uuid
    if net_uuid is None:
      raise errors.OpPrereqError("Network %s does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    grp_uuid = self.cfg.LookupNodeGroup(self.group_name)
    self.group_uuid = grp_uuid
    if grp_uuid is None:
      raise errors.OpPrereqError("Group %s does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    # The instance list is filled in later, see DeclareLocks
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_INSTANCE: [],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    # The network itself is only locked (shared) when conflicts are checked
    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    """Declares the instance locks when a conflict check was requested.

    """
    if level != locking.LEVEL_INSTANCE:
      return

    assert not self.needed_locks[locking.LEVEL_INSTANCE]

    if not self.op.conflicts_check:
      return

    # Lock instances optimistically, needs verification once group lock has
    # been acquired
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

  def BuildHooksEnv(self):
    """Builds the hooks environment (group name, network mode and link).

    """
    return {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }

  def BuildHooksNodes(self):
    """Returns the group's members as both pre- and post-hook nodes.

    """
    members = self.cfg.GetNodeGroup(self.group_uuid).members
    return (members, members)

  def CheckPrereq(self):
    """Validates the NIC parameters and checks for conflicts.

    If the network is already mapped to the group only a warning is
    logged and L{Exec} becomes a no-op.

    """
    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in locked_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    # NOTE: the bridge existence check for bridged mode (via
    # _CheckNodeGroupBridgesExist) is disabled in this code:
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    already_mapped = self.network_uuid in self.group.networks
    self.connected = already_mapped
    if already_mapped:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      return

    if self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      def _IpInPool(nic):
        # A NIC conflicts if its IP address is contained in the pool
        return pool.Contains(nic.ip)

      _NetworkConflictCheck(self, _IpInPool, "connect to")

  def Exec(self, feedback_fn):
    """Records the connection in the node group and saves the group.

    """
    if not self.connected:
      self.group.networks[self.network_uuid] = self.netparams
      self.cfg.Update(self.group, feedback_fn)
16268
16269
def _NetworkConflictCheck(lu, check_fn, action):
  """Checks for network interface conflicts with a network.

  Besides C{cfg}, C{owned_locks} and C{LogWarning}, this function reads the
  attributes C{group_uuid}, C{network_name} and C{group} from the logical
  unit.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit on whose behalf the check is done
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  # Check if locked instances are still correct
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)

  # List of (instance name, [(NIC index, NIC IP), ...]) for conflicting NICs
  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    # The details go to the warning; the exception carries a generic message
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    # The first literal must not end in a space: the continuation literal
    # already starts with one
    raise errors.OpPrereqError("Conflicting IP addresses found;"
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)
16307
16308
def _FmtNetworkConflict(details):
  """Formats the conflicting NICs of one instance.

  Utility for L{_NetworkConflictCheck}.

  @param details: iterable of C{(NIC index, IP address)} pairs
  @return: comma-joined string of C{nic<index>/<address>} items

  """
  nic_descriptions = ("nic%s/%s" % (nic_idx, address)
                      for (nic_idx, address) in details)
  return utils.CommaJoin(nic_descriptions)
16315
16316
class LUNetworkDisconnect(LogicalUnit):
  """Logical unit disconnecting a network from a node group.

  Disconnecting removes the network's entry from the group's C{networks}
  dictionary.

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the network and group names and declares the locks.

    @raise errors.OpPrereqError: if the network or the node group cannot
      be found in the configuration

    """
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    net_uuid = self.cfg.LookupNetwork(self.network_name)
    self.network_uuid = net_uuid
    if net_uuid is None:
      raise errors.OpPrereqError("Network %s does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    grp_uuid = self.cfg.LookupNodeGroup(self.group_name)
    self.group_uuid = grp_uuid
    if grp_uuid is None:
      raise errors.OpPrereqError("Group %s does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    # The instance list is filled in later, see DeclareLocks
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_INSTANCE: [],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    """Declares the instance locks when a conflict check was requested.

    """
    if level != locking.LEVEL_INSTANCE:
      return

    assert not self.needed_locks[locking.LEVEL_INSTANCE]

    if not self.op.conflicts_check:
      return

    # Lock instances optimistically, needs verification once group lock has
    # been acquired
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

  def BuildHooksEnv(self):
    """Builds the hooks environment, which holds only the group name.

    """
    return {
      "GROUP_NAME": self.group_name,
      }

  def BuildHooksNodes(self):
    """Returns the group's members as both pre- and post-hook nodes.

    """
    members = self.cfg.GetNodeGroup(self.group_uuid).members
    return (members, members)

  def CheckPrereq(self):
    """Checks that the network is mapped and, if requested, unused.

    If the network is not mapped to the group only a warning is logged
    and L{Exec} becomes a no-op.

    """
    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in locked_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    is_mapped = self.network_uuid in self.group.networks
    self.connected = is_mapped
    if not is_mapped:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      return

    if self.op.conflicts_check:
      def _NicUsesNetwork(nic):
        # A NIC conflicts if it references the network being disconnected
        return nic.network == self.network_name

      _NetworkConflictCheck(self, _NicUsesNetwork, "disconnect from")

  def Exec(self, feedback_fn):
    """Removes the network from the node group and saves the group.

    """
    if self.connected:
      del self.group.networks[self.network_uuid]
      self.cfg.Update(self.group, feedback_fn)
16388
16389
#: Query type implementations
#: Maps each query resource constant to the class implementing queries for
#: that resource; entries are looked up by L{_GetQueryImplementation}
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

# Import-time sanity check: the registered resources must be exactly the
# ones listed in constants.QR_VIA_OP
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16402
16403
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the entry registered for C{name} in L{_QUERY_IMPL}
  @raise errors.OpPrereqError: if C{name} is not a key of L{_QUERY_IMPL}

  """
  if name in _QUERY_IMPL:
    return _QUERY_IMPL[name]

  raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                             errors.ECODE_INVAL)
16415
16416
def _CheckForConflictingIp(lu, ip, node):
  """Raises an error if the IP address conflicts with a network.

  The check itself is done by C{lu.cfg.CheckIPInNodeGroup}; a conflict is
  reported when the first element of its result is not C{None}.

  @type lu: L{LogicalUnit}
  @param lu: logical unit providing access to the configuration
  @type ip: string
  @param ip: ip address
  @type node: string
  @param node: node name
  @rtype: tuple
  @return: always C{(None, None)} when no conflict was found
  @raise errors.OpPrereqError: if a conflicting network was found

  """
  (conflicting_net, _unused) = lu.cfg.CheckIPInNodeGroup(ip, node)

  if conflicting_net is None:
    return (None, None)

  raise errors.OpPrereqError("Conflicting IP found: %s <> %s." %
                             (ip, conflicting_net),
                             errors.ECODE_STATE)
16431
16432   return (None, None)