4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
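# Illustrative sketch only (not part of the original module): an LU's Exec
# method can hand follow-up jobs back to the job processor, here reusing the
# existing OpTestDelay opcode purely as a placeholder:
#
#   def Exec(self, feedback_fn):
#     ...
#     return ResultWithJobs([[opcodes.OpTestDelay(duration=1.0)]],
#                           extra_result="done")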
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176 This method is for doing a simple syntactic check and ensuring
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179 CheckPrereq, doing these separately is better because:
181 - ExpandNames is left as a purely lock-related function
182 - CheckPrereq is run after we have acquired locks (and possible
185 The function is allowed to change the self.op attribute so that
186 later methods need not worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
245 def DeclareLocks(self, level):
246 """Declare LU locking needs for a level
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
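A typical implementation (a sketch only, mirroring the pattern documented in
L{_LockInstancesNodes}) looks like::

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()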
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
322 hook should run after the execution. No nodes should be returned as an
323 empty list (and not None).
324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged but any LU can define it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
342 @param feedback_fn: function used to send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
349 # API must be kept, thus we ignore the unused argument and the
350 # "could be a function" warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
388 It should be called from DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
401 # TODO: check whether we've really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
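# Usage sketch (assumption: a hypothetical LU operating on one instance and
# its nodes); ExpandNames requests an empty node level and DeclareLocks fills
# it in through the helper above:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()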
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper to the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
600 @return: The annotated disk copies
601 @see: L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
652 " locks were acquired, current groups are"
653 " are '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node.
703 @type cfg: L{config.ConfigWriter}
704 @param cfg: The cluster configuration
705 @type node: L{objects.Node}
706 @param node: The node
708 @return: The effective value of exclusive_storage
711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node.
717 @type cfg: L{config.ConfigWriter}
718 @param cfg: The cluster configuration
719 @type nodename: string
720 @param nodename: The node
722 @return: The effective value of exclusive_storage
723 @raise errors.OpPrereqError: if no node exists with the given name
726 ni = cfg.GetNodeInfo(nodename)
728 raise errors.OpPrereqError("Invalid node name %s" % nodename,
730 return _IsExclusiveStorageEnabledNode(cfg, ni)
733 def _CopyLockList(names):
734 """Makes a copy of a list of lock names.
736 Handles L{locking.ALL_SET} correctly.
739 if names == locking.ALL_SET:
740 return locking.ALL_SET
745 def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names.
748 @type lu: L{LogicalUnit}
749 @param lu: the logical unit on whose behalf we execute
751 @param nodes: list of node names or None for all nodes
753 @return: the list of nodes, sorted
754 @raise errors.ProgrammerError: if the nodes parameter is wrong type
758 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
760 return utils.NiceSort(lu.cfg.GetNodeList())
763 def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names.
766 @type lu: L{LogicalUnit}
767 @param lu: the logical unit on whose behalf we execute
768 @type instances: list
769 @param instances: list of instance names or None for all instances
771 @return: the list of instances, sorted
772 @raise errors.OpPrereqError: if the instances parameter is wrong type
773 @raise errors.OpPrereqError: if any of the passed instances is not found
777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
779 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
783 def _GetUpdatedParams(old_params, update_dict,
784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary.
787 @type old_params: dict
788 @param old_params: old parameters
789 @type update_dict: dict
790 @param update_dict: dict containing new parameter values, or
791 constants.VALUE_DEFAULT to reset the parameter to its default
793 @type use_default: boolean
794 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
795 values as 'to be deleted' values
796 @type use_none: boolean
797 @param use_none: whether to recognise C{None} values as 'to be
800 @return: the new parameter dictionary
803 params_copy = copy.deepcopy(old_params)
804 for key, val in update_dict.iteritems():
805 if ((use_default and val == constants.VALUE_DEFAULT) or
806 (use_none and val is None)):
812 params_copy[key] = val
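# Behaviour sketch with made-up values: VALUE_DEFAULT removes a key when
# use_default is True, and None removes it when use_none is True.
#
#   _GetUpdatedParams({"a": 1, "b": 2}, {"b": constants.VALUE_DEFAULT, "c": 3})
#   -> {"a": 1, "c": 3}
#   _GetUpdatedParams({"a": 1}, {"a": None}, use_none=True)
#   -> {}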
816 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
817 """Return the new version of a instance policy.
819 @param group_policy: whether this policy applies to a group and thus
820 we should support removal of policy entries
823 use_none = use_default = group_policy
824 ipolicy = copy.deepcopy(old_ipolicy)
825 for key, value in new_ipolicy.items():
826 if key not in constants.IPOLICY_ALL_KEYS:
827 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
829 if key in constants.IPOLICY_ISPECS:
830 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
831 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
833 use_default=use_default)
835 if (not value or value == [constants.VALUE_DEFAULT] or
836 value == constants.VALUE_DEFAULT):
840 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
841 " on the cluster'" % key,
844 if key in constants.IPOLICY_PARAMETERS:
845 # FIXME: we assume all such values are float
847 ipolicy[key] = float(value)
848 except (TypeError, ValueError), err:
849 raise errors.OpPrereqError("Invalid value for attribute"
850 " '%s': '%s', error: %s" %
851 (key, value, err), errors.ECODE_INVAL)
853 # FIXME: we assume all others are lists; this should be redone
855 ipolicy[key] = list(value)
857 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
858 except errors.ConfigurationError, err:
859 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
864 def _UpdateAndVerifySubDict(base, updates, type_check):
865 """Updates and verifies a dict with sub dicts of the same type.
867 @param base: The dict with the old data
868 @param updates: The dict with the new data
869 @param type_check: Dict suitable to ForceDictType to verify correct types
870 @returns: A new dict with updated and verified values
874 new = _GetUpdatedParams(old, value)
875 utils.ForceDictType(new, type_check)
878 ret = copy.deepcopy(base)
879 ret.update(dict((key, fn(base.get(key, {}), value))
880 for key, value in updates.items()))
884 def _MergeAndVerifyHvState(op_input, obj_input):
885 """Combines the hv state from an opcode with the one of the object
887 @param op_input: The input dict from the opcode
888 @param obj_input: The input dict from the objects
889 @return: The verified and updated dict
893 invalid_hvs = set(op_input) - constants.HYPER_TYPES
895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
896 " %s" % utils.CommaJoin(invalid_hvs),
898 if obj_input is None:
900 type_check = constants.HVSTS_PARAMETER_TYPES
901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
906 def _MergeAndVerifyDiskState(op_input, obj_input):
907 """Combines the disk state from an opcode with the one of the object
909 @param op_input: The input dict from the opcode
910 @param obj_input: The input dict from the objects
911 @return: The verified and updated dict
914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
917 utils.CommaJoin(invalid_dst),
919 type_check = constants.DSS_PARAMETER_TYPES
920 if obj_input is None:
922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
924 for key, value in op_input.items())
929 def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU.
932 @type lu: L{LogicalUnit}
933 @param level: Lock level
934 @type names: list or None
935 @param names: Names of locks to release
936 @type keep: list or None
937 @param keep: Names of locks to retain
940 assert not (keep is not None and names is not None), \
941 "Only one of the 'names' and the 'keep' parameters can be given"
943 if names is not None:
944 should_release = names.__contains__
946 should_release = lambda name: name not in keep
948 should_release = None
950 owned = lu.owned_locks(level)
952 # Not owning any lock at this level, do nothing
959 # Determine which locks to release
961 if should_release(name):
966 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
968 # Release just some locks
969 lu.glm.release(level, names=release)
971 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
974 lu.glm.release(level)
976 assert not lu.glm.is_owned(level), "No locks should be owned"
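# Usage sketch (names are illustrative): keep only the node locks we still
# need and release everything else acquired at that level.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])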
979 def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name.
982 @type instances: list of L{objects.Instance}
983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
986 return dict(((node, vol), inst.name)
987 for inst in instances
988 for (node, vols) in inst.MapLVsByNode().items()
992 def _RunPostHook(lu, node_name):
993 """Runs the post-hook for an opcode on a single node.
996 hm = lu.proc.BuildHooksManager(lu)
998 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
999 except Exception, err: # pylint: disable=W0703
1000 lu.LogWarning("Errors occurred running hooks on %s: %s",
1004 def _CheckOutputFields(static, dynamic, selected):
1005 """Checks whether all selected fields are valid.
1007 @type static: L{utils.FieldSet}
1008 @param static: static fields set
1009 @type dynamic: L{utils.FieldSet}
1010 @param dynamic: dynamic fields set
1013 f = utils.FieldSet()
1017 delta = f.NonMatching(selected)
1019 raise errors.OpPrereqError("Unknown output fields selected: %s"
1020 % ",".join(delta), errors.ECODE_INVAL)
1023 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
1024 """Make sure that none of the given paramters is global.
1026 If a global parameter is found, an L{errors.OpPrereqError} exception is
1027 raised. This is used to avoid setting global parameters for individual nodes.
1029 @type params: dictionary
1030 @param params: Parameters to check
1031 @type glob_pars: dictionary
1032 @param glob_pars: Forbidden parameters
1034 @param kind: Kind of parameters (e.g. "node")
1035 @type bad_levels: string
1036 @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
1038 @type good_levels: string
1039 @param good_levels: Level(s) at which the parameters are allowed (e.g.
1043 used_globals = glob_pars.intersection(params)
1045 msg = ("The following %s parameters are global and cannot"
1046 " be customized at %s level, please modify them at"
1048 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
1049 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1052 def _CheckNodeOnline(lu, node, msg=None):
1053 """Ensure that a given node is online.
1055 @param lu: the LU on behalf of which we make the check
1056 @param node: the node to check
1057 @param msg: if passed, should be a message to replace the default one
1058 @raise errors.OpPrereqError: if the node is offline
1062 msg = "Can't use offline node"
1063 if lu.cfg.GetNodeInfo(node).offline:
1064 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1067 def _CheckNodeNotDrained(lu, node):
1068 """Ensure that a given node is not drained.
1070 @param lu: the LU on behalf of which we make the check
1071 @param node: the node to check
1072 @raise errors.OpPrereqError: if the node is drained
1075 if lu.cfg.GetNodeInfo(node).drained:
1076 raise errors.OpPrereqError("Can't use drained node %s" % node,
1080 def _CheckNodeVmCapable(lu, node):
1081 """Ensure that a given node is vm capable.
1083 @param lu: the LU on behalf of which we make the check
1084 @param node: the node to check
1085 @raise errors.OpPrereqError: if the node is not vm capable
1088 if not lu.cfg.GetNodeInfo(node).vm_capable:
1089 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1093 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1094 """Ensure that a node supports a given OS.
1096 @param lu: the LU on behalf of which we make the check
1097 @param node: the node to check
1098 @param os_name: the OS to query about
1099 @param force_variant: whether to ignore variant errors
1100 @raise errors.OpPrereqError: if the node does not support the OS
1103 result = lu.rpc.call_os_get(node, os_name)
1104 result.Raise("OS '%s' not in supported OS list for node %s" %
1106 prereq=True, ecode=errors.ECODE_INVAL)
1107 if not force_variant:
1108 _CheckOSVariant(result.payload, os_name)
1111 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1112 """Ensure that a node has the given secondary ip.
1114 @type lu: L{LogicalUnit}
1115 @param lu: the LU on behalf of which we make the check
1117 @param node: the node to check
1118 @type secondary_ip: string
1119 @param secondary_ip: the ip to check
1120 @type prereq: boolean
1121 @param prereq: whether to throw a prerequisite or an execute error
1122 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1123 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1126 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1127 result.Raise("Failure checking secondary ip on node %s" % node,
1128 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1129 if not result.payload:
1130 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1131 " please fix and re-run this command" % secondary_ip)
1133 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1135 raise errors.OpExecError(msg)
1138 def _CheckNodePVs(nresult, exclusive_storage):
1142 pvlist_dict = nresult.get(constants.NV_PVLIST, None)
1143 if pvlist_dict is None:
1144 return (["Can't get PV list from node"], None)
1145 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
1147 # check that ':' is not present in PV names, since it's a
1148 # special character for lvcreate (denotes the range of PEs to
1152 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
1153 (pv.name, pv.vg_name))
1155 if exclusive_storage:
1156 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
1157 errlist.extend(errmsgs)
1158 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
1160 for (pvname, lvlist) in shared_pvs:
1161 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
1162 errlist.append("PV %s is shared among unrelated LVs (%s)" %
1163 (pvname, utils.CommaJoin(lvlist)))
1164 return (errlist, es_pvinfo)
1167 def _GetClusterDomainSecret():
1168 """Reads the cluster domain secret.
1171 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1175 def _CheckInstanceState(lu, instance, req_states, msg=None):
1176 """Ensure that an instance is in one of the required states.
1178 @param lu: the LU on behalf of which we make the check
1179 @param instance: the instance to check
1180 @param msg: if passed, should be a message to replace the default one
1181 @raise errors.OpPrereqError: if the instance is not in the required state
1185 msg = ("can't use instance from outside %s states" %
1186 utils.CommaJoin(req_states))
1187 if instance.admin_state not in req_states:
1188 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1189 (instance.name, instance.admin_state, msg),
1192 if constants.ADMINST_UP not in req_states:
1193 pnode = instance.primary_node
1194 if not lu.cfg.GetNodeInfo(pnode).offline:
1195 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1196 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1197 prereq=True, ecode=errors.ECODE_ENVIRON)
1198 if instance.name in ins_l.payload:
1199 raise errors.OpPrereqError("Instance %s is running, %s" %
1200 (instance.name, msg), errors.ECODE_STATE)
1202 lu.LogWarning("Primary node offline, ignoring check that instance"
1206 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1207 """Computes if value is in the desired range.
1209 @param name: name of the parameter for which we perform the check
1210 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1212 @param ipolicy: dictionary containing min, max and std values
1213 @param value: actual value that we want to use
1214 @return: None or element not meeting the criteria
1218 if value in [None, constants.VALUE_AUTO]:
1220 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1221 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1222 if value > max_v or min_v > value:
1224 fqn = "%s/%s" % (name, qualifier)
1227 return ("%s value %s is not in range [%s, %s]" %
1228 (fqn, value, min_v, max_v))
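# Example (assuming an ipolicy whose memory-size bounds are 128..4096 MB):
#
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 8192)
#   -> "memory-size value 8192 is not in range [128, 4096]"
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 512)
#   -> None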
1232 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1233 nic_count, disk_sizes, spindle_use,
1234 _compute_fn=_ComputeMinMaxSpec):
1235 """Verifies ipolicy against provided specs.
1238 @param ipolicy: The ipolicy
1240 @param mem_size: The memory size
1241 @type cpu_count: int
1242 @param cpu_count: Used cpu cores
1243 @type disk_count: int
1244 @param disk_count: Number of disks used
1245 @type nic_count: int
1246 @param nic_count: Number of nics used
1247 @type disk_sizes: list of ints
1248 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1249 @type spindle_use: int
1250 @param spindle_use: The number of spindles this instance uses
1251 @param _compute_fn: The compute function (unittest only)
1252 @return: A list of violations, or an empty list if no violations are found
1255 assert disk_count == len(disk_sizes)
1258 (constants.ISPEC_MEM_SIZE, "", mem_size),
1259 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1260 (constants.ISPEC_DISK_COUNT, "", disk_count),
1261 (constants.ISPEC_NIC_COUNT, "", nic_count),
1262 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1263 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1264 for idx, d in enumerate(disk_sizes)]
1267 (_compute_fn(name, qualifier, ipolicy, value)
1268 for (name, qualifier, value) in test_settings))
1271 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1272 _compute_fn=_ComputeIPolicySpecViolation):
1273 """Compute if instance meets the specs of ipolicy.
1276 @param ipolicy: The ipolicy to verify against
1277 @type instance: L{objects.Instance}
1278 @param instance: The instance to verify
1279 @param _compute_fn: The function to verify ipolicy (unittest only)
1280 @see: L{_ComputeIPolicySpecViolation}
1283 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1284 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1285 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1286 disk_count = len(instance.disks)
1287 disk_sizes = [disk.size for disk in instance.disks]
1288 nic_count = len(instance.nics)
1290 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1291 disk_sizes, spindle_use)
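# Usage sketch: collect policy violations for an existing instance object
# against a group's ipolicy (names are illustrative).
#
#   res = _ComputeIPolicyInstanceViolation(group_ipolicy, instance_obj)
#   if res:
#     raise errors.OpPrereqError("Instance violates policy: %s" %
#                                utils.CommaJoin(res), errors.ECODE_INVAL)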
1294 def _ComputeIPolicyInstanceSpecViolation(
1295 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1296 """Compute if instance specs meets the specs of ipolicy.
1299 @param ipolicy: The ipolicy to verify against
1300 @type instance_spec: dict
1301 @param instance_spec: The instance spec to verify
1302 @param _compute_fn: The function to verify ipolicy (unittest only)
1303 @see: L{_ComputeIPolicySpecViolation}
1306 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1307 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1308 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1309 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1310 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1311 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1313 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1314 disk_sizes, spindle_use)
1317 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1319 _compute_fn=_ComputeIPolicyInstanceViolation):
1320 """Compute if instance meets the specs of the new target group.
1322 @param ipolicy: The ipolicy to verify
1323 @param instance: The instance object to verify
1324 @param current_group: The current group of the instance
1325 @param target_group: The new group of the instance
1326 @param _compute_fn: The function to verify ipolicy (unittest only)
1327 @see: L{_ComputeIPolicySpecViolation}
1330 if current_group == target_group:
1333 return _compute_fn(ipolicy, instance)
1336 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1337 _compute_fn=_ComputeIPolicyNodeViolation):
1338 """Checks that the target node is correct in terms of instance policy.
1340 @param ipolicy: The ipolicy to verify
1341 @param instance: The instance object to verify
1342 @param node: The new node to relocate
1343 @param ignore: Ignore violations of the ipolicy
1344 @param _compute_fn: The function to verify ipolicy (unittest only)
1345 @see: L{_ComputeIPolicySpecViolation}
1348 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1349 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1352 msg = ("Instance does not meet target node group's (%s) instance"
1353 " policy: %s") % (node.group, utils.CommaJoin(res))
1357 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1360 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1361 """Computes a set of any instances that would violate the new ipolicy.
1363 @param old_ipolicy: The current (still in-place) ipolicy
1364 @param new_ipolicy: The new (to become) ipolicy
1365 @param instances: List of instances to verify
1366 @return: A list of instances which violate the new ipolicy but
1370 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1371 _ComputeViolatingInstances(old_ipolicy, instances))
1374 def _ExpandItemName(fn, name, kind):
1375 """Expand an item name.
1377 @param fn: the function to use for expansion
1378 @param name: requested item name
1379 @param kind: text description ('Node' or 'Instance')
1380 @return: the resolved (full) name
1381 @raise errors.OpPrereqError: if the item is not found
1384 full_name = fn(name)
1385 if full_name is None:
1386 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1391 def _ExpandNodeName(cfg, name):
1392 """Wrapper over L{_ExpandItemName} for nodes."""
1393 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1396 def _ExpandInstanceName(cfg, name):
1397 """Wrapper over L{_ExpandItemName} for instance."""
1398 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1401 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1403 """Builds network related env variables for hooks
1405 This builds the hook environment from individual variables.
1408 @param name: the name of the network
1409 @type subnet: string
1410 @param subnet: the ipv4 subnet
1411 @type gateway: string
1412 @param gateway: the ipv4 gateway
1413 @type network6: string
1414 @param network6: the ipv6 subnet
1415 @type gateway6: string
1416 @param gateway6: the ipv6 gateway
1417 @type mac_prefix: string
1418 @param mac_prefix: the mac_prefix
1420 @param tags: the tags of the network
1425 env["NETWORK_NAME"] = name
1427 env["NETWORK_SUBNET"] = subnet
1429 env["NETWORK_GATEWAY"] = gateway
1431 env["NETWORK_SUBNET6"] = network6
1433 env["NETWORK_GATEWAY6"] = gateway6
1435 env["NETWORK_MAC_PREFIX"] = mac_prefix
1437 env["NETWORK_TAGS"] = " ".join(tags)
1442 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1443 minmem, maxmem, vcpus, nics, disk_template, disks,
1444 bep, hvp, hypervisor_name, tags):
1445 """Builds instance related env variables for hooks
1447 This builds the hook environment from individual variables.
1450 @param name: the name of the instance
1451 @type primary_node: string
1452 @param primary_node: the name of the instance's primary node
1453 @type secondary_nodes: list
1454 @param secondary_nodes: list of secondary nodes as strings
1455 @type os_type: string
1456 @param os_type: the name of the instance's OS
1457 @type status: string
1458 @param status: the desired status of the instance
1459 @type minmem: string
1460 @param minmem: the minimum memory size of the instance
1461 @type maxmem: string
1462 @param maxmem: the maximum memory size of the instance
1464 @param vcpus: the count of VCPUs the instance has
1466 @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
1467 the NICs the instance has
1468 @type disk_template: string
1469 @param disk_template: the disk template of the instance
1471 @param disks: the list of (size, mode) pairs
1473 @param bep: the backend parameters for the instance
1475 @param hvp: the hypervisor parameters for the instance
1476 @type hypervisor_name: string
1477 @param hypervisor_name: the hypervisor for the instance
1479 @param tags: list of instance tags as strings
1481 @return: the hook environment for this instance
1486 "INSTANCE_NAME": name,
1487 "INSTANCE_PRIMARY": primary_node,
1488 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1489 "INSTANCE_OS_TYPE": os_type,
1490 "INSTANCE_STATUS": status,
1491 "INSTANCE_MINMEM": minmem,
1492 "INSTANCE_MAXMEM": maxmem,
1493 # TODO(2.7) remove deprecated "memory" value
1494 "INSTANCE_MEMORY": maxmem,
1495 "INSTANCE_VCPUS": vcpus,
1496 "INSTANCE_DISK_TEMPLATE": disk_template,
1497 "INSTANCE_HYPERVISOR": hypervisor_name,
1500 nic_count = len(nics)
1501 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1504 env["INSTANCE_NIC%d_IP" % idx] = ip
1505 env["INSTANCE_NIC%d_MAC" % idx] = mac
1506 env["INSTANCE_NIC%d_MODE" % idx] = mode
1507 env["INSTANCE_NIC%d_LINK" % idx] = link
1509 nobj = objects.Network.FromDict(netinfo)
1510 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
1512 # FIXME: broken network reference: the instance NIC specifies a
1513 # network, but the relevant network entry was not in the config. This
1514 # should be made impossible.
1515 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
1516 if mode == constants.NIC_MODE_BRIDGED:
1517 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1521 env["INSTANCE_NIC_COUNT"] = nic_count
1524 disk_count = len(disks)
1525 for idx, (size, mode) in enumerate(disks):
1526 env["INSTANCE_DISK%d_SIZE" % idx] = size
1527 env["INSTANCE_DISK%d_MODE" % idx] = mode
1531 env["INSTANCE_DISK_COUNT"] = disk_count
1536 env["INSTANCE_TAGS"] = " ".join(tags)
1538 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1539 for key, value in source.items():
1540 env["INSTANCE_%s_%s" % (kind, key)] = value
1545 def _NICToTuple(lu, nic):
1546 """Build a tupple of nic information.
1548 @type lu: L{LogicalUnit}
1549 @param lu: the logical unit on whose behalf we execute
1550 @type nic: L{objects.NIC}
1551 @param nic: nic to convert to hooks tuple
1554 cluster = lu.cfg.GetClusterInfo()
1555 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1556 mode = filled_params[constants.NIC_MODE]
1557 link = filled_params[constants.NIC_LINK]
1560 nobj = lu.cfg.GetNetwork(nic.network)
1561 netinfo = objects.Network.ToDict(nobj)
1562 return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1565 def _NICListToTuple(lu, nics):
1566 """Build a list of nic information tuples.
1568 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1569 value in LUInstanceQueryData.
1571 @type lu: L{LogicalUnit}
1572 @param lu: the logical unit on whose behalf we execute
1573 @type nics: list of L{objects.NIC}
1574 @param nics: list of nics to convert to hooks tuples
1579 hooks_nics.append(_NICToTuple(lu, nic))
1583 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1584 """Builds instance related env variables for hooks from an object.
1586 @type lu: L{LogicalUnit}
1587 @param lu: the logical unit on whose behalf we execute
1588 @type instance: L{objects.Instance}
1589 @param instance: the instance for which we should build the
1591 @type override: dict
1592 @param override: dictionary with key/values that will override
1595 @return: the hook environment dictionary
1598 cluster = lu.cfg.GetClusterInfo()
1599 bep = cluster.FillBE(instance)
1600 hvp = cluster.FillHV(instance)
1602 "name": instance.name,
1603 "primary_node": instance.primary_node,
1604 "secondary_nodes": instance.secondary_nodes,
1605 "os_type": instance.os,
1606 "status": instance.admin_state,
1607 "maxmem": bep[constants.BE_MAXMEM],
1608 "minmem": bep[constants.BE_MINMEM],
1609 "vcpus": bep[constants.BE_VCPUS],
1610 "nics": _NICListToTuple(lu, instance.nics),
1611 "disk_template": instance.disk_template,
1612 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1615 "hypervisor_name": instance.hypervisor,
1616 "tags": instance.tags,
1619 args.update(override)
1620 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1623 def _AdjustCandidatePool(lu, exceptions):
1624 """Adjust the candidate pool after node operations.
1627 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1629 lu.LogInfo("Promoted nodes to master candidate role: %s",
1630 utils.CommaJoin(node.name for node in mod_list))
1631 for name in mod_list:
1632 lu.context.ReaddNode(name)
1633 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1635 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1639 def _DecideSelfPromotion(lu, exceptions=None):
1640 """Decide whether I should promote myself as a master candidate.
1643 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1644 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1645 # the new node will increase mc_max with one, so:
1646 mc_should = min(mc_should + 1, cp_size)
1647 return mc_now < mc_should
1650 def _ComputeViolatingInstances(ipolicy, instances):
1651 """Computes a set of instances who violates given ipolicy.
1653 @param ipolicy: The ipolicy to verify
1654 @type instances: list of L{objects.Instance}
1655 @param instances: List of instances to verify
1656 @return: A frozenset of instance names violating the ipolicy
1659 return frozenset([inst.name for inst in instances
1660 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1663 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1664 """Check that the brigdes needed by a list of nics exist.
1667 cluster = lu.cfg.GetClusterInfo()
1668 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1669 brlist = [params[constants.NIC_LINK] for params in paramslist
1670 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1672 result = lu.rpc.call_bridges_exist(target_node, brlist)
1673 result.Raise("Error checking bridges on destination node '%s'" %
1674 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1677 def _CheckInstanceBridgesExist(lu, instance, node=None):
1678 """Check that the brigdes needed by an instance exist.
1682 node = instance.primary_node
1683 _CheckNicsBridgesExist(lu, instance.nics, node)
1686 def _CheckOSVariant(os_obj, name):
1687 """Check whether an OS name conforms to the os variants specification.
1689 @type os_obj: L{objects.OS}
1690 @param os_obj: OS object to check
1692 @param name: OS name passed by the user, to check for validity
1695 variant = objects.OS.GetVariant(name)
1696 if not os_obj.supported_variants:
1698 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1699 " passed)" % (os_obj.name, variant),
1703 raise errors.OpPrereqError("OS name must include a variant",
1706 if variant not in os_obj.supported_variants:
1707 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1710 def _GetNodeInstancesInner(cfg, fn):
1711 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1714 def _GetNodeInstances(cfg, node_name):
1715 """Returns a list of all primary and secondary instances on a node.
1719 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1722 def _GetNodePrimaryInstances(cfg, node_name):
1723 """Returns primary instances on a node.
1726 return _GetNodeInstancesInner(cfg,
1727 lambda inst: node_name == inst.primary_node)
1730 def _GetNodeSecondaryInstances(cfg, node_name):
1731 """Returns secondary instances on a node.
1734 return _GetNodeInstancesInner(cfg,
1735 lambda inst: node_name in inst.secondary_nodes)
1738 def _GetStorageTypeArgs(cfg, storage_type):
1739 """Returns the arguments for a storage type.
1742 # Special case for file storage
1743 if storage_type == constants.ST_FILE:
1744 # storage.FileStorage wants a list of storage directories
1745 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1750 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1753 for dev in instance.disks:
1754 cfg.SetDiskID(dev, node_name)
1756 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1758 result.Raise("Failed to get disk status from node %s" % node_name,
1759 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1761 for idx, bdev_status in enumerate(result.payload):
1762 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1768 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1769 """Check the sanity of iallocator and node arguments and use the
1770 cluster-wide iallocator if appropriate.
1772 Check that at most one of (iallocator, node) is specified. If none is
1773 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1774 then the LU's opcode's iallocator slot is filled with the cluster-wide
1777 @type iallocator_slot: string
1778 @param iallocator_slot: the name of the opcode iallocator slot
1779 @type node_slot: string
1780 @param node_slot: the name of the opcode target node slot
1783 node = getattr(lu.op, node_slot, None)
1784 ialloc = getattr(lu.op, iallocator_slot, None)
1788 if node is not None and ialloc is not None:
1789 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1791 elif ((node is None and ialloc is None) or
1792 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1793 default_iallocator = lu.cfg.GetDefaultIAllocator()
1794 if default_iallocator:
1795 setattr(lu.op, iallocator_slot, default_iallocator)
1797 raise errors.OpPrereqError("No iallocator or node given and no"
1798 " cluster-wide default iallocator found;"
1799 " please specify either an iallocator or a"
1800 " node, or set a cluster-wide default"
1801 " iallocator", errors.ECODE_INVAL)
1804 def _GetDefaultIAllocator(cfg, ialloc):
1805 """Decides on which iallocator to use.
1807 @type cfg: L{config.ConfigWriter}
1808 @param cfg: Cluster configuration object
1809 @type ialloc: string or None
1810 @param ialloc: Iallocator specified in opcode
1812 @return: Iallocator name
1816 # Use default iallocator
1817 ialloc = cfg.GetDefaultIAllocator()
1820 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1821 " opcode nor as a cluster-wide default",
1827 def _CheckHostnameSane(lu, name):
1828 """Ensures that a given hostname resolves to a 'sane' name.
1830 The given name is required to be a prefix of the resolved hostname,
1831 to prevent accidental mismatches.
1833 @param lu: the logical unit on behalf of which we're checking
1834 @param name: the name we should resolve and check
1835 @return: the resolved hostname object
1838 hostname = netutils.GetHostname(name=name)
1839 if hostname.name != name:
1840 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1841 if not utils.MatchNameComponent(name, [hostname.name]):
1842 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1843 " same as given hostname '%s'") %
1844 (hostname.name, name), errors.ECODE_INVAL)
1848 class LUClusterPostInit(LogicalUnit):
1849 """Logical unit for running hooks after cluster initialization.
1852 HPATH = "cluster-init"
1853 HTYPE = constants.HTYPE_CLUSTER
1855 def BuildHooksEnv(self):
1860 "OP_TARGET": self.cfg.GetClusterName(),
1863 def BuildHooksNodes(self):
1864 """Build hooks nodes.
1867 return ([], [self.cfg.GetMasterNode()])
1869 def Exec(self, feedback_fn):
1876 class LUClusterDestroy(LogicalUnit):
1877 """Logical unit for destroying the cluster.
1880 HPATH = "cluster-destroy"
1881 HTYPE = constants.HTYPE_CLUSTER
1883 def BuildHooksEnv(self):
1888 "OP_TARGET": self.cfg.GetClusterName(),
1891 def BuildHooksNodes(self):
1892 """Build hooks nodes.
1897 def CheckPrereq(self):
1898 """Check prerequisites.
1900 This checks whether the cluster is empty.
1902 Any errors are signaled by raising errors.OpPrereqError.
1905 master = self.cfg.GetMasterNode()
1907 nodelist = self.cfg.GetNodeList()
1908 if len(nodelist) != 1 or nodelist[0] != master:
1909 raise errors.OpPrereqError("There are still %d node(s) in"
1910 " this cluster." % (len(nodelist) - 1),
1912 instancelist = self.cfg.GetInstanceList()
1914 raise errors.OpPrereqError("There are still %d instance(s) in"
1915 " this cluster." % len(instancelist),
1918 def Exec(self, feedback_fn):
1919 """Destroys the cluster.
1922 master_params = self.cfg.GetMasterNetworkParameters()
1924 # Run post hooks on master node before it's removed
1925 _RunPostHook(self, master_params.name)
1927 ems = self.cfg.GetUseExternalMipScript()
1928 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1931 self.LogWarning("Error disabling the master IP address: %s",
1934 return master_params.name
1937 def _VerifyCertificate(filename):
1938 """Verifies a certificate for L{LUClusterVerifyConfig}.
1940 @type filename: string
1941 @param filename: Path to PEM file
1945 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1946 utils.ReadFile(filename))
1947 except Exception, err: # pylint: disable=W0703
1948 return (LUClusterVerifyConfig.ETYPE_ERROR,
1949 "Failed to load X509 certificate %s: %s" % (filename, err))
1952 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1953 constants.SSL_CERT_EXPIRATION_ERROR)
1956 fnamemsg = "While verifying %s: %s" % (filename, msg)
1961 return (None, fnamemsg)
1962 elif errcode == utils.CERT_WARNING:
1963 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1964 elif errcode == utils.CERT_ERROR:
1965 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1967 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
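# Return value sketch (illustrative; the message texts are made up, only the
# (error-type, message) tuple shape follows from the code above):
#   (None, "While verifying /path/cert.pem: ...")              # acceptable
#   (LUClusterVerifyConfig.ETYPE_WARNING, "While verifying ...: expires soon")
#   (LUClusterVerifyConfig.ETYPE_ERROR, "Failed to load X509 certificate ...")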
1970 def _GetAllHypervisorParameters(cluster, instances):
1971 """Compute the set of all hypervisor parameters.
1973 @type cluster: L{objects.Cluster}
1974 @param cluster: the cluster object
1975 @type instances: list of L{objects.Instance}
1976 @param instances: additional instances from which to obtain parameters
1977 @rtype: list of (origin, hypervisor, parameters)
1978 @return: a list with all parameters found, indicating the hypervisor they
1979 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1984 for hv_name in cluster.enabled_hypervisors:
1985 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1987 for os_name, os_hvp in cluster.os_hvp.items():
1988 for hv_name, hv_params in os_hvp.items():
1990 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1991 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1993 # TODO: collapse identical parameter values in a single one
1994 for instance in instances:
1995 if instance.hvparams:
1996 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1997 cluster.FillHV(instance)))
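# Shape of the collected data (illustrative, hypothetical names/values): a flat
# list of (origin, hypervisor, parameters) tuples, e.g.:
#   hvp_data = [("cluster", "kvm", {...cluster defaults...}),
#               ("os debian-image", "kvm", {...defaults + OS overrides...}),
#               ("instance inst1.example.com", "kvm", {...fully filled...})]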
2002 class _VerifyErrors(object):
2003 """Mix-in for cluster/group verify LUs.
2005 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2006 self.op and self._feedback_fn to be available.)
2010 ETYPE_FIELD = "code"
2011 ETYPE_ERROR = "ERROR"
2012 ETYPE_WARNING = "WARNING"
2014 def _Error(self, ecode, item, msg, *args, **kwargs):
2015 """Format an error message.
2017 Based on the opcode's error_codes parameter, either format a
2018 parseable error code, or a simpler error string.
2020 This must be called only from Exec and functions called from Exec.
2023 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2024 itype, etxt, _ = ecode
2025 # If the error code is in the list of ignored errors, demote the error to a warning
2027 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2028 ltype = self.ETYPE_WARNING
2029 # first complete the msg
2032 # then format the whole message
2033 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2034 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2040 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2041 # and finally report it via the feedback_fn
2042 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2043 # do not mark the operation as failed for WARN cases only
2044 if ltype == self.ETYPE_ERROR:
2047 def _ErrorIf(self, cond, *args, **kwargs):
2048 """Log an error message if the passed condition is True.
2052 or self.op.debug_simulate_errors): # pylint: disable=E1101
2053 self._Error(*args, **kwargs)
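# Message format sketch (illustrative, made-up values): with op.error_codes the
# output is machine-parseable, otherwise human-readable, and both are sent
# through self._feedback_fn with a leading " - ":
#   "ERROR:ENODEN1:node:node2.example.com:not enough memory ..."
#   "ERROR: node node2.example.com: not enough memory ..."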
2056 class LUClusterVerify(NoHooksLU):
2057 """Submits all jobs necessary to verify the cluster.
2062 def ExpandNames(self):
2063 self.needed_locks = {}
2065 def Exec(self, feedback_fn):
2068 if self.op.group_name:
2069 groups = [self.op.group_name]
2070 depends_fn = lambda: None
2072 groups = self.cfg.GetNodeGroupList()
2074 # Verify global configuration
2076 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2079 # Always depend on global verification
2080 depends_fn = lambda: [(-len(jobs), [])]
2083 [opcodes.OpClusterVerifyGroup(group_name=group,
2084 ignore_errors=self.op.ignore_errors,
2085 depends=depends_fn())]
2086 for group in groups)
2088 # Fix up all parameters
2089 for op in itertools.chain(*jobs): # pylint: disable=W0142
2090 op.debug_simulate_errors = self.op.debug_simulate_errors
2091 op.verbose = self.op.verbose
2092 op.error_codes = self.op.error_codes
2094 op.skip_checks = self.op.skip_checks
2095 except AttributeError:
2096 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2098 return ResultWithJobs(jobs)
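# Resulting job list sketch (illustrative, hypothetical group names): one
# single-opcode job verifying the global configuration, then one job per node
# group, each carrying a relative dependency on the configuration job:
#   [[OpClusterVerifyConfig(...)],
#    [OpClusterVerifyGroup(group_name="group1", depends=...)],
#    [OpClusterVerifyGroup(group_name="group2", depends=...)]]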
2101 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2102 """Verifies the cluster config.
2107 def _VerifyHVP(self, hvp_data):
2108 """Verifies locally the syntax of the hypervisor parameters.
2111 for item, hv_name, hv_params in hvp_data:
2112 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2115 hv_class = hypervisor.GetHypervisorClass(hv_name)
2116 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2117 hv_class.CheckParameterSyntax(hv_params)
2118 except errors.GenericError, err:
2119 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2121 def ExpandNames(self):
2122 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2123 self.share_locks = _ShareAll()
2125 def CheckPrereq(self):
2126 """Check prerequisites.
2129 # Retrieve all information
2130 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2131 self.all_node_info = self.cfg.GetAllNodesInfo()
2132 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2134 def Exec(self, feedback_fn):
2135 """Verify integrity of cluster, performing various tests on nodes.
2139 self._feedback_fn = feedback_fn
2141 feedback_fn("* Verifying cluster config")
2143 for msg in self.cfg.VerifyConfig():
2144 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2146 feedback_fn("* Verifying cluster certificate files")
2148 for cert_filename in pathutils.ALL_CERT_FILES:
2149 (errcode, msg) = _VerifyCertificate(cert_filename)
2150 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2152 feedback_fn("* Verifying hypervisor parameters")
2154 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2155 self.all_inst_info.values()))
2157 feedback_fn("* Verifying all nodes belong to an existing group")
2159 # We do this verification here because, should this bogus circumstance
2160 # occur, it would never be caught by VerifyGroup, which only acts on
2161 # nodes/instances reachable from existing node groups.
2163 dangling_nodes = set(node.name for node in self.all_node_info.values()
2164 if node.group not in self.all_group_info)
2166 dangling_instances = {}
2167 no_node_instances = []
2169 for inst in self.all_inst_info.values():
2170 if inst.primary_node in dangling_nodes:
2171 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2172 elif inst.primary_node not in self.all_node_info:
2173 no_node_instances.append(inst.name)
2178 utils.CommaJoin(dangling_instances.get(node.name,
2180 for node in dangling_nodes]
2182 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2184 "the following nodes (and their instances) belong to a non"
2185 " existing group: %s", utils.CommaJoin(pretty_dangling))
2187 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2189 "the following instances have a non-existing primary-node:"
2190 " %s", utils.CommaJoin(no_node_instances))
2195 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2196 """Verifies the status of a node group.
2199 HPATH = "cluster-verify"
2200 HTYPE = constants.HTYPE_CLUSTER
2203 _HOOKS_INDENT_RE = re.compile("^", re.M)
2205 class NodeImage(object):
2206 """A class representing the logical and physical status of a node.
2209 @ivar name: the node name to which this object refers
2210 @ivar volumes: a structure as returned from
2211 L{ganeti.backend.GetVolumeList} (runtime)
2212 @ivar instances: a list of running instances (runtime)
2213 @ivar pinst: list of configured primary instances (config)
2214 @ivar sinst: list of configured secondary instances (config)
2215 @ivar sbp: dictionary of {primary-node: list of instances} for all
2216 instances for which this node is secondary (config)
2217 @ivar mfree: free memory, as reported by hypervisor (runtime)
2218 @ivar dfree: free disk, as reported by the node (runtime)
2219 @ivar offline: the offline status (config)
2220 @type rpc_fail: boolean
2221 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2222 not whether the individual keys were correct) (runtime)
2223 @type lvm_fail: boolean
2224 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2225 @type hyp_fail: boolean
2226 @ivar hyp_fail: whether the RPC call didn't return the instance list
2227 @type ghost: boolean
2228 @ivar ghost: whether this is a known node or not (config)
2229 @type os_fail: boolean
2230 @ivar os_fail: whether the RPC call didn't return valid OS data
2232 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2233 @type vm_capable: boolean
2234 @ivar vm_capable: whether the node can host instances
2236 @ivar pv_min: size in MiB of the smallest PVs
2238 @ivar pv_max: size in MiB of the biggest PVs
2241 def __init__(self, offline=False, name=None, vm_capable=True):
2250 self.offline = offline
2251 self.vm_capable = vm_capable
2252 self.rpc_fail = False
2253 self.lvm_fail = False
2254 self.hyp_fail = False
2256 self.os_fail = False
2261 def ExpandNames(self):
2262 # This raises errors.OpPrereqError on its own:
2263 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2265 # Get instances in node group; this is unsafe and needs verification later
2267 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2269 self.needed_locks = {
2270 locking.LEVEL_INSTANCE: inst_names,
2271 locking.LEVEL_NODEGROUP: [self.group_uuid],
2272 locking.LEVEL_NODE: [],
2274 # This opcode is run by the watcher every five minutes and acquires all nodes
2275 # for a group. It doesn't run for a long time, so it's better to acquire
2276 # the node allocation lock as well.
2277 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2280 self.share_locks = _ShareAll()
2282 def DeclareLocks(self, level):
2283 if level == locking.LEVEL_NODE:
2284 # Get members of node group; this is unsafe and needs verification later
2285 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2287 all_inst_info = self.cfg.GetAllInstancesInfo()
2289 # In Exec(), we warn about mirrored instances that have primary and
2290 # secondary living in separate node groups. To fully verify that
2291 # volumes for these instances are healthy, we will need to do an
2292 # extra call to their secondaries. We ensure here those nodes will
2294 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2295 # Important: access only the instances whose lock is owned
2296 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2297 nodes.update(all_inst_info[inst].secondary_nodes)
2299 self.needed_locks[locking.LEVEL_NODE] = nodes
2301 def CheckPrereq(self):
2302 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2303 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2305 group_nodes = set(self.group_info.members)
2307 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2310 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2312 unlocked_instances = \
2313 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2316 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2317 utils.CommaJoin(unlocked_nodes),
2320 if unlocked_instances:
2321 raise errors.OpPrereqError("Missing lock for instances: %s" %
2322 utils.CommaJoin(unlocked_instances),
2325 self.all_node_info = self.cfg.GetAllNodesInfo()
2326 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2328 self.my_node_names = utils.NiceSort(group_nodes)
2329 self.my_inst_names = utils.NiceSort(group_instances)
2331 self.my_node_info = dict((name, self.all_node_info[name])
2332 for name in self.my_node_names)
2334 self.my_inst_info = dict((name, self.all_inst_info[name])
2335 for name in self.my_inst_names)
2337 # We detect here the nodes that will need the extra RPC calls for verifying
2338 # split LV volumes; they should be locked.
2339 extra_lv_nodes = set()
2341 for inst in self.my_inst_info.values():
2342 if inst.disk_template in constants.DTS_INT_MIRROR:
2343 for nname in inst.all_nodes:
2344 if self.all_node_info[nname].group != self.group_uuid:
2345 extra_lv_nodes.add(nname)
2347 unlocked_lv_nodes = \
2348 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2350 if unlocked_lv_nodes:
2351 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2352 utils.CommaJoin(unlocked_lv_nodes),
2354 self.extra_lv_nodes = list(extra_lv_nodes)
2356 def _VerifyNode(self, ninfo, nresult):
2357 """Perform some basic validation on data returned from a node.
2359 - check the result data structure is well formed and has all the
2361 - check ganeti version
2363 @type ninfo: L{objects.Node}
2364 @param ninfo: the node to check
2365 @param nresult: the results from the node
2367 @return: whether overall this call was successful (and we can expect
2368 reasonable values in the response)
2372 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2374 # main result, nresult should be a non-empty dict
2375 test = not nresult or not isinstance(nresult, dict)
2376 _ErrorIf(test, constants.CV_ENODERPC, node,
2377 "unable to verify node: no data returned")
2381 # compares ganeti version
2382 local_version = constants.PROTOCOL_VERSION
2383 remote_version = nresult.get("version", None)
2384 test = not (remote_version and
2385 isinstance(remote_version, (list, tuple)) and
2386 len(remote_version) == 2)
2387 _ErrorIf(test, constants.CV_ENODERPC, node,
2388 "connection to node returned invalid data")
2392 test = local_version != remote_version[0]
2393 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2394 "incompatible protocol versions: master %s,"
2395 " node %s", local_version, remote_version[0])
2399 # node seems compatible, we can actually try to look into its results
2401 # full package version
2402 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2403 constants.CV_ENODEVERSION, node,
2404 "software version mismatch: master %s, node %s",
2405 constants.RELEASE_VERSION, remote_version[1],
2406 code=self.ETYPE_WARNING)
2408 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2409 if ninfo.vm_capable and isinstance(hyp_result, dict):
2410 for hv_name, hv_result in hyp_result.iteritems():
2411 test = hv_result is not None
2412 _ErrorIf(test, constants.CV_ENODEHV, node,
2413 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2415 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2416 if ninfo.vm_capable and isinstance(hvp_result, list):
2417 for item, hv_name, hv_result in hvp_result:
2418 _ErrorIf(True, constants.CV_ENODEHV, node,
2419 "hypervisor %s parameter verify failure (source %s): %s",
2420 hv_name, item, hv_result)
2422 test = nresult.get(constants.NV_NODESETUP,
2423 ["Missing NODESETUP results"])
2424 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2429 def _VerifyNodeTime(self, ninfo, nresult,
2430 nvinfo_starttime, nvinfo_endtime):
2431 """Check the node time.
2433 @type ninfo: L{objects.Node}
2434 @param ninfo: the node to check
2435 @param nresult: the remote results for the node
2436 @param nvinfo_starttime: the start time of the RPC call
2437 @param nvinfo_endtime: the end time of the RPC call
2441 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2443 ntime = nresult.get(constants.NV_TIME, None)
2445 ntime_merged = utils.MergeTime(ntime)
2446 except (ValueError, TypeError):
2447 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2450 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2451 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2452 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2453 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2457 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2458 "Node time diverges by at least %s from master node time",
2461 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2462 """Check the node LVM results and update info for cross-node checks.
2464 @type ninfo: L{objects.Node}
2465 @param ninfo: the node to check
2466 @param nresult: the remote results for the node
2467 @param vg_name: the configured VG name
2468 @type nimg: L{NodeImage}
2469 @param nimg: node image
2476 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2478 # checks vg existence and size > 20G
2479 vglist = nresult.get(constants.NV_VGLIST, None)
2481 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2483 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2484 constants.MIN_VG_SIZE)
2485 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2488 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2490 self._Error(constants.CV_ENODELVM, node, em)
2491 if pvminmax is not None:
2492 (nimg.pv_min, nimg.pv_max) = pvminmax
2494 def _VerifyGroupLVM(self, node_image, vg_name):
2495 """Check cross-node consistency in LVM.
2497 @type node_image: dict
2498 @param node_image: info about nodes, mapping from node to names to
2499 L{NodeImage} objects
2500 @param vg_name: the configured VG name
2506 # Only exclusive storage needs this kind of check
2507 if not self._exclusive_storage:
2510 # exclusive_storage wants all PVs to have the same size (approximately);
2511 # if the smallest and the biggest ones are okay, everything is fine.
2512 # pv_min is None iff pv_max is None
2513 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2516 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2517 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2518 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2519 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2520 "PV sizes differ too much in the group; smallest (%s MB) is"
2521 " on %s, biggest (%s MB) is on %s",
2522 pvmin, minnode, pvmax, maxnode)
2524 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2525 """Check the node bridges.
2527 @type ninfo: L{objects.Node}
2528 @param ninfo: the node to check
2529 @param nresult: the remote results for the node
2530 @param bridges: the expected list of bridges
2537 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2539 missing = nresult.get(constants.NV_BRIDGES, None)
2540 test = not isinstance(missing, list)
2541 _ErrorIf(test, constants.CV_ENODENET, node,
2542 "did not return valid bridge information")
2544 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2545 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2547 def _VerifyNodeUserScripts(self, ninfo, nresult):
2548 """Check the results of user-script presence and executability on the node
2550 @type ninfo: L{objects.Node}
2551 @param ninfo: the node to check
2552 @param nresult: the remote results for the node
2557 test = constants.NV_USERSCRIPTS not in nresult
2558 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2559 "did not return user scripts information")
2561 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2563 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2564 "user scripts not present or not executable: %s" %
2565 utils.CommaJoin(sorted(broken_scripts)))
2567 def _VerifyNodeNetwork(self, ninfo, nresult):
2568 """Check the node network connectivity results.
2570 @type ninfo: L{objects.Node}
2571 @param ninfo: the node to check
2572 @param nresult: the remote results for the node
2576 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2578 test = constants.NV_NODELIST not in nresult
2579 _ErrorIf(test, constants.CV_ENODESSH, node,
2580 "node hasn't returned node ssh connectivity data")
2582 if nresult[constants.NV_NODELIST]:
2583 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2584 _ErrorIf(True, constants.CV_ENODESSH, node,
2585 "ssh communication with node '%s': %s", a_node, a_msg)
2587 test = constants.NV_NODENETTEST not in nresult
2588 _ErrorIf(test, constants.CV_ENODENET, node,
2589 "node hasn't returned node tcp connectivity data")
2591 if nresult[constants.NV_NODENETTEST]:
2592 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2594 _ErrorIf(True, constants.CV_ENODENET, node,
2595 "tcp communication with node '%s': %s",
2596 anode, nresult[constants.NV_NODENETTEST][anode])
2598 test = constants.NV_MASTERIP not in nresult
2599 _ErrorIf(test, constants.CV_ENODENET, node,
2600 "node hasn't returned node master IP reachability data")
2602 if not nresult[constants.NV_MASTERIP]:
2603 if node == self.master_node:
2604 msg = "the master node cannot reach the master IP (not configured?)"
2606 msg = "cannot reach the master IP"
2607 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2609 def _VerifyInstance(self, instance, inst_config, node_image,
2611 """Verify an instance.
2613 This function checks to see if the required block devices are
2614 available on the instance's node, and that the nodes are in the correct
2618 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2619 pnode = inst_config.primary_node
2620 pnode_img = node_image[pnode]
2621 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2623 node_vol_should = {}
2624 inst_config.MapLVsByNode(node_vol_should)
2626 cluster = self.cfg.GetClusterInfo()
2627 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2629 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2630 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2631 code=self.ETYPE_WARNING)
2633 for node in node_vol_should:
2634 n_img = node_image[node]
2635 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2636 # ignore missing volumes on offline or broken nodes
2638 for volume in node_vol_should[node]:
2639 test = volume not in n_img.volumes
2640 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2641 "volume %s missing on node %s", volume, node)
2643 if inst_config.admin_state == constants.ADMINST_UP:
2644 test = instance not in pnode_img.instances and not pnode_img.offline
2645 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2646 "instance not running on its primary node %s",
2648 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2649 "instance is marked as running and lives on offline node %s",
2652 diskdata = [(nname, success, status, idx)
2653 for (nname, disks) in diskstatus.items()
2654 for idx, (success, status) in enumerate(disks)]
2656 for nname, success, bdev_status, idx in diskdata:
2657 # the 'ghost node' construction in Exec() ensures that we have a
2659 snode = node_image[nname]
2660 bad_snode = snode.ghost or snode.offline
2661 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2662 not success and not bad_snode,
2663 constants.CV_EINSTANCEFAULTYDISK, instance,
2664 "couldn't retrieve status for disk/%s on %s: %s",
2665 idx, nname, bdev_status)
2666 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2667 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2668 constants.CV_EINSTANCEFAULTYDISK, instance,
2669 "disk/%s on %s is faulty", idx, nname)
2671 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2672 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2673 " primary node failed", instance)
2675 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2676 constants.CV_EINSTANCELAYOUT,
2677 instance, "instance has multiple secondary nodes: %s",
2678 utils.CommaJoin(inst_config.secondary_nodes),
2679 code=self.ETYPE_WARNING)
2681 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2682 # Disk template not compatible with exclusive_storage: no instance
2683 # node should have the flag set
2684 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2685 inst_config.all_nodes)
2686 es_nodes = [n for (n, es) in es_flags.items()
2688 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2689 "instance has template %s, which is not supported on nodes"
2690 " that have exclusive storage set: %s",
2691 inst_config.disk_template, utils.CommaJoin(es_nodes))
2693 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2694 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2695 instance_groups = {}
2697 for node in instance_nodes:
2698 instance_groups.setdefault(self.all_node_info[node].group,
2702 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2703 # Sort so that we always list the primary node first.
2704 for group, nodes in sorted(instance_groups.items(),
2705 key=lambda (_, nodes): pnode in nodes,
2708 self._ErrorIf(len(instance_groups) > 1,
2709 constants.CV_EINSTANCESPLITGROUPS,
2710 instance, "instance has primary and secondary nodes in"
2711 " different groups: %s", utils.CommaJoin(pretty_list),
2712 code=self.ETYPE_WARNING)
2714 inst_nodes_offline = []
2715 for snode in inst_config.secondary_nodes:
2716 s_img = node_image[snode]
2717 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2718 snode, "instance %s, connection to secondary node failed",
2722 inst_nodes_offline.append(snode)
2724 # warn that the instance lives on offline nodes
2725 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2726 "instance has offline secondary node(s) %s",
2727 utils.CommaJoin(inst_nodes_offline))
2728 # ... or ghost/non-vm_capable nodes
2729 for node in inst_config.all_nodes:
2730 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2731 instance, "instance lives on ghost node %s", node)
2732 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2733 instance, "instance lives on non-vm_capable node %s", node)
2735 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2736 """Verify if there are any unknown volumes in the cluster.
2738 The .os, .swap and backup volumes are ignored. All other volumes are
2739 reported as unknown.
2741 @type reserved: L{ganeti.utils.FieldSet}
2742 @param reserved: a FieldSet of reserved volume names
2745 for node, n_img in node_image.items():
2746 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2747 self.all_node_info[node].group != self.group_uuid):
2748 # skip non-healthy nodes
2750 for volume in n_img.volumes:
2751 test = ((node not in node_vol_should or
2752 volume not in node_vol_should[node]) and
2753 not reserved.Matches(volume))
2754 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2755 "volume %s is unknown", volume)
2757 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2758 """Verify N+1 Memory Resilience.
2760 Check that if one single node dies we can still start all the
2761 instances it was primary for.
2764 cluster_info = self.cfg.GetClusterInfo()
2765 for node, n_img in node_image.items():
2766 # This code checks that every node which is now listed as
2767 # secondary has enough memory to host all instances it is
2768 # supposed to, should a single other node in the cluster fail.
2769 # FIXME: not ready for failover to an arbitrary node
2770 # FIXME: does not support file-backed instances
2771 # WARNING: we currently take into account down instances as well
2772 # as up ones, considering that even if they're down someone
2773 # might want to start them even in the event of a node failure.
2774 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2775 # we're skipping nodes marked offline and nodes in other groups from
2776 # the N+1 warning, since most likely we don't have good memory
2777 # information from them; we already list instances living on such
2778 # nodes, and that's enough warning
2780 #TODO(dynmem): also consider ballooning out other instances
2781 for prinode, instances in n_img.sbp.items():
2783 for instance in instances:
2784 bep = cluster_info.FillBE(instance_cfg[instance])
2785 if bep[constants.BE_AUTO_BALANCE]:
2786 needed_mem += bep[constants.BE_MINMEM]
2787 test = n_img.mfree < needed_mem
2788 self._ErrorIf(test, constants.CV_ENODEN1, node,
2789 "not enough memory to accommodate instance failovers"
2790 " should node %s fail (%dMiB needed, %dMiB available)",
2791 prinode, needed_mem, n_img.mfree)
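# Check sketch (illustrative, made-up numbers): for each prospective failover
# source node, needed_mem is the sum of BE_MINMEM over its auto-balanced
# instances; if the secondary's reported free memory is smaller, CV_ENODEN1 is
# raised, e.g. needed_mem = 2048 + 4096 > n_img.mfree = 4096 -> error.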
2794 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2795 (files_all, files_opt, files_mc, files_vm)):
2796 """Verifies file checksums collected from all nodes.
2798 @param errorif: Callback for reporting errors
2799 @param nodeinfo: List of L{objects.Node} objects
2800 @param master_node: Name of master node
2801 @param all_nvinfo: RPC results
2804 # Define functions determining which nodes to consider for a file
2807 (files_mc, lambda node: (node.master_candidate or
2808 node.name == master_node)),
2809 (files_vm, lambda node: node.vm_capable),
2812 # Build mapping from filename to list of nodes which should have the file
2814 for (files, fn) in files2nodefn:
2816 filenodes = nodeinfo
2818 filenodes = filter(fn, nodeinfo)
2819 nodefiles.update((filename,
2820 frozenset(map(operator.attrgetter("name"), filenodes)))
2821 for filename in files)
2823 assert set(nodefiles) == (files_all | files_mc | files_vm)
2825 fileinfo = dict((filename, {}) for filename in nodefiles)
2826 ignore_nodes = set()
2828 for node in nodeinfo:
2830 ignore_nodes.add(node.name)
2833 nresult = all_nvinfo[node.name]
2835 if nresult.fail_msg or not nresult.payload:
2838 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2839 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2840 for (key, value) in fingerprints.items())
2843 test = not (node_files and isinstance(node_files, dict))
2844 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2845 "Node did not return file checksum data")
2847 ignore_nodes.add(node.name)
2850 # Build per-checksum mapping from filename to nodes having it
2851 for (filename, checksum) in node_files.items():
2852 assert filename in nodefiles
2853 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2855 for (filename, checksums) in fileinfo.items():
2856 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2858 # Nodes having the file
2859 with_file = frozenset(node_name
2860 for nodes in fileinfo[filename].values()
2861 for node_name in nodes) - ignore_nodes
2863 expected_nodes = nodefiles[filename] - ignore_nodes
2865 # Nodes missing file
2866 missing_file = expected_nodes - with_file
2868 if filename in files_opt:
2870 errorif(missing_file and missing_file != expected_nodes,
2871 constants.CV_ECLUSTERFILECHECK, None,
2872 "File %s is optional, but it must exist on all or no"
2873 " nodes (not found on %s)",
2874 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2876 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2877 "File %s is missing from node(s) %s", filename,
2878 utils.CommaJoin(utils.NiceSort(missing_file)))
2880 # Warn if a node has a file it shouldn't
2881 unexpected = with_file - expected_nodes
2883 constants.CV_ECLUSTERFILECHECK, None,
2884 "File %s should not exist on node(s) %s",
2885 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2887 # See if there are multiple versions of the file
2888 test = len(checksums) > 1
2890 variants = ["variant %s on %s" %
2891 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2892 for (idx, (checksum, nodes)) in
2893 enumerate(sorted(checksums.items()))]
2897 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2898 "File %s found with %s different checksums (%s)",
2899 filename, len(checksums), "; ".join(variants))
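# Data layout sketch (illustrative, hypothetical names): fileinfo maps each
# expected filename to the checksums seen and the nodes reporting each one:
#   fileinfo = {"/path/to/cluster/file": {"<checksum A>": set(["node1", "node2"]),
#                                         "<checksum B>": set(["node3"])}}
# More than one checksum for the same file triggers the "different checksums"
# error above.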
2901 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2903 """Verifies the node DRBD status.
2905 @type ninfo: L{objects.Node}
2906 @param ninfo: the node to check
2907 @param nresult: the remote results for the node
2908 @param instanceinfo: the dict of instances
2909 @param drbd_helper: the configured DRBD usermode helper
2910 @param drbd_map: the DRBD map as returned by
2911 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2915 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2918 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2919 test = (helper_result is None)
2920 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2921 "no drbd usermode helper returned")
2923 status, payload = helper_result
2925 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2926 "drbd usermode helper check unsuccessful: %s", payload)
2927 test = status and (payload != drbd_helper)
2928 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2929 "wrong drbd usermode helper: %s", payload)
2931 # compute the DRBD minors
2933 for minor, instance in drbd_map[node].items():
2934 test = instance not in instanceinfo
2935 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2936 "ghost instance '%s' in temporary DRBD map", instance)
2937 # ghost instance should not be running, but otherwise we
2938 # don't give double warnings (both ghost instance and
2939 # unallocated minor in use)
2941 node_drbd[minor] = (instance, False)
2943 instance = instanceinfo[instance]
2944 node_drbd[minor] = (instance.name,
2945 instance.admin_state == constants.ADMINST_UP)
2947 # and now check them
2948 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2949 test = not isinstance(used_minors, (tuple, list))
2950 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2951 "cannot parse drbd status file: %s", str(used_minors))
2953 # we cannot check drbd status
2956 for minor, (iname, must_exist) in node_drbd.items():
2957 test = minor not in used_minors and must_exist
2958 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2959 "drbd minor %d of instance %s is not active", minor, iname)
2960 for minor in used_minors:
2961 test = minor not in node_drbd
2962 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2963 "unallocated drbd minor %d is in use", minor)
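# Mapping sketch (illustrative, made-up values): node_drbd maps each minor
# expected on this node to (instance name, should-be-active):
#   node_drbd = {0: ("inst1.example.com", True),    # instance is ADMINST_UP
#                1: ("ghost-instance", False)}      # ghost entry, never active
# Minors reported in used_minors but absent from node_drbd are flagged as
# unallocated.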
2965 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2966 """Builds the node OS structures.
2968 @type ninfo: L{objects.Node}
2969 @param ninfo: the node to check
2970 @param nresult: the remote results for the node
2971 @param nimg: the node image object
2975 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2977 remote_os = nresult.get(constants.NV_OSLIST, None)
2978 test = (not isinstance(remote_os, list) or
2979 not compat.all(isinstance(v, list) and len(v) == 7
2980 for v in remote_os))
2982 _ErrorIf(test, constants.CV_ENODEOS, node,
2983 "node hasn't returned valid OS data")
2992 for (name, os_path, status, diagnose,
2993 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2995 if name not in os_dict:
2998 # parameters is a list of lists instead of list of tuples due to
2999 # JSON lacking a real tuple type, fix it:
3000 parameters = [tuple(v) for v in parameters]
3001 os_dict[name].append((os_path, status, diagnose,
3002 set(variants), set(parameters), set(api_ver)))
3004 nimg.oslist = os_dict
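# Structure sketch (illustrative, hypothetical values): nimg.oslist maps each
# OS name to a list of (path, status, diagnose, variants, parameters,
# api_versions) tuples, one per occurrence on the node:
#   nimg.oslist = {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                                   set(["default"]), set(), set([20]))]}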
3006 def _VerifyNodeOS(self, ninfo, nimg, base):
3007 """Verifies the node OS list.
3009 @type ninfo: L{objects.Node}
3010 @param ninfo: the node to check
3011 @param nimg: the node image object
3012 @param base: the 'template' node we match against (e.g. from the master)
3016 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3018 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3020 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3021 for os_name, os_data in nimg.oslist.items():
3022 assert os_data, "Empty OS status for OS %s?!" % os_name
3023 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3024 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3025 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3026 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3027 "OS '%s' has multiple entries (first one shadows the rest): %s",
3028 os_name, utils.CommaJoin([v[0] for v in os_data]))
3029 # comparisons with the 'base' image
3030 test = os_name not in base.oslist
3031 _ErrorIf(test, constants.CV_ENODEOS, node,
3032 "Extra OS %s not present on reference node (%s)",
3036 assert base.oslist[os_name], "Base node has empty OS status?"
3037 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3039 # base OS is invalid, skipping
3041 for kind, a, b in [("API version", f_api, b_api),
3042 ("variants list", f_var, b_var),
3043 ("parameters", beautify_params(f_param),
3044 beautify_params(b_param))]:
3045 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3046 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3047 kind, os_name, base.name,
3048 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3050 # check any missing OSes
3051 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3052 _ErrorIf(missing, constants.CV_ENODEOS, node,
3053 "OSes present on reference node %s but missing on this node: %s",
3054 base.name, utils.CommaJoin(missing))
3056 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3057 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3059 @type ninfo: L{objects.Node}
3060 @param ninfo: the node to check
3061 @param nresult: the remote results for the node
3062 @type is_master: bool
3063 @param is_master: Whether node is the master node
3069 (constants.ENABLE_FILE_STORAGE or
3070 constants.ENABLE_SHARED_FILE_STORAGE)):
3072 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3074 # This should never happen
3075 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3076 "Node did not return forbidden file storage paths")
3078 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3079 "Found forbidden file storage paths: %s",
3080 utils.CommaJoin(fspaths))
3082 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3083 constants.CV_ENODEFILESTORAGEPATHS, node,
3084 "Node should not have returned forbidden file storage"
3087 def _VerifyOob(self, ninfo, nresult):
3088 """Verifies out of band functionality of a node.
3090 @type ninfo: L{objects.Node}
3091 @param ninfo: the node to check
3092 @param nresult: the remote results for the node
3096 # We just have to verify the paths on master and/or master candidates
3097 # as the oob helper is invoked on the master
3098 if ((ninfo.master_candidate or ninfo.master_capable) and
3099 constants.NV_OOB_PATHS in nresult):
3100 for path_result in nresult[constants.NV_OOB_PATHS]:
3101 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3103 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3104 """Verifies and updates the node volume data.
3106 This function will update a L{NodeImage}'s internal structures
3107 with data from the remote call.
3109 @type ninfo: L{objects.Node}
3110 @param ninfo: the node to check
3111 @param nresult: the remote results for the node
3112 @param nimg: the node image object
3113 @param vg_name: the configured VG name
3117 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3119 nimg.lvm_fail = True
3120 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3123 elif isinstance(lvdata, basestring):
3124 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3125 utils.SafeEncode(lvdata))
3126 elif not isinstance(lvdata, dict):
3127 _ErrorIf(True, constants.CV_ENODELVM, node,
3128 "rpc call to node failed (lvlist)")
3130 nimg.volumes = lvdata
3131 nimg.lvm_fail = False
3133 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3134 """Verifies and updates the node instance list.
3136 If the listing was successful, then updates this node's instance
3137 list. Otherwise, it marks the RPC call as failed for the instance
3140 @type ninfo: L{objects.Node}
3141 @param ninfo: the node to check
3142 @param nresult: the remote results for the node
3143 @param nimg: the node image object
3146 idata = nresult.get(constants.NV_INSTANCELIST, None)
3147 test = not isinstance(idata, list)
3148 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3149 "rpc call to node failed (instancelist): %s",
3150 utils.SafeEncode(str(idata)))
3152 nimg.hyp_fail = True
3154 nimg.instances = idata
3156 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3157 """Verifies and computes a node information map
3159 @type ninfo: L{objects.Node}
3160 @param ninfo: the node to check
3161 @param nresult: the remote results for the node
3162 @param nimg: the node image object
3163 @param vg_name: the configured VG name
3167 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3169 # try to read free memory (from the hypervisor)
3170 hv_info = nresult.get(constants.NV_HVINFO, None)
3171 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3172 _ErrorIf(test, constants.CV_ENODEHV, node,
3173 "rpc call to node failed (hvinfo)")
3176 nimg.mfree = int(hv_info["memory_free"])
3177 except (ValueError, TypeError):
3178 _ErrorIf(True, constants.CV_ENODERPC, node,
3179 "node returned invalid nodeinfo, check hypervisor")
3181 # FIXME: devise a free space model for file based instances as well
3182 if vg_name is not None:
3183 test = (constants.NV_VGLIST not in nresult or
3184 vg_name not in nresult[constants.NV_VGLIST])
3185 _ErrorIf(test, constants.CV_ENODELVM, node,
3186 "node didn't return data for the volume group '%s'"
3187 " - it is either missing or broken", vg_name)
3190 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3191 except (ValueError, TypeError):
3192 _ErrorIf(True, constants.CV_ENODERPC, node,
3193 "node returned invalid LVM info, check LVM status")
3195 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3196 """Gets per-disk status information for all instances.
3198 @type nodelist: list of strings
3199 @param nodelist: Node names
3200 @type node_image: dict of (name, L{objects.Node})
3201 @param node_image: Node objects
3202 @type instanceinfo: dict of (name, L{objects.Instance})
3203 @param instanceinfo: Instance objects
3204 @rtype: {instance: {node: [(success, payload)]}}
3205 @return: a dictionary of per-instance dictionaries with nodes as
3206 keys and disk information as values; the disk information is a
3207 list of tuples (success, payload)
3210 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3213 node_disks_devonly = {}
3214 diskless_instances = set()
3215 diskless = constants.DT_DISKLESS
3217 for nname in nodelist:
3218 node_instances = list(itertools.chain(node_image[nname].pinst,
3219 node_image[nname].sinst))
3220 diskless_instances.update(inst for inst in node_instances
3221 if instanceinfo[inst].disk_template == diskless)
3222 disks = [(inst, disk)
3223 for inst in node_instances
3224 for disk in instanceinfo[inst].disks]
3227 # No need to collect data
3230 node_disks[nname] = disks
3232 # _AnnotateDiskParams already makes copies of the disks
3234 for (inst, dev) in disks:
3235 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3236 self.cfg.SetDiskID(anno_disk, nname)
3237 devonly.append(anno_disk)
3239 node_disks_devonly[nname] = devonly
3241 assert len(node_disks) == len(node_disks_devonly)
3243 # Collect data from all nodes with disks
3244 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3247 assert len(result) == len(node_disks)
3251 for (nname, nres) in result.items():
3252 disks = node_disks[nname]
3255 # No data from this node
3256 data = len(disks) * [(False, "node offline")]
3259 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3260 "while getting disk information: %s", msg)
3262 # No data from this node
3263 data = len(disks) * [(False, msg)]
3266 for idx, i in enumerate(nres.payload):
3267 if isinstance(i, (tuple, list)) and len(i) == 2:
3270 logging.warning("Invalid result from node %s, entry %d: %s",
3272 data.append((False, "Invalid result from the remote node"))
3274 for ((inst, _), status) in zip(disks, data):
3275 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3277 # Add empty entries for diskless instances.
3278 for inst in diskless_instances:
3279 assert inst not in instdisk
3282 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3283 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3284 compat.all(isinstance(s, (tuple, list)) and
3285 len(s) == 2 for s in statuses)
3286 for inst, nnames in instdisk.items()
3287 for nname, statuses in nnames.items())
3289 instdisk_keys = set(instdisk)
3290 instanceinfo_keys = set(instanceinfo)
3291 assert instdisk_keys == instanceinfo_keys, \
3292 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3293 (instdisk_keys, instanceinfo_keys))
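# Result shape sketch (illustrative, made-up names): per instance and node, a
# list with one (success, payload) entry per disk, matching the @rtype above:
#   instdisk = {"inst1.example.com":
#                 {"node1": [(True, <status>), (True, <status>)],
#                  "node2": [(False, "node offline"), (False, "node offline")]}}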
3298 def _SshNodeSelector(group_uuid, all_nodes):
3299 """Create endless iterators for all potential SSH check hosts.
3302 nodes = [node for node in all_nodes
3303 if (node.group != group_uuid and
3305 keyfunc = operator.attrgetter("group")
3307 return map(itertools.cycle,
3308 [sorted(map(operator.attrgetter("name"), names))
3309 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3313 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3314 """Choose which nodes should talk to which other nodes.
3316 We will make nodes contact all nodes in their group, and one node from
3319 @warning: This algorithm has a known issue if one node group is much
3320 smaller than others (e.g. just one node). In such a case all other
3321 nodes will talk to the single node.
3324 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3325 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3327 return (online_nodes,
3328 dict((name, sorted([i.next() for i in sel]))
3329 for name in online_nodes))
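# Result sketch (illustrative, hypothetical names): the online nodes of this
# group plus, for every one of them, one peer from each other group to probe
# over SSH (cycled so the remote load is spread):
#   (["node1", "node2"],
#    {"node1": ["grp2-nodeA"], "node2": ["grp2-nodeB"]})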
3331 def BuildHooksEnv(self):
3334 Cluster-Verify hooks run only in the post phase; their failure is logged
3335 in the verify output and makes the verification fail.
3339 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3342 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3343 for node in self.my_node_info.values())
3347 def BuildHooksNodes(self):
3348 """Build hooks nodes.
3351 return ([], self.my_node_names)
3353 def Exec(self, feedback_fn):
3354 """Verify integrity of the node group, performing various tests on nodes.
3357 # This method has too many local variables. pylint: disable=R0914
3358 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3360 if not self.my_node_names:
3362 feedback_fn("* Empty node group, skipping verification")
3366 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3367 verbose = self.op.verbose
3368 self._feedback_fn = feedback_fn
3370 vg_name = self.cfg.GetVGName()
3371 drbd_helper = self.cfg.GetDRBDHelper()
3372 cluster = self.cfg.GetClusterInfo()
3373 hypervisors = cluster.enabled_hypervisors
3374 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3376 i_non_redundant = [] # Non redundant instances
3377 i_non_a_balanced = [] # Non auto-balanced instances
3378 i_offline = 0 # Count of offline instances
3379 n_offline = 0 # Count of offline nodes
3380 n_drained = 0 # Count of nodes being drained
3381 node_vol_should = {}
3383 # FIXME: verify OS list
3386 filemap = _ComputeAncillaryFiles(cluster, False)
3388 # do local checksums
3389 master_node = self.master_node = self.cfg.GetMasterNode()
3390 master_ip = self.cfg.GetMasterIP()
3392 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3395 if self.cfg.GetUseExternalMipScript():
3396 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3398 node_verify_param = {
3399 constants.NV_FILELIST:
3400 map(vcluster.MakeVirtualPath,
3401 utils.UniqueSequence(filename
3402 for files in filemap
3403 for filename in files)),
3404 constants.NV_NODELIST:
3405 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3406 self.all_node_info.values()),
3407 constants.NV_HYPERVISOR: hypervisors,
3408 constants.NV_HVPARAMS:
3409 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3410 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3411 for node in node_data_list
3412 if not node.offline],
3413 constants.NV_INSTANCELIST: hypervisors,
3414 constants.NV_VERSION: None,
3415 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3416 constants.NV_NODESETUP: None,
3417 constants.NV_TIME: None,
3418 constants.NV_MASTERIP: (master_node, master_ip),
3419 constants.NV_OSLIST: None,
3420 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3421 constants.NV_USERSCRIPTS: user_scripts,
3424 if vg_name is not None:
3425 node_verify_param[constants.NV_VGLIST] = None
3426 node_verify_param[constants.NV_LVLIST] = vg_name
3427 node_verify_param[constants.NV_PVLIST] = [vg_name]
3430 node_verify_param[constants.NV_DRBDLIST] = None
3431 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3433 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3434 # Load file storage paths only from master node
3435 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3438 # FIXME: this needs to be changed per node-group, not cluster-wide
3440 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3441 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3442 bridges.add(default_nicpp[constants.NIC_LINK])
3443 for instance in self.my_inst_info.values():
3444 for nic in instance.nics:
3445 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3446 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3447 bridges.add(full_nic[constants.NIC_LINK])
3450 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3452 # Build our expected cluster state
3453 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3455 vm_capable=node.vm_capable))
3456 for node in node_data_list)
3460 for node in self.all_node_info.values():
3461 path = _SupportsOob(self.cfg, node)
3462 if path and path not in oob_paths:
3463 oob_paths.append(path)
3466 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3468 for instance in self.my_inst_names:
3469 inst_config = self.my_inst_info[instance]
3470 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3473 for nname in inst_config.all_nodes:
3474 if nname not in node_image:
3475 gnode = self.NodeImage(name=nname)
3476 gnode.ghost = (nname not in self.all_node_info)
3477 node_image[nname] = gnode
3479 inst_config.MapLVsByNode(node_vol_should)
3481 pnode = inst_config.primary_node
3482 node_image[pnode].pinst.append(instance)
3484 for snode in inst_config.secondary_nodes:
3485 nimg = node_image[snode]
3486 nimg.sinst.append(instance)
3487 if pnode not in nimg.sbp:
3488 nimg.sbp[pnode] = []
3489 nimg.sbp[pnode].append(instance)
3491 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3492 # The value of exclusive_storage should be the same across the group, so if
3493 # it's True for at least one node, we act as if it were set for all the nodes
3494 self._exclusive_storage = compat.any(es_flags.values())
3495 if self._exclusive_storage:
3496 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3498 # At this point, we have the in-memory data structures complete,
3499 # except for the runtime information, which we'll gather next
3501 # Due to the way our RPC system works, exact response times cannot be
3502 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3503 # time before and after executing the request, we can at least have a time
3505 nvinfo_starttime = time.time()
3506 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3508 self.cfg.GetClusterName())
3509 nvinfo_endtime = time.time()
3511 if self.extra_lv_nodes and vg_name is not None:
3513 self.rpc.call_node_verify(self.extra_lv_nodes,
3514 {constants.NV_LVLIST: vg_name},
3515 self.cfg.GetClusterName())
3517 extra_lv_nvinfo = {}
3519 all_drbd_map = self.cfg.ComputeDRBDMap()
3521 feedback_fn("* Gathering disk information (%s nodes)" %
3522 len(self.my_node_names))
3523 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3526 feedback_fn("* Verifying configuration file consistency")
3528 # If not all nodes are being checked, we need to make sure the master node
3529 # and a non-checked vm_capable node are in the list.
3530 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3532 vf_nvinfo = all_nvinfo.copy()
3533 vf_node_info = list(self.my_node_info.values())
3534 additional_nodes = []
3535 if master_node not in self.my_node_info:
3536 additional_nodes.append(master_node)
3537 vf_node_info.append(self.all_node_info[master_node])
3538 # Add the first vm_capable node we find which is not included,
3539 # excluding the master node (which we already have)
3540 for node in absent_nodes:
3541 nodeinfo = self.all_node_info[node]
3542 if (nodeinfo.vm_capable and not nodeinfo.offline and
3543 node != master_node):
3544 additional_nodes.append(node)
3545 vf_node_info.append(self.all_node_info[node])
3547 key = constants.NV_FILELIST
3548 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3549 {key: node_verify_param[key]},
3550 self.cfg.GetClusterName()))
3552 vf_nvinfo = all_nvinfo
3553 vf_node_info = self.my_node_info.values()
3555 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3557 feedback_fn("* Verifying node status")
3561 for node_i in node_data_list:
3563 nimg = node_image[node]
3567 feedback_fn("* Skipping offline node %s" % (node,))
3571 if node == master_node:
3573 elif node_i.master_candidate:
3574 ntype = "master candidate"
3575 elif node_i.drained:
3581 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3583 msg = all_nvinfo[node].fail_msg
3584 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3587 nimg.rpc_fail = True
3590 nresult = all_nvinfo[node].payload
3592 nimg.call_ok = self._VerifyNode(node_i, nresult)
3593 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3594 self._VerifyNodeNetwork(node_i, nresult)
3595 self._VerifyNodeUserScripts(node_i, nresult)
3596 self._VerifyOob(node_i, nresult)
3597 self._VerifyFileStoragePaths(node_i, nresult,
3598 node == master_node)
3601 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3602 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3605 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3606 self._UpdateNodeInstances(node_i, nresult, nimg)
3607 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3608 self._UpdateNodeOS(node_i, nresult, nimg)
3610 if not nimg.os_fail:
3611 if refos_img is None:
3613 self._VerifyNodeOS(node_i, nimg, refos_img)
3614 self._VerifyNodeBridges(node_i, nresult, bridges)
3616 # Check whether all running instances are primary for the node. (This
3617 # can no longer be done from _VerifyInstance below, since some of the
3618 # wrong instances could be from other node groups.)
3619 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3621 for inst in non_primary_inst:
3622 test = inst in self.all_inst_info
3623 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3624 "instance should not run on node %s", node_i.name)
3625 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3626 "node is running unknown instance %s", inst)
3628 self._VerifyGroupLVM(node_image, vg_name)
3630 for node, result in extra_lv_nvinfo.items():
3631 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3632 node_image[node], vg_name)
3634 feedback_fn("* Verifying instance status")
3635 for instance in self.my_inst_names:
3637 feedback_fn("* Verifying instance %s" % instance)
3638 inst_config = self.my_inst_info[instance]
3639 self._VerifyInstance(instance, inst_config, node_image,
3642 # If the instance is non-redundant we cannot survive losing its primary
3643 # node, so we are not N+1 compliant.
3644 if inst_config.disk_template not in constants.DTS_MIRRORED:
3645 i_non_redundant.append(instance)
3647 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3648 i_non_a_balanced.append(instance)
3650 feedback_fn("* Verifying orphan volumes")
3651 reserved = utils.FieldSet(*cluster.reserved_lvs)
3653 # We will get spurious "unknown volume" warnings if any node of this group
3654 # is secondary for an instance whose primary is in another group. To avoid
3655 # them, we find these instances and add their volumes to node_vol_should.
3656 for inst in self.all_inst_info.values():
3657 for secondary in inst.secondary_nodes:
3658 if (secondary in self.my_node_info
3659 and inst.name not in self.my_inst_info):
3660 inst.MapLVsByNode(node_vol_should)
3663 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3665 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3666 feedback_fn("* Verifying N+1 Memory redundancy")
3667 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3669 feedback_fn("* Other Notes")
3671 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3672 % len(i_non_redundant))
3674 if i_non_a_balanced:
3675 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3676 % len(i_non_a_balanced))
3679 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3682 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3685 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3689 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3690 """Analyze the post-hooks' result
3692 This method analyses the hook result, handles it, and sends some
3693 nicely-formatted feedback back to the user.
3695 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3696 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3697 @param hooks_results: the results of the multi-node hooks rpc call
3698 @param feedback_fn: function used to send feedback back to the caller
3699 @param lu_result: previous Exec result
3700 @return: the new Exec result, based on the previous result
3704 # We only really run POST phase hooks, only for non-empty groups,
3705 # and are only interested in their results
3706 if not self.my_node_names:
3709 elif phase == constants.HOOKS_PHASE_POST:
3710 # Used to change hooks' output to proper indentation
3711 feedback_fn("* Hooks Results")
3712 assert hooks_results, "invalid result from hooks"
3714 for node_name in hooks_results:
3715 res = hooks_results[node_name]
3717 test = msg and not res.offline
3718 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3719 "Communication failure in hooks execution: %s", msg)
3720 if res.offline or msg:
3721 # No need to investigate payload if node is offline or gave
3724 for script, hkr, output in res.payload:
3725 test = hkr == constants.HKR_FAIL
3726 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3727 "Script %s failed, output:", script)
3729 output = self._HOOKS_INDENT_RE.sub(" ", output)
3730 feedback_fn("%s" % output)
3736 class LUClusterVerifyDisks(NoHooksLU):
3737 """Verifies the status of the cluster disks.
3742 def ExpandNames(self):
3743 self.share_locks = _ShareAll()
3744 self.needed_locks = {
3745 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3748 def Exec(self, feedback_fn):
3749 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3751 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3752 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3753 for group in group_names])
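# Illustrative sketch (not part of the original module; group names are
# hypothetical): Exec above submits one OpGroupVerifyDisks per node group,
# so with groups "default" and "storage" the returned value would wrap
# something like:
#
#   jobs = [[opcodes.OpGroupVerifyDisks(group_name="default")],
#           [opcodes.OpGroupVerifyDisks(group_name="storage")]]
#   return ResultWithJobs(jobs)
#
# Each inner list is one job to be submitted separately.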
3756 class LUGroupVerifyDisks(NoHooksLU):
3757 """Verifies the status of all disks in a node group.
3762 def ExpandNames(self):
3763 # Raises errors.OpPrereqError on its own if group can't be found
3764 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3766 self.share_locks = _ShareAll()
3767 self.needed_locks = {
3768 locking.LEVEL_INSTANCE: [],
3769 locking.LEVEL_NODEGROUP: [],
3770 locking.LEVEL_NODE: [],
3772 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3773 # starts one instance of this opcode for every group, which means all
3774 # nodes will be locked for a short amount of time, so it's better to
3775 # acquire the node allocation lock as well.
3776 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3779 def DeclareLocks(self, level):
3780 if level == locking.LEVEL_INSTANCE:
3781 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3783 # Lock instances optimistically, needs verification once node and group
3784 # locks have been acquired
3785 self.needed_locks[locking.LEVEL_INSTANCE] = \
3786 self.cfg.GetNodeGroupInstances(self.group_uuid)
3788 elif level == locking.LEVEL_NODEGROUP:
3789 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3791 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3792 set([self.group_uuid] +
3793 # Lock all groups used by instances optimistically; this requires
3794 # going via the node before it's locked, requiring verification
# later on
[group_uuid
3797 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3798 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3800 elif level == locking.LEVEL_NODE:
3801 # This will only lock the nodes in the group to be verified which contain
# actual instances
3803 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3804 self._LockInstancesNodes()
3806 # Lock all nodes in group to be verified
3807 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3808 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3809 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3811 def CheckPrereq(self):
3812 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3813 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3814 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3816 assert self.group_uuid in owned_groups
3818 # Check if locked instances are still correct
3819 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3821 # Get instance information
3822 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3824 # Check if node groups for locked instances are still correct
3825 _CheckInstancesNodeGroups(self.cfg, self.instances,
3826 owned_groups, owned_nodes, self.group_uuid)
3828 def Exec(self, feedback_fn):
3829 """Verify integrity of cluster disks.
3831 @rtype: tuple of three items
3832 @return: a tuple of (dict of node-to-node_error, list of instances
3833 which need activate-disks, dict of instance: (node, volume) for
3838 res_instances = set()
3841 nv_dict = _MapInstanceDisksToNodes(
3842 [inst for inst in self.instances.values()
3843 if inst.admin_state == constants.ADMINST_UP])
3846 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3847 set(self.cfg.GetVmCapableNodeList()))
3849 node_lvs = self.rpc.call_lv_list(nodes, [])
3851 for (node, node_res) in node_lvs.items():
3852 if node_res.offline:
3855 msg = node_res.fail_msg
3857 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3858 res_nodes[node] = msg
3861 for lv_name, (_, _, lv_online) in node_res.payload.items():
3862 inst = nv_dict.pop((node, lv_name), None)
3863 if not (lv_online or inst is None):
3864 res_instances.add(inst)
3866 # any leftover items in nv_dict are missing LVs, let's arrange the data
3868 for key, inst in nv_dict.iteritems():
3869 res_missing.setdefault(inst, []).append(list(key))
3871 return (res_nodes, list(res_instances), res_missing)
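# Illustrative sketch (hypothetical node/instance/volume names, not from the
# original source): shape of the tuple returned by Exec above.
#
#   res_nodes     = {"node3.example.com": "Error enumerating LVs: timeout"}
#   res_instances = ["instance7"]   # instances that need activate-disks
#   res_missing   = {"instance9": [["node2.example.com", "xenvg/disk0"]]}
#
# res_missing maps an instance name to the (node, logical volume) pairs that
# were expected but not reported by that node.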
3874 class LUClusterRepairDiskSizes(NoHooksLU):
3875 """Verifies the cluster disk sizes.
3880 def ExpandNames(self):
3881 if self.op.instances:
3882 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3883 # Not getting the node allocation lock as only a specific set of
3884 # instances (and their nodes) is going to be acquired
3885 self.needed_locks = {
3886 locking.LEVEL_NODE_RES: [],
3887 locking.LEVEL_INSTANCE: self.wanted_names,
3889 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3891 self.wanted_names = None
3892 self.needed_locks = {
3893 locking.LEVEL_NODE_RES: locking.ALL_SET,
3894 locking.LEVEL_INSTANCE: locking.ALL_SET,
3896 # This opcode acquires the node locks for all instances
3897 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3900 self.share_locks = {
3901 locking.LEVEL_NODE_RES: 1,
3902 locking.LEVEL_INSTANCE: 0,
3903 locking.LEVEL_NODE_ALLOC: 1,
3906 def DeclareLocks(self, level):
3907 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3908 self._LockInstancesNodes(primary_only=True, level=level)
3910 def CheckPrereq(self):
3911 """Check prerequisites.
3913 This only checks the optional instance list against the existing names.
3916 if self.wanted_names is None:
3917 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3919 self.wanted_instances = \
3920 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3922 def _EnsureChildSizes(self, disk):
3923 """Ensure children of the disk have the needed disk size.
3925 This is valid mainly for DRBD8 and fixes an issue where the
3926 children have a smaller disk size.
3928 @param disk: an L{ganeti.objects.Disk} object
3931 if disk.dev_type == constants.LD_DRBD8:
3932 assert disk.children, "Empty children for DRBD8?"
3933 fchild = disk.children[0]
3934 mismatch = fchild.size < disk.size
3936 self.LogInfo("Child disk has size %d, parent %d, fixing",
3937 fchild.size, disk.size)
3938 fchild.size = disk.size
3940 # and we recurse on this child only, not on the metadev
3941 return self._EnsureChildSizes(fchild) or mismatch
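# Illustrative sketch (hypothetical sizes, not original code): for a DRBD8
# disk whose data child was recorded smaller than the parent,
# _EnsureChildSizes grows the child's recorded size to match:
#
#   disk.size             # e.g. 10240 MiB recorded for the DRBD8 device
#   disk.children[0].size # e.g. 10236 -> reset to 10240, returns True
#
# A True return value tells the caller that the configuration object was
# changed and has to be written back via cfg.Update (see Exec below).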
3945 def Exec(self, feedback_fn):
3946 """Verify the size of cluster disks.
3949 # TODO: check child disks too
3950 # TODO: check differences in size between primary/secondary nodes
3952 for instance in self.wanted_instances:
3953 pnode = instance.primary_node
3954 if pnode not in per_node_disks:
3955 per_node_disks[pnode] = []
3956 for idx, disk in enumerate(instance.disks):
3957 per_node_disks[pnode].append((instance, idx, disk))
3959 assert not (frozenset(per_node_disks.keys()) -
3960 self.owned_locks(locking.LEVEL_NODE_RES)), \
3961 "Not owning correct locks"
3962 assert not self.owned_locks(locking.LEVEL_NODE)
3965 for node, dskl in per_node_disks.items():
3966 newl = [v[2].Copy() for v in dskl]
3968 self.cfg.SetDiskID(dsk, node)
3969 result = self.rpc.call_blockdev_getsize(node, newl)
3971 self.LogWarning("Failure in blockdev_getsize call to node"
3972 " %s, ignoring", node)
3974 if len(result.payload) != len(dskl):
3975 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3976 " result.payload=%s", node, len(dskl), result.payload)
3977 self.LogWarning("Invalid result from node %s, ignoring node results",
3980 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3982 self.LogWarning("Disk %d of instance %s did not return size"
3983 " information, ignoring", idx, instance.name)
3985 if not isinstance(size, (int, long)):
3986 self.LogWarning("Disk %d of instance %s did not return valid"
3987 " size information, ignoring", idx, instance.name)
3990 if size != disk.size:
3991 self.LogInfo("Disk %d of instance %s has mismatched size,"
3992 " correcting: recorded %d, actual %d", idx,
3993 instance.name, disk.size, size)
3995 self.cfg.Update(instance, feedback_fn)
3996 changed.append((instance.name, idx, size))
3997 if self._EnsureChildSizes(disk):
3998 self.cfg.Update(instance, feedback_fn)
3999 changed.append((instance.name, idx, disk.size))
4003 class LUClusterRename(LogicalUnit):
4004 """Rename the cluster.
4007 HPATH = "cluster-rename"
4008 HTYPE = constants.HTYPE_CLUSTER
4010 def BuildHooksEnv(self):
4015 "OP_TARGET": self.cfg.GetClusterName(),
4016 "NEW_NAME": self.op.name,
4019 def BuildHooksNodes(self):
4020 """Build hooks nodes.
4023 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4025 def CheckPrereq(self):
4026 """Verify that the passed name is a valid one.
4029 hostname = netutils.GetHostname(name=self.op.name,
4030 family=self.cfg.GetPrimaryIPFamily())
4032 new_name = hostname.name
4033 self.ip = new_ip = hostname.ip
4034 old_name = self.cfg.GetClusterName()
4035 old_ip = self.cfg.GetMasterIP()
4036 if new_name == old_name and new_ip == old_ip:
4037 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4038 " cluster has changed",
4040 if new_ip != old_ip:
4041 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4042 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4043 " reachable on the network" %
4044 new_ip, errors.ECODE_NOTUNIQUE)
4046 self.op.name = new_name
4048 def Exec(self, feedback_fn):
4049 """Rename the cluster.
4052 clustername = self.op.name
4055 # shutdown the master IP
4056 master_params = self.cfg.GetMasterNetworkParameters()
4057 ems = self.cfg.GetUseExternalMipScript()
4058 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4060 result.Raise("Could not disable the master role")
4063 cluster = self.cfg.GetClusterInfo()
4064 cluster.cluster_name = clustername
4065 cluster.master_ip = new_ip
4066 self.cfg.Update(cluster, feedback_fn)
4068 # update the known hosts file
4069 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4070 node_list = self.cfg.GetOnlineNodeList()
4072 node_list.remove(master_params.name)
4075 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4077 master_params.ip = new_ip
4078 result = self.rpc.call_node_activate_master_ip(master_params.name,
4080 msg = result.fail_msg
4082 self.LogWarning("Could not re-enable the master role on"
4083 " the master, please restart manually: %s", msg)
4088 def _ValidateNetmask(cfg, netmask):
4089 """Checks if a netmask is valid.
4091 @type cfg: L{config.ConfigWriter}
4092 @param cfg: The cluster configuration
4094 @param netmask: the netmask to be verified
4095 @raise errors.OpPrereqError: if the validation fails
4098 ip_family = cfg.GetPrimaryIPFamily()
4100 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4101 except errors.ProgrammerError:
4102 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4103 ip_family, errors.ECODE_INVAL)
4104 if not ipcls.ValidateNetmask(netmask):
4105 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4106 (netmask), errors.ECODE_INVAL)
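# Illustrative sketch (hypothetical values): the master netmask checked here
# is a CIDR prefix length validated against the cluster's primary IP family,
# so assuming an IPv4 primary family something like
#
#   _ValidateNetmask(cfg, 24)   # passes
#   _ValidateNetmask(cfg, 33)   # raises errors.OpPrereqError (ECODE_INVAL)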
4109 class LUClusterSetParams(LogicalUnit):
4110 """Change the parameters of the cluster.
4113 HPATH = "cluster-modify"
4114 HTYPE = constants.HTYPE_CLUSTER
4117 def CheckArguments(self):
4121 if self.op.uid_pool:
4122 uidpool.CheckUidPool(self.op.uid_pool)
4124 if self.op.add_uids:
4125 uidpool.CheckUidPool(self.op.add_uids)
4127 if self.op.remove_uids:
4128 uidpool.CheckUidPool(self.op.remove_uids)
4130 if self.op.master_netmask is not None:
4131 _ValidateNetmask(self.cfg, self.op.master_netmask)
4133 if self.op.diskparams:
4134 for dt_params in self.op.diskparams.values():
4135 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4137 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4138 except errors.OpPrereqError, err:
4139 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4142 def ExpandNames(self):
4143 # FIXME: in the future maybe other cluster params won't require checking on
4144 # all nodes to be modified.
4145 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4146 # resource locks the right thing, shouldn't it be the BGL instead?
4147 self.needed_locks = {
4148 locking.LEVEL_NODE: locking.ALL_SET,
4149 locking.LEVEL_INSTANCE: locking.ALL_SET,
4150 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4151 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4153 self.share_locks = _ShareAll()
4155 def BuildHooksEnv(self):
4160 "OP_TARGET": self.cfg.GetClusterName(),
4161 "NEW_VG_NAME": self.op.vg_name,
4164 def BuildHooksNodes(self):
4165 """Build hooks nodes.
4168 mn = self.cfg.GetMasterNode()
4171 def CheckPrereq(self):
4172 """Check prerequisites.
4174 This checks whether the given params don't conflict and
4175 if the given volume group is valid.
4178 if self.op.vg_name is not None and not self.op.vg_name:
4179 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4180 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4181 " instances exist", errors.ECODE_INVAL)
4183 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4184 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4185 raise errors.OpPrereqError("Cannot disable drbd helper while"
4186 " drbd-based instances exist",
4189 node_list = self.owned_locks(locking.LEVEL_NODE)
4191 # if vg_name is not None, check the given volume group on all nodes
4193 vglist = self.rpc.call_vg_list(node_list)
4194 for node in node_list:
4195 msg = vglist[node].fail_msg
4197 # ignoring down node
4198 self.LogWarning("Error while gathering data on node %s"
4199 " (ignoring node): %s", node, msg)
4201 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4203 constants.MIN_VG_SIZE)
4205 raise errors.OpPrereqError("Error on node '%s': %s" %
4206 (node, vgstatus), errors.ECODE_ENVIRON)
4208 if self.op.drbd_helper:
4209 # checks given drbd helper on all nodes
4210 helpers = self.rpc.call_drbd_helper(node_list)
4211 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4213 self.LogInfo("Not checking drbd helper on offline node %s", node)
4215 msg = helpers[node].fail_msg
4217 raise errors.OpPrereqError("Error checking drbd helper on node"
4218 " '%s': %s" % (node, msg),
4219 errors.ECODE_ENVIRON)
4220 node_helper = helpers[node].payload
4221 if node_helper != self.op.drbd_helper:
4222 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4223 (node, node_helper), errors.ECODE_ENVIRON)
4225 self.cluster = cluster = self.cfg.GetClusterInfo()
4226 # validate params changes
4227 if self.op.beparams:
4228 objects.UpgradeBeParams(self.op.beparams)
4229 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4230 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4232 if self.op.ndparams:
4233 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4234 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4236 # TODO: we need a more general way to handle resetting
4237 # cluster-level parameters to default values
4238 if self.new_ndparams["oob_program"] == "":
4239 self.new_ndparams["oob_program"] = \
4240 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4242 if self.op.hv_state:
4243 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4244 self.cluster.hv_state_static)
4245 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4246 for hv, values in new_hv_state.items())
4248 if self.op.disk_state:
4249 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4250 self.cluster.disk_state_static)
4251 self.new_disk_state = \
4252 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4253 for name, values in svalues.items()))
4254 for storage, svalues in new_disk_state.items())
4257 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4260 all_instances = self.cfg.GetAllInstancesInfo().values()
4262 for group in self.cfg.GetAllNodeGroupsInfo().values():
4263 instances = frozenset([inst for inst in all_instances
4264 if compat.any(node in group.members
4265 for node in inst.all_nodes)])
4266 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4267 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4268 new = _ComputeNewInstanceViolations(ipol,
4269 new_ipolicy, instances)
4271 violations.update(new)
4274 self.LogWarning("After the ipolicy change the following instances"
4275 " violate them: %s",
4276 utils.CommaJoin(utils.NiceSort(violations)))
4278 if self.op.nicparams:
4279 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4280 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4281 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4284 # check all instances for consistency
4285 for instance in self.cfg.GetAllInstancesInfo().values():
4286 for nic_idx, nic in enumerate(instance.nics):
4287 params_copy = copy.deepcopy(nic.nicparams)
4288 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4290 # check parameter syntax
4292 objects.NIC.CheckParameterSyntax(params_filled)
4293 except errors.ConfigurationError, err:
4294 nic_errors.append("Instance %s, nic/%d: %s" %
4295 (instance.name, nic_idx, err))
4297 # if we're moving instances to routed, check that they have an ip
4298 target_mode = params_filled[constants.NIC_MODE]
4299 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4300 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4301 " address" % (instance.name, nic_idx))
4303 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4304 "\n".join(nic_errors), errors.ECODE_INVAL)
4306 # hypervisor list/parameters
4307 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4308 if self.op.hvparams:
4309 for hv_name, hv_dict in self.op.hvparams.items():
4310 if hv_name not in self.new_hvparams:
4311 self.new_hvparams[hv_name] = hv_dict
4313 self.new_hvparams[hv_name].update(hv_dict)
4315 # disk template parameters
4316 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4317 if self.op.diskparams:
4318 for dt_name, dt_params in self.op.diskparams.items():
4319 if dt_name not in self.new_diskparams:
4320 self.new_diskparams[dt_name] = dt_params
4322 self.new_diskparams[dt_name].update(dt_params)
4324 # os hypervisor parameters
4325 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4327 for os_name, hvs in self.op.os_hvp.items():
4328 if os_name not in self.new_os_hvp:
4329 self.new_os_hvp[os_name] = hvs
4331 for hv_name, hv_dict in hvs.items():
4333 # Delete if it exists
4334 self.new_os_hvp[os_name].pop(hv_name, None)
4335 elif hv_name not in self.new_os_hvp[os_name]:
4336 self.new_os_hvp[os_name][hv_name] = hv_dict
4338 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4341 self.new_osp = objects.FillDict(cluster.osparams, {})
4342 if self.op.osparams:
4343 for os_name, osp in self.op.osparams.items():
4344 if os_name not in self.new_osp:
4345 self.new_osp[os_name] = {}
4347 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4350 if not self.new_osp[os_name]:
4351 # we removed all parameters
4352 del self.new_osp[os_name]
4354 # check the parameter validity (remote check)
4355 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4356 os_name, self.new_osp[os_name])
4358 # changes to the hypervisor list
4359 if self.op.enabled_hypervisors is not None:
4360 self.hv_list = self.op.enabled_hypervisors
4361 for hv in self.hv_list:
4362 # if the hypervisor doesn't already exist in the cluster
4363 # hvparams, we initialize it to empty, and then (in both
4364 # cases) we make sure to fill the defaults, as we might not
4365 # have a complete defaults list if the hypervisor wasn't
# enabled before
4367 if hv not in new_hvp:
4369 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4370 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4372 self.hv_list = cluster.enabled_hypervisors
4374 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4375 # either the enabled list has changed, or the parameters have, validate
4376 for hv_name, hv_params in self.new_hvparams.items():
4377 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4378 (self.op.enabled_hypervisors and
4379 hv_name in self.op.enabled_hypervisors)):
4380 # either this is a new hypervisor, or its parameters have changed
4381 hv_class = hypervisor.GetHypervisorClass(hv_name)
4382 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4383 hv_class.CheckParameterSyntax(hv_params)
4384 _CheckHVParams(self, node_list, hv_name, hv_params)
4387 # no need to check any newly-enabled hypervisors, since the
4388 # defaults have already been checked in the above code-block
4389 for os_name, os_hvp in self.new_os_hvp.items():
4390 for hv_name, hv_params in os_hvp.items():
4391 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4392 # we need to fill in the new os_hvp on top of the actual hv_p
4393 cluster_defaults = self.new_hvparams.get(hv_name, {})
4394 new_osp = objects.FillDict(cluster_defaults, hv_params)
4395 hv_class = hypervisor.GetHypervisorClass(hv_name)
4396 hv_class.CheckParameterSyntax(new_osp)
4397 _CheckHVParams(self, node_list, hv_name, new_osp)
4399 if self.op.default_iallocator:
4400 alloc_script = utils.FindFile(self.op.default_iallocator,
4401 constants.IALLOCATOR_SEARCH_PATH,
4403 if alloc_script is None:
4404 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4405 " specified" % self.op.default_iallocator,
4408 def Exec(self, feedback_fn):
4409 """Change the parameters of the cluster.
4412 if self.op.vg_name is not None:
4413 new_volume = self.op.vg_name
4416 if new_volume != self.cfg.GetVGName():
4417 self.cfg.SetVGName(new_volume)
4419 feedback_fn("Cluster LVM configuration already in desired"
4420 " state, not changing")
4421 if self.op.drbd_helper is not None:
4422 new_helper = self.op.drbd_helper
4425 if new_helper != self.cfg.GetDRBDHelper():
4426 self.cfg.SetDRBDHelper(new_helper)
4428 feedback_fn("Cluster DRBD helper already in desired state,"
4430 if self.op.hvparams:
4431 self.cluster.hvparams = self.new_hvparams
4433 self.cluster.os_hvp = self.new_os_hvp
4434 if self.op.enabled_hypervisors is not None:
4435 self.cluster.hvparams = self.new_hvparams
4436 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4437 if self.op.beparams:
4438 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4439 if self.op.nicparams:
4440 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4442 self.cluster.ipolicy = self.new_ipolicy
4443 if self.op.osparams:
4444 self.cluster.osparams = self.new_osp
4445 if self.op.ndparams:
4446 self.cluster.ndparams = self.new_ndparams
4447 if self.op.diskparams:
4448 self.cluster.diskparams = self.new_diskparams
4449 if self.op.hv_state:
4450 self.cluster.hv_state_static = self.new_hv_state
4451 if self.op.disk_state:
4452 self.cluster.disk_state_static = self.new_disk_state
4454 if self.op.candidate_pool_size is not None:
4455 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4456 # we need to update the pool size here, otherwise the save will fail
4457 _AdjustCandidatePool(self, [])
4459 if self.op.maintain_node_health is not None:
4460 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4461 feedback_fn("Note: CONFD was disabled at build time, node health"
4462 " maintenance is not useful (still enabling it)")
4463 self.cluster.maintain_node_health = self.op.maintain_node_health
4465 if self.op.prealloc_wipe_disks is not None:
4466 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4468 if self.op.add_uids is not None:
4469 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4471 if self.op.remove_uids is not None:
4472 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4474 if self.op.uid_pool is not None:
4475 self.cluster.uid_pool = self.op.uid_pool
4477 if self.op.default_iallocator is not None:
4478 self.cluster.default_iallocator = self.op.default_iallocator
4480 if self.op.reserved_lvs is not None:
4481 self.cluster.reserved_lvs = self.op.reserved_lvs
4483 if self.op.use_external_mip_script is not None:
4484 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4486 def helper_os(aname, mods, desc):
4488 lst = getattr(self.cluster, aname)
4489 for key, val in mods:
4490 if key == constants.DDM_ADD:
4492 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4495 elif key == constants.DDM_REMOVE:
4499 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4501 raise errors.ProgrammerError("Invalid modification '%s'" % key)
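# Illustrative sketch (hypothetical OS names): the hidden_os/blacklisted_os
# options handled below are lists of (action, os_name) pairs, e.g.
#
#   self.op.hidden_os = [(constants.DDM_ADD, "debian-image"),
#                        (constants.DDM_REMOVE, "lenny-image")]
#
# helper_os() applies each pair to the corresponding cluster list, only
# warning when an addition already exists or a removal is not found.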
4503 if self.op.hidden_os:
4504 helper_os("hidden_os", self.op.hidden_os, "hidden")
4506 if self.op.blacklisted_os:
4507 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4509 if self.op.master_netdev:
4510 master_params = self.cfg.GetMasterNetworkParameters()
4511 ems = self.cfg.GetUseExternalMipScript()
4512 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4513 self.cluster.master_netdev)
4514 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4516 result.Raise("Could not disable the master ip")
4517 feedback_fn("Changing master_netdev from %s to %s" %
4518 (master_params.netdev, self.op.master_netdev))
4519 self.cluster.master_netdev = self.op.master_netdev
4521 if self.op.master_netmask:
4522 master_params = self.cfg.GetMasterNetworkParameters()
4523 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4524 result = self.rpc.call_node_change_master_netmask(master_params.name,
4525 master_params.netmask,
4526 self.op.master_netmask,
4528 master_params.netdev)
4530 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4533 self.cluster.master_netmask = self.op.master_netmask
4535 self.cfg.Update(self.cluster, feedback_fn)
4537 if self.op.master_netdev:
4538 master_params = self.cfg.GetMasterNetworkParameters()
4539 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4540 self.op.master_netdev)
4541 ems = self.cfg.GetUseExternalMipScript()
4542 result = self.rpc.call_node_activate_master_ip(master_params.name,
4545 self.LogWarning("Could not re-enable the master ip on"
4546 " the master, please restart manually: %s",
4550 def _UploadHelper(lu, nodes, fname):
4551 """Helper for uploading a file and showing warnings.
4554 if os.path.exists(fname):
4555 result = lu.rpc.call_upload_file(nodes, fname)
4556 for to_node, to_result in result.items():
4557 msg = to_result.fail_msg
4559 msg = ("Copy of file %s to node %s failed: %s" %
4560 (fname, to_node, msg))
4564 def _ComputeAncillaryFiles(cluster, redist):
4565 """Compute files external to Ganeti which need to be consistent.
4567 @type redist: boolean
4568 @param redist: Whether to include files which need to be redistributed
4571 # Compute files for all nodes
4573 pathutils.SSH_KNOWN_HOSTS_FILE,
4574 pathutils.CONFD_HMAC_KEY,
4575 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4576 pathutils.SPICE_CERT_FILE,
4577 pathutils.SPICE_CACERT_FILE,
4578 pathutils.RAPI_USERS_FILE,
4582 # we need to ship at least the RAPI certificate
4583 files_all.add(pathutils.RAPI_CERT_FILE)
4585 files_all.update(pathutils.ALL_CERT_FILES)
4586 files_all.update(ssconf.SimpleStore().GetFileList())
4588 if cluster.modify_etc_hosts:
4589 files_all.add(pathutils.ETC_HOSTS)
4591 if cluster.use_external_mip_script:
4592 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4594 # Files which are optional; these must:
4595 # - be present in one other category as well
4596 # - either exist or not exist on all nodes of that category (mc, vm all)
4598 pathutils.RAPI_USERS_FILE,
4601 # Files which should only be on master candidates
4605 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4609 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4610 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4611 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4613 # Files which should only be on VM-capable nodes
4616 for hv_name in cluster.enabled_hypervisors
4618 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4622 for hv_name in cluster.enabled_hypervisors
4624 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4626 # Filenames in each category must be unique
4627 all_files_set = files_all | files_mc | files_vm
4628 assert (len(all_files_set) ==
4629 sum(map(len, [files_all, files_mc, files_vm]))), \
4630 "Found file listed in more than one file list"
4632 # Optional files must be present in one other category
4633 assert all_files_set.issuperset(files_opt), \
4634 "Optional file not in a different required list"
4636 # This one file should never ever be re-distributed via RPC
4637 assert not (redist and
4638 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4640 return (files_all, files_opt, files_mc, files_vm)
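# Illustrative sketch (paths only as examples from the lists built above):
# the returned 4-tuple groups files by where they must be present:
#
#   files_all  # every node, e.g. pathutils.SSH_KNOWN_HOSTS_FILE
#   files_opt  # optional; exist on all nodes of a category or on none
#   files_mc   # master candidates only, e.g. pathutils.CLUSTER_CONF_FILE
#   files_vm   # VM-capable nodes only (hypervisor ancillary files)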
4643 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4644 """Distribute additional files which are part of the cluster configuration.
4646 ConfigWriter takes care of distributing the config and ssconf files, but
4647 there are more files which should be distributed to all nodes. This function
4648 makes sure those are copied.
4650 @param lu: calling logical unit
4651 @param additional_nodes: list of nodes not in the config to distribute to
4652 @type additional_vm: boolean
4653 @param additional_vm: whether the additional nodes are vm-capable or not
4656 # Gather target nodes
4657 cluster = lu.cfg.GetClusterInfo()
4658 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4660 online_nodes = lu.cfg.GetOnlineNodeList()
4661 online_set = frozenset(online_nodes)
4662 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4664 if additional_nodes is not None:
4665 online_nodes.extend(additional_nodes)
4667 vm_nodes.extend(additional_nodes)
4669 # Never distribute to master node
4670 for nodelist in [online_nodes, vm_nodes]:
4671 if master_info.name in nodelist:
4672 nodelist.remove(master_info.name)
4675 (files_all, _, files_mc, files_vm) = \
4676 _ComputeAncillaryFiles(cluster, True)
4678 # Never re-distribute configuration file from here
4679 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4680 pathutils.CLUSTER_CONF_FILE in files_vm)
4681 assert not files_mc, "Master candidates not handled in this function"
4684 (online_nodes, files_all),
4685 (vm_nodes, files_vm),
4689 for (node_list, files) in filemap:
4691 _UploadHelper(lu, node_list, fname)
4694 class LUClusterRedistConf(NoHooksLU):
4695 """Force the redistribution of cluster configuration.
4697 This is a very simple LU.
4702 def ExpandNames(self):
4703 self.needed_locks = {
4704 locking.LEVEL_NODE: locking.ALL_SET,
4705 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4707 self.share_locks = _ShareAll()
4709 def Exec(self, feedback_fn):
4710 """Redistribute the configuration.
4713 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4714 _RedistributeAncillaryFiles(self)
4717 class LUClusterActivateMasterIp(NoHooksLU):
4718 """Activate the master IP on the master node.
4721 def Exec(self, feedback_fn):
4722 """Activate the master IP.
4725 master_params = self.cfg.GetMasterNetworkParameters()
4726 ems = self.cfg.GetUseExternalMipScript()
4727 result = self.rpc.call_node_activate_master_ip(master_params.name,
4729 result.Raise("Could not activate the master IP")
4732 class LUClusterDeactivateMasterIp(NoHooksLU):
4733 """Deactivate the master IP on the master node.
4736 def Exec(self, feedback_fn):
4737 """Deactivate the master IP.
4740 master_params = self.cfg.GetMasterNetworkParameters()
4741 ems = self.cfg.GetUseExternalMipScript()
4742 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4744 result.Raise("Could not deactivate the master IP")
4747 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4748 """Sleep and poll for an instance's disk to sync.
4751 if not instance.disks or disks is not None and not disks:
4754 disks = _ExpandCheckDisks(instance, disks)
4757 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4759 node = instance.primary_node
4762 lu.cfg.SetDiskID(dev, node)
4764 # TODO: Convert to utils.Retry
4767 degr_retries = 10 # in seconds, as we sleep 1 second each time
4771 cumul_degraded = False
4772 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4773 msg = rstats.fail_msg
4775 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4778 raise errors.RemoteError("Can't contact node %s for mirror data,"
4779 " aborting." % node)
4782 rstats = rstats.payload
4784 for i, mstat in enumerate(rstats):
4786 lu.LogWarning("Can't compute data for node %s/%s",
4787 node, disks[i].iv_name)
4790 cumul_degraded = (cumul_degraded or
4791 (mstat.is_degraded and mstat.sync_percent is None))
4792 if mstat.sync_percent is not None:
4794 if mstat.estimated_time is not None:
4795 rem_time = ("%s remaining (estimated)" %
4796 utils.FormatSeconds(mstat.estimated_time))
4797 max_time = mstat.estimated_time
4799 rem_time = "no time estimate"
4800 lu.LogInfo("- device %s: %5.2f%% done, %s",
4801 disks[i].iv_name, mstat.sync_percent, rem_time)
4803 # if we're done but degraded, let's do a few small retries, to
4804 # make sure we see a stable and not transient situation; therefore
4805 # we force restart of the loop
4806 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4807 logging.info("Degraded disks found, %d retries left", degr_retries)
4815 time.sleep(min(60, max_time))
4818 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4820 return not cumul_degraded
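# Illustrative sketch (hypothetical caller, not original code): the boolean
# returned by _WaitForSync is True only when no disk was left degraded, so a
# caller might react roughly like:
#
#   if not _WaitForSync(lu, instance):
#       lu.LogWarning("Disk sync did not complete cleanly")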
4823 def _BlockdevFind(lu, node, dev, instance):
4824 """Wrapper around call_blockdev_find to annotate diskparams.
4826 @param lu: A reference to the lu object
4827 @param node: The node to call out
4828 @param dev: The device to find
4829 @param instance: The instance object the device belongs to
4830 @returns The result of the rpc call
4833 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4834 return lu.rpc.call_blockdev_find(node, disk)
4837 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4838 """Wrapper around L{_CheckDiskConsistencyInner}.
4841 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4842 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4846 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4848 """Check that mirrors are not degraded.
4850 @attention: The device has to be annotated already.
4852 The ldisk parameter, if True, will change the test from the
4853 is_degraded attribute (which represents overall non-ok status for
4854 the device(s)) to the ldisk (representing the local storage status).
4857 lu.cfg.SetDiskID(dev, node)
4861 if on_primary or dev.AssembleOnSecondary():
4862 rstats = lu.rpc.call_blockdev_find(node, dev)
4863 msg = rstats.fail_msg
4865 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4867 elif not rstats.payload:
4868 lu.LogWarning("Can't find disk on node %s", node)
4872 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4874 result = result and not rstats.payload.is_degraded
4877 for child in dev.children:
4878 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4884 class LUOobCommand(NoHooksLU):
4885 """Logical unit for OOB handling.
4889 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4891 def ExpandNames(self):
4892 """Gather locks we need.
4895 if self.op.node_names:
4896 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4897 lock_names = self.op.node_names
4899 lock_names = locking.ALL_SET
4901 self.needed_locks = {
4902 locking.LEVEL_NODE: lock_names,
4905 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4907 if not self.op.node_names:
4908 # Acquire node allocation lock only if all nodes are affected
4909 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4911 def CheckPrereq(self):
4912 """Check prerequisites.
4915 - the node exists in the configuration
4918 Any errors are signaled by raising errors.OpPrereqError.
4922 self.master_node = self.cfg.GetMasterNode()
4924 assert self.op.power_delay >= 0.0
4926 if self.op.node_names:
4927 if (self.op.command in self._SKIP_MASTER and
4928 self.master_node in self.op.node_names):
4929 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4930 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4932 if master_oob_handler:
4933 additional_text = ("run '%s %s %s' if you want to operate on the"
4934 " master regardless") % (master_oob_handler,
4938 additional_text = "it does not support out-of-band operations"
4940 raise errors.OpPrereqError(("Operating on the master node %s is not"
4941 " allowed for %s; %s") %
4942 (self.master_node, self.op.command,
4943 additional_text), errors.ECODE_INVAL)
4945 self.op.node_names = self.cfg.GetNodeList()
4946 if self.op.command in self._SKIP_MASTER:
4947 self.op.node_names.remove(self.master_node)
4949 if self.op.command in self._SKIP_MASTER:
4950 assert self.master_node not in self.op.node_names
4952 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4954 raise errors.OpPrereqError("Node %s not found" % node_name,
4957 self.nodes.append(node)
4959 if (not self.op.ignore_status and
4960 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4961 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4962 " not marked offline") % node_name,
4965 def Exec(self, feedback_fn):
4966 """Execute OOB and return result if we expect any.
4969 master_node = self.master_node
4972 for idx, node in enumerate(utils.NiceSort(self.nodes,
4973 key=lambda node: node.name)):
4974 node_entry = [(constants.RS_NORMAL, node.name)]
4975 ret.append(node_entry)
4977 oob_program = _SupportsOob(self.cfg, node)
4980 node_entry.append((constants.RS_UNAVAIL, None))
4983 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4984 self.op.command, oob_program, node.name)
4985 result = self.rpc.call_run_oob(master_node, oob_program,
4986 self.op.command, node.name,
4990 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4991 node.name, result.fail_msg)
4992 node_entry.append((constants.RS_NODATA, None))
4995 self._CheckPayload(result)
4996 except errors.OpExecError, err:
4997 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4999 node_entry.append((constants.RS_NODATA, None))
5001 if self.op.command == constants.OOB_HEALTH:
5002 # For health we should log important events
5003 for item, status in result.payload:
5004 if status in [constants.OOB_STATUS_WARNING,
5005 constants.OOB_STATUS_CRITICAL]:
5006 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5007 item, node.name, status)
5009 if self.op.command == constants.OOB_POWER_ON:
5011 elif self.op.command == constants.OOB_POWER_OFF:
5012 node.powered = False
5013 elif self.op.command == constants.OOB_POWER_STATUS:
5014 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5015 if powered != node.powered:
5016 logging.warning(("Recorded power state (%s) of node '%s' does not"
5017 " match actual power state (%s)"), node.powered,
5020 # For configuration changing commands we should update the node
5021 if self.op.command in (constants.OOB_POWER_ON,
5022 constants.OOB_POWER_OFF):
5023 self.cfg.Update(node, feedback_fn)
5025 node_entry.append((constants.RS_NORMAL, result.payload))
5027 if (self.op.command == constants.OOB_POWER_ON and
5028 idx < len(self.nodes) - 1):
5029 time.sleep(self.op.power_delay)
5033 def _CheckPayload(self, result):
5034 """Checks if the payload is valid.
5036 @param result: RPC result
5037 @raises errors.OpExecError: If payload is not valid
5041 if self.op.command == constants.OOB_HEALTH:
5042 if not isinstance(result.payload, list):
5043 errs.append("command 'health' is expected to return a list but got %s" %
5044 type(result.payload))
5046 for item, status in result.payload:
5047 if status not in constants.OOB_STATUSES:
5048 errs.append("health item '%s' has invalid status '%s'" %
5051 if self.op.command == constants.OOB_POWER_STATUS:
5052 if not isinstance(result.payload, dict):
5053 errs.append("power-status is expected to return a dict but got %s" %
5054 type(result.payload))
5056 if self.op.command in [
5057 constants.OOB_POWER_ON,
5058 constants.OOB_POWER_OFF,
5059 constants.OOB_POWER_CYCLE,
5061 if result.payload is not None:
5062 errs.append("%s is expected to not return payload but got '%s'" %
5063 (self.op.command, result.payload))
5066 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5067 utils.CommaJoin(errs))
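# Illustrative sketch (hypothetical item names): the payload shapes accepted
# by _CheckPayload above differ per OOB command:
#
#   OOB_HEALTH              -> [("disk0", constants.OOB_STATUS_WARNING), ...]
#   OOB_POWER_STATUS        -> {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE  -> None (no payload expected)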
5070 class _OsQuery(_QueryBase):
5071 FIELDS = query.OS_FIELDS
5073 def ExpandNames(self, lu):
5074 # Lock all nodes in shared mode
5075 # Temporary removal of locks, should be reverted later
5076 # TODO: reintroduce locks when they are lighter-weight
5077 lu.needed_locks = {}
5078 #self.share_locks[locking.LEVEL_NODE] = 1
5079 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5081 # The following variables interact with _QueryBase._GetNames
5083 self.wanted = self.names
5085 self.wanted = locking.ALL_SET
5087 self.do_locking = self.use_locking
5089 def DeclareLocks(self, lu, level):
5093 def _DiagnoseByOS(rlist):
5094 """Remaps a per-node return list into a per-os per-node dictionary
5096 @param rlist: a map with node names as keys and OS objects as values
5099 @return: a dictionary with osnames as keys and as value another
5100 map, with nodes as keys and tuples of (path, status, diagnose,
5101 variants, parameters, api_versions) as values, eg::
5103 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5104 (/srv/..., False, "invalid api")],
5105 "node2": [(/srv/..., True, "", [], [])]}
5110 # we build here the list of nodes that didn't fail the RPC (at RPC
5111 # level), so that nodes with a non-responding node daemon don't
5112 # make all OSes invalid
5113 good_nodes = [node_name for node_name in rlist
5114 if not rlist[node_name].fail_msg]
5115 for node_name, nr in rlist.items():
5116 if nr.fail_msg or not nr.payload:
5118 for (name, path, status, diagnose, variants,
5119 params, api_versions) in nr.payload:
5120 if name not in all_os:
5121 # build a list of nodes for this os containing empty lists
5122 # for each node in node_list
5124 for nname in good_nodes:
5125 all_os[name][nname] = []
5126 # convert params from [name, help] to (name, help)
5127 params = [tuple(v) for v in params]
5128 all_os[name][node_name].append((path, status, diagnose,
5129 variants, params, api_versions))
5132 def _GetQueryData(self, lu):
5133 """Computes the list of nodes and their attributes.
5136 # Locking is not used
5137 assert not (compat.any(lu.glm.is_owned(level)
5138 for level in locking.LEVELS
5139 if level != locking.LEVEL_CLUSTER) or
5140 self.do_locking or self.use_locking)
5142 valid_nodes = [node.name
5143 for node in lu.cfg.GetAllNodesInfo().values()
5144 if not node.offline and node.vm_capable]
5145 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5146 cluster = lu.cfg.GetClusterInfo()
5150 for (os_name, os_data) in pol.items():
5151 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5152 hidden=(os_name in cluster.hidden_os),
5153 blacklisted=(os_name in cluster.blacklisted_os))
5157 api_versions = set()
5159 for idx, osl in enumerate(os_data.values()):
5160 info.valid = bool(info.valid and osl and osl[0][1])
5164 (node_variants, node_params, node_api) = osl[0][3:6]
5167 variants.update(node_variants)
5168 parameters.update(node_params)
5169 api_versions.update(node_api)
5171 # Filter out inconsistent values
5172 variants.intersection_update(node_variants)
5173 parameters.intersection_update(node_params)
5174 api_versions.intersection_update(node_api)
5176 info.variants = list(variants)
5177 info.parameters = list(parameters)
5178 info.api_versions = list(api_versions)
5180 data[os_name] = info
5182 # Prepare data in requested order
5183 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5187 class LUOsDiagnose(NoHooksLU):
5188 """Logical unit for OS diagnose/query.
5194 def _BuildFilter(fields, names):
5195 """Builds a filter for querying OSes.
5198 name_filter = qlang.MakeSimpleFilter("name", names)
5200 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5201 # respective field is not requested
5202 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5203 for fname in ["hidden", "blacklisted"]
5204 if fname not in fields]
5205 if "valid" not in fields:
5206 status_filter.append([qlang.OP_TRUE, "valid"])
5209 status_filter.insert(0, qlang.OP_AND)
5211 status_filter = None
5213 if name_filter and status_filter:
5214 return [qlang.OP_AND, name_filter, status_filter]
5218 return status_filter
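# Illustrative sketch (hypothetical fields/names): for a call such as
# _BuildFilter(["name", "valid"], ["debian-image"]) the combined filter is
# roughly
#
#   [qlang.OP_AND,
#    <name filter for "debian-image">,
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]]]]
#
# because "valid" was requested explicitly, while hidden and blacklisted OSes
# are still filtered out by default.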
5220 def CheckArguments(self):
5221 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5222 self.op.output_fields, False)
5224 def ExpandNames(self):
5225 self.oq.ExpandNames(self)
5227 def Exec(self, feedback_fn):
5228 return self.oq.OldStyleQuery(self)
5231 class _ExtStorageQuery(_QueryBase):
5232 FIELDS = query.EXTSTORAGE_FIELDS
5234 def ExpandNames(self, lu):
5235 # Lock all nodes in shared mode
5236 # Temporary removal of locks, should be reverted later
5237 # TODO: reintroduce locks when they are lighter-weight
5238 lu.needed_locks = {}
5239 #self.share_locks[locking.LEVEL_NODE] = 1
5240 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5242 # The following variables interact with _QueryBase._GetNames
5244 self.wanted = self.names
5246 self.wanted = locking.ALL_SET
5248 self.do_locking = self.use_locking
5250 def DeclareLocks(self, lu, level):
5254 def _DiagnoseByProvider(rlist):
5255 """Remaps a per-node return list into a per-provider per-node dictionary
5257 @param rlist: a map with node names as keys and ExtStorage objects as values
5260 @return: a dictionary with extstorage providers as keys and as
5261 value another map, with nodes as keys and tuples of
5262 (path, status, diagnose, parameters) as values, eg::
5264 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5265 "node2": [(/srv/..., False, "missing file")]
5266 "node3": [(/srv/..., True, "", [])]
5271 # we build here the list of nodes that didn't fail the RPC (at RPC
5272 # level), so that nodes with a non-responding node daemon don't
5273 # make all ExtStorage providers invalid
5274 good_nodes = [node_name for node_name in rlist
5275 if not rlist[node_name].fail_msg]
5276 for node_name, nr in rlist.items():
5277 if nr.fail_msg or not nr.payload:
5279 for (name, path, status, diagnose, params) in nr.payload:
5280 if name not in all_es:
5281 # build a list of nodes for this os containing empty lists
5282 # for each node in node_list
5284 for nname in good_nodes:
5285 all_es[name][nname] = []
5286 # convert params from [name, help] to (name, help)
5287 params = [tuple(v) for v in params]
5288 all_es[name][node_name].append((path, status, diagnose, params))
5291 def _GetQueryData(self, lu):
5292 """Computes the list of nodes and their attributes.
5295 # Locking is not used
5296 assert not (compat.any(lu.glm.is_owned(level)
5297 for level in locking.LEVELS
5298 if level != locking.LEVEL_CLUSTER) or
5299 self.do_locking or self.use_locking)
5301 valid_nodes = [node.name
5302 for node in lu.cfg.GetAllNodesInfo().values()
5303 if not node.offline and node.vm_capable]
5304 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5308 nodegroup_list = lu.cfg.GetNodeGroupList()
5310 for (es_name, es_data) in pol.items():
5311 # For every provider compute the nodegroup validity.
5312 # To do this we need to check the validity of each node in es_data
5313 # and then construct the corresponding nodegroup dict:
5314 # { nodegroup1: status
5315 # nodegroup2: status
5318 for nodegroup in nodegroup_list:
5319 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5321 nodegroup_nodes = ndgrp.members
5322 nodegroup_name = ndgrp.name
5325 for node in nodegroup_nodes:
5326 if node in valid_nodes:
5327 if es_data[node] != []:
5328 node_status = es_data[node][0][1]
5329 node_statuses.append(node_status)
5331 node_statuses.append(False)
5333 if False in node_statuses:
5334 ndgrp_data[nodegroup_name] = False
5336 ndgrp_data[nodegroup_name] = True
5338 # Compute the provider's parameters
5340 for idx, esl in enumerate(es_data.values()):
5341 valid = bool(esl and esl[0][1])
5345 node_params = esl[0][3]
5348 parameters.update(node_params)
5350 # Filter out inconsistent values
5351 parameters.intersection_update(node_params)
5353 params = list(parameters)
5355 # Now fill all the info for this provider
5356 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5357 nodegroup_status=ndgrp_data,
5360 data[es_name] = info
5362 # Prepare data in requested order
5363 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5367 class LUExtStorageDiagnose(NoHooksLU):
5368 """Logical unit for ExtStorage diagnose/query.
5373 def CheckArguments(self):
5374 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5375 self.op.output_fields, False)
5377 def ExpandNames(self):
5378 self.eq.ExpandNames(self)
5380 def Exec(self, feedback_fn):
5381 return self.eq.OldStyleQuery(self)
5384 class LUNodeRemove(LogicalUnit):
5385 """Logical unit for removing a node.
5388 HPATH = "node-remove"
5389 HTYPE = constants.HTYPE_NODE
5391 def BuildHooksEnv(self):
5396 "OP_TARGET": self.op.node_name,
5397 "NODE_NAME": self.op.node_name,
5400 def BuildHooksNodes(self):
5401 """Build hooks nodes.
5403 This doesn't run on the target node in the pre phase as a failed
5404 node would then be impossible to remove.
5407 all_nodes = self.cfg.GetNodeList()
5409 all_nodes.remove(self.op.node_name)
5412 return (all_nodes, all_nodes)
5414 def CheckPrereq(self):
5415 """Check prerequisites.
5418 - the node exists in the configuration
5419 - it does not have primary or secondary instances
5420 - it's not the master
5422 Any errors are signaled by raising errors.OpPrereqError.
5425 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5426 node = self.cfg.GetNodeInfo(self.op.node_name)
5427 assert node is not None
5429 masternode = self.cfg.GetMasterNode()
5430 if node.name == masternode:
5431 raise errors.OpPrereqError("Node is the master node, failover to another"
5432 " node is required", errors.ECODE_INVAL)
5434 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5435 if node.name in instance.all_nodes:
5436 raise errors.OpPrereqError("Instance %s is still running on the node,"
5437 " please remove first" % instance_name,
5439 self.op.node_name = node.name
5442 def Exec(self, feedback_fn):
5443 """Removes the node from the cluster.
5447 logging.info("Stopping the node daemon and removing configs from node %s",
5450 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5452 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5455 # Promote nodes to master candidate as needed
5456 _AdjustCandidatePool(self, exceptions=[node.name])
5457 self.context.RemoveNode(node.name)
5459 # Run post hooks on the node before it's removed
5460 _RunPostHook(self, node.name)
5462 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5463 msg = result.fail_msg
5465 self.LogWarning("Errors encountered on the remote node while leaving"
5466 " the cluster: %s", msg)
5468 # Remove node from our /etc/hosts
5469 if self.cfg.GetClusterInfo().modify_etc_hosts:
5470 master_node = self.cfg.GetMasterNode()
5471 result = self.rpc.call_etc_hosts_modify(master_node,
5472 constants.ETC_HOSTS_REMOVE,
5474 result.Raise("Can't update hosts file with new host data")
5475 _RedistributeAncillaryFiles(self)
5478 class _NodeQuery(_QueryBase):
5479 FIELDS = query.NODE_FIELDS
5481 def ExpandNames(self, lu):
5482 lu.needed_locks = {}
5483 lu.share_locks = _ShareAll()
5486 self.wanted = _GetWantedNodes(lu, self.names)
5488 self.wanted = locking.ALL_SET
5490 self.do_locking = (self.use_locking and
5491 query.NQ_LIVE in self.requested_data)
5494 # If any non-static field is requested we need to lock the nodes
5495 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5496 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5498 def DeclareLocks(self, lu, level):
5501 def _GetQueryData(self, lu):
5502 """Computes the list of nodes and their attributes.
5505 all_info = lu.cfg.GetAllNodesInfo()
5507 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5509 # Gather data as requested
5510 if query.NQ_LIVE in self.requested_data:
5511 # filter out non-vm_capable nodes
5512 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5514 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5515 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5516 [lu.cfg.GetHypervisorType()], es_flags)
5517 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5518 for (name, nresult) in node_data.items()
5519 if not nresult.fail_msg and nresult.payload)
5523 if query.NQ_INST in self.requested_data:
5524 node_to_primary = dict([(name, set()) for name in nodenames])
5525 node_to_secondary = dict([(name, set()) for name in nodenames])
5527 inst_data = lu.cfg.GetAllInstancesInfo()
5529 for inst in inst_data.values():
5530 if inst.primary_node in node_to_primary:
5531 node_to_primary[inst.primary_node].add(inst.name)
5532 for secnode in inst.secondary_nodes:
5533 if secnode in node_to_secondary:
5534 node_to_secondary[secnode].add(inst.name)
5536 node_to_primary = None
5537 node_to_secondary = None
5539 if query.NQ_OOB in self.requested_data:
5540 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5541 for name, node in all_info.iteritems())
5545 if query.NQ_GROUP in self.requested_data:
5546 groups = lu.cfg.GetAllNodeGroupsInfo()
5550 return query.NodeQueryData([all_info[name] for name in nodenames],
5551 live_data, lu.cfg.GetMasterNode(),
5552 node_to_primary, node_to_secondary, groups,
5553 oob_support, lu.cfg.GetClusterInfo())
5556 class LUNodeQuery(NoHooksLU):
5557 """Logical unit for querying nodes.
5560 # pylint: disable=W0142
5563 def CheckArguments(self):
5564 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5565 self.op.output_fields, self.op.use_locking)
5567 def ExpandNames(self):
5568 self.nq.ExpandNames(self)
5570 def DeclareLocks(self, level):
5571 self.nq.DeclareLocks(self, level)
5573 def Exec(self, feedback_fn):
5574 return self.nq.OldStyleQuery(self)
5577 class LUNodeQueryvols(NoHooksLU):
5578 """Logical unit for getting volumes on node(s).
5582 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5583 _FIELDS_STATIC = utils.FieldSet("node")
5585 def CheckArguments(self):
5586 _CheckOutputFields(static=self._FIELDS_STATIC,
5587 dynamic=self._FIELDS_DYNAMIC,
5588 selected=self.op.output_fields)
5590 def ExpandNames(self):
5591 self.share_locks = _ShareAll()
5594 self.needed_locks = {
5595 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5598 self.needed_locks = {
5599 locking.LEVEL_NODE: locking.ALL_SET,
5600 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5603 def Exec(self, feedback_fn):
5604 """Computes the list of nodes and their attributes.
5607 nodenames = self.owned_locks(locking.LEVEL_NODE)
5608 volumes = self.rpc.call_node_volumes(nodenames)
5610 ilist = self.cfg.GetAllInstancesInfo()
5611 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5614 for node in nodenames:
5615 nresult = volumes[node]
5618 msg = nresult.fail_msg
5620 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5623 node_vols = sorted(nresult.payload,
5624 key=operator.itemgetter("dev"))
5626 for vol in node_vols:
5628 for field in self.op.output_fields:
5631 elif field == "phys":
5635 elif field == "name":
5637 elif field == "size":
5638 val = int(float(vol["size"]))
5639 elif field == "instance":
5640 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5642 raise errors.ParameterError(field)
5643 node_output.append(str(val))
5645 output.append(node_output)
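# For illustration only (node and volume names are made up): the "instance"
# field above is resolved by looking up (node, "<vg>/<lv name>") in the
# mapping built by _MapInstanceDisksToNodes; the key is composed as:
#
#   >>> node, vol = "node1", {"vg": "xenvg", "name": "disk0"}
#   >>> (node, vol["vg"] + "/" + vol["name"])
#   ('node1', 'xenvg/disk0')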
5650 class LUNodeQueryStorage(NoHooksLU):
5651 """Logical unit for getting information on storage units on node(s).
5654 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5657 def CheckArguments(self):
5658 _CheckOutputFields(static=self._FIELDS_STATIC,
5659 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5660 selected=self.op.output_fields)
5662 def ExpandNames(self):
5663 self.share_locks = _ShareAll()
5666 self.needed_locks = {
5667 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5670 self.needed_locks = {
5671 locking.LEVEL_NODE: locking.ALL_SET,
5672 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5675 def Exec(self, feedback_fn):
5676 """Computes the list of nodes and their attributes.
5679 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5681 # Always get name to sort by
5682 if constants.SF_NAME in self.op.output_fields:
5683 fields = self.op.output_fields[:]
5685 fields = [constants.SF_NAME] + self.op.output_fields
5687 # Never ask for node or type as it's only known to the LU
5688 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5689 while extra in fields:
5690 fields.remove(extra)
5692 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5693 name_idx = field_idx[constants.SF_NAME]
5695 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5696 data = self.rpc.call_storage_list(self.nodes,
5697 self.op.storage_type, st_args,
5698 self.op.name, fields)
5702 for node in utils.NiceSort(self.nodes):
5703 nresult = data[node]
5707 msg = nresult.fail_msg
5709 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5712 rows = dict([(row[name_idx], row) for row in nresult.payload])
5714 for name in utils.NiceSort(rows.keys()):
5719 for field in self.op.output_fields:
5720 if field == constants.SF_NODE:
5722 elif field == constants.SF_TYPE:
5723 val = self.op.storage_type
5724 elif field in field_idx:
5725 val = row[field_idx[field]]
5727 raise errors.ParameterError(field)
5736 class _InstanceQuery(_QueryBase):
5737 FIELDS = query.INSTANCE_FIELDS
5739 def ExpandNames(self, lu):
5740 lu.needed_locks = {}
5741 lu.share_locks = _ShareAll()
5744 self.wanted = _GetWantedInstances(lu, self.names)
5746 self.wanted = locking.ALL_SET
5748 self.do_locking = (self.use_locking and
5749 query.IQ_LIVE in self.requested_data)
5751 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5752 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5753 lu.needed_locks[locking.LEVEL_NODE] = []
5754 lu.needed_locks[locking.LEVEL_NETWORK] = []
5755 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5757 self.do_grouplocks = (self.do_locking and
5758 query.IQ_NODES in self.requested_data)
5760 def DeclareLocks(self, lu, level):
5762 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5763 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5765 # Lock all groups used by instances optimistically; this requires going
5766 # via the node before it's locked, requiring verification later on
5767 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5769 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5770 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5771 elif level == locking.LEVEL_NODE:
5772 lu._LockInstancesNodes() # pylint: disable=W0212
5774 elif level == locking.LEVEL_NETWORK:
5775 lu.needed_locks[locking.LEVEL_NETWORK] = \
5777 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5778 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
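# For illustration only (UUIDs below are made up): the optimistic group
# locking above flattens the group UUIDs of every locked instance into one
# set, conceptually:
#
#   >>> inst_groups = {"inst1": ["g-uuid-a"], "inst2": ["g-uuid-a", "g-uuid-b"]}
#   >>> wanted = frozenset(g for groups in inst_groups.values() for g in groups)
#   >>> sorted(wanted)
#   ['g-uuid-a', 'g-uuid-b']
#
# The ownership is then re-verified in _CheckGroupLocks, because the
# instance/group assignment may have changed before the locks were granted.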
5781 def _CheckGroupLocks(lu):
5782 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5783 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5785 # Check if node groups for locked instances are still correct
5786 for instance_name in owned_instances:
5787 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5789 def _GetQueryData(self, lu):
5790 """Computes the list of instances and their attributes.
5793 if self.do_grouplocks:
5794 self._CheckGroupLocks(lu)
5796 cluster = lu.cfg.GetClusterInfo()
5797 all_info = lu.cfg.GetAllInstancesInfo()
5799 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5801 instance_list = [all_info[name] for name in instance_names]
5802 nodes = frozenset(itertools.chain(*(inst.all_nodes
5803 for inst in instance_list)))
5804 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5807 wrongnode_inst = set()
5809 # Gather data as requested
5810 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5812 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5814 result = node_data[name]
5816 # offline nodes will be in both lists
5817 assert result.fail_msg
5818 offline_nodes.append(name)
5820 bad_nodes.append(name)
5821 elif result.payload:
5822 for inst in result.payload:
5823 if inst in all_info:
5824 if all_info[inst].primary_node == name:
5825 live_data.update(result.payload)
5827 wrongnode_inst.add(inst)
5829 # orphan instance; we don't list it here as we don't
5830 # handle this case yet in the output of instance listing
5831 logging.warning("Orphan instance '%s' found on node %s",
5833 # else no instance is alive
5837 if query.IQ_DISKUSAGE in self.requested_data:
5838 gmi = ganeti.masterd.instance
5839 disk_usage = dict((inst.name,
5840 gmi.ComputeDiskSize(inst.disk_template,
5841 [{constants.IDISK_SIZE: disk.size}
5842 for disk in inst.disks]))
5843 for inst in instance_list)
5847 if query.IQ_CONSOLE in self.requested_data:
5849 for inst in instance_list:
5850 if inst.name in live_data:
5851 # Instance is running
5852 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5854 consinfo[inst.name] = None
5855 assert set(consinfo.keys()) == set(instance_names)
5859 if query.IQ_NODES in self.requested_data:
5860 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5862 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5863 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5864 for uuid in set(map(operator.attrgetter("group"),
5870 if query.IQ_NETWORKS in self.requested_data:
5871 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
5872 for i in instance_list))
5873 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
5877 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5878 disk_usage, offline_nodes, bad_nodes,
5879 live_data, wrongnode_inst, consinfo,
5880 nodes, groups, networks)
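# For illustration only (names and sizes are made up, and per-template
# overhead is ignored): the IQ_DISKUSAGE branch above maps each instance
# name to its computed disk usage; for a simple template this is roughly the
# sum of the disk sizes:
#
#   >>> disk_sizes = {"inst1": [1024, 512], "inst2": [2048]}
#   >>> usage = dict((name, sum(sizes)) for (name, sizes) in disk_sizes.items())
#   >>> usage == {"inst1": 1536, "inst2": 2048}
#   True
#
# The real computation is delegated to ganeti.masterd.instance.ComputeDiskSize,
# which also accounts for template-specific overhead such as DRBD metadata.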
5883 class LUQuery(NoHooksLU):
5884 """Query for resources/items of a certain kind.
5887 # pylint: disable=W0142
5890 def CheckArguments(self):
5891 qcls = _GetQueryImplementation(self.op.what)
5893 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5895 def ExpandNames(self):
5896 self.impl.ExpandNames(self)
5898 def DeclareLocks(self, level):
5899 self.impl.DeclareLocks(self, level)
5901 def Exec(self, feedback_fn):
5902 return self.impl.NewStyleQuery(self)
5905 class LUQueryFields(NoHooksLU):
5906 """Query for resources/items of a certain kind.
5909 # pylint: disable=W0142
5912 def CheckArguments(self):
5913 self.qcls = _GetQueryImplementation(self.op.what)
5915 def ExpandNames(self):
5916 self.needed_locks = {}
5918 def Exec(self, feedback_fn):
5919 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
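# For illustration only (the mapping below is a simplified stand-in, not the
# real table): LUQuery and LUQueryFields both dispatch on self.op.what via
# _GetQueryImplementation, which conceptually is a lookup from resource kind
# to the _QueryBase subclass handling it:
#
#   >>> impls = {"node": "_NodeQuery", "instance": "_InstanceQuery"}
#   >>> impls["instance"]
#   '_InstanceQuery'
#
# The real lookup is keyed by constants.QR_* values and returns classes;
# unknown kinds are rejected with an OpPrereqError.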
5922 class LUNodeModifyStorage(NoHooksLU):
5923 """Logical unit for modifying a storage volume on a node.
5928 def CheckArguments(self):
5929 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5931 storage_type = self.op.storage_type
5934 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5936 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5937 " modified" % storage_type,
5940 diff = set(self.op.changes.keys()) - modifiable
5942 raise errors.OpPrereqError("The following fields can not be modified for"
5943 " storage units of type '%s': %r" %
5944 (storage_type, list(diff)),
5947 def ExpandNames(self):
5948 self.needed_locks = {
5949 locking.LEVEL_NODE: self.op.node_name,
5952 def Exec(self, feedback_fn):
5953 """Computes the list of nodes and their attributes.
5956 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5957 result = self.rpc.call_storage_modify(self.op.node_name,
5958 self.op.storage_type, st_args,
5959 self.op.name, self.op.changes)
5960 result.Raise("Failed to modify storage unit '%s' on %s" %
5961 (self.op.name, self.op.node_name))
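# For illustration only (field names are made up): the CheckArguments check
# above rejects any change key outside MODIFIABLE_STORAGE_FIELDS with a plain
# set difference:
#
#   >>> modifiable = frozenset(["allocatable"])
#   >>> changes = {"allocatable": False, "size": 10}
#   >>> sorted(set(changes.keys()) - modifiable)
#   ['size']
#
# A non-empty difference aborts the opcode with an OpPrereqError that lists
# the offending fields.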
5964 class LUNodeAdd(LogicalUnit):
5965 """Logical unit for adding node to the cluster.
5969 HTYPE = constants.HTYPE_NODE
5970 _NFLAGS = ["master_capable", "vm_capable"]
5972 def CheckArguments(self):
5973 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5974 # validate/normalize the node name
5975 self.hostname = netutils.GetHostname(name=self.op.node_name,
5976 family=self.primary_ip_family)
5977 self.op.node_name = self.hostname.name
5979 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5980 raise errors.OpPrereqError("Cannot readd the master node",
5983 if self.op.readd and self.op.group:
5984 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5985 " being readded", errors.ECODE_INVAL)
5987 def BuildHooksEnv(self):
5990 This will run on all nodes before, and on all nodes + the new node after.
5994 "OP_TARGET": self.op.node_name,
5995 "NODE_NAME": self.op.node_name,
5996 "NODE_PIP": self.op.primary_ip,
5997 "NODE_SIP": self.op.secondary_ip,
5998 "MASTER_CAPABLE": str(self.op.master_capable),
5999 "VM_CAPABLE": str(self.op.vm_capable),
6002 def BuildHooksNodes(self):
6003 """Build hooks nodes.
6006 # Exclude added node
6007 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6008 post_nodes = pre_nodes + [self.op.node_name, ]
6010 return (pre_nodes, post_nodes)
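# For illustration only (node names are made up, ordering normalized with
# sorted()): the hook node lists above exclude the new node from the
# pre-hooks and include it in the post-hooks:
#
#   >>> node_list, new = ["node1", "node2", "node3"], "node3"
#   >>> pre = sorted(set(node_list) - set([new]))
#   >>> pre, pre + [new]
#   (['node1', 'node2'], ['node1', 'node2', 'node3'])
#
# Removing the node explicitly matters for the re-add case, where it is
# already part of the configuration.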
6012 def CheckPrereq(self):
6013 """Check prerequisites.
6016 - the new node is not already in the config
6018 - its parameters (single/dual homed) match the cluster
6020 Any errors are signaled by raising errors.OpPrereqError.
6024 hostname = self.hostname
6025 node = hostname.name
6026 primary_ip = self.op.primary_ip = hostname.ip
6027 if self.op.secondary_ip is None:
6028 if self.primary_ip_family == netutils.IP6Address.family:
6029 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6030 " IPv4 address must be given as secondary",
6032 self.op.secondary_ip = primary_ip
6034 secondary_ip = self.op.secondary_ip
6035 if not netutils.IP4Address.IsValid(secondary_ip):
6036 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6037 " address" % secondary_ip, errors.ECODE_INVAL)
6039 node_list = cfg.GetNodeList()
6040 if not self.op.readd and node in node_list:
6041 raise errors.OpPrereqError("Node %s is already in the configuration" %
6042 node, errors.ECODE_EXISTS)
6043 elif self.op.readd and node not in node_list:
6044 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6047 self.changed_primary_ip = False
6049 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6050 if self.op.readd and node == existing_node_name:
6051 if existing_node.secondary_ip != secondary_ip:
6052 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6053 " address configuration as before",
6055 if existing_node.primary_ip != primary_ip:
6056 self.changed_primary_ip = True
6060 if (existing_node.primary_ip == primary_ip or
6061 existing_node.secondary_ip == primary_ip or
6062 existing_node.primary_ip == secondary_ip or
6063 existing_node.secondary_ip == secondary_ip):
6064 raise errors.OpPrereqError("New node ip address(es) conflict with"
6065 " existing node %s" % existing_node.name,
6066 errors.ECODE_NOTUNIQUE)
6068 # After this 'if' block, None is no longer a valid value for the
6069 # _capable op attributes
6071 old_node = self.cfg.GetNodeInfo(node)
6072 assert old_node is not None, "Can't retrieve locked node %s" % node
6073 for attr in self._NFLAGS:
6074 if getattr(self.op, attr) is None:
6075 setattr(self.op, attr, getattr(old_node, attr))
6077 for attr in self._NFLAGS:
6078 if getattr(self.op, attr) is None:
6079 setattr(self.op, attr, True)
6081 if self.op.readd and not self.op.vm_capable:
6082 pri, sec = cfg.GetNodeInstances(node)
6084 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6085 " flag set to false, but it already holds"
6086 " instances" % node,
6089 # check that the type of the node (single versus dual homed) is the
6090 # same as for the master
6091 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6092 master_singlehomed = myself.secondary_ip == myself.primary_ip
6093 newbie_singlehomed = secondary_ip == primary_ip
6094 if master_singlehomed != newbie_singlehomed:
6095 if master_singlehomed:
6096 raise errors.OpPrereqError("The master has no secondary ip but the"
6097 " new node has one",
6100 raise errors.OpPrereqError("The master has a secondary ip but the"
6101 " new node doesn't have one",
6104 # checks reachability
6105 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6106 raise errors.OpPrereqError("Node not reachable by ping",
6107 errors.ECODE_ENVIRON)
6109 if not newbie_singlehomed:
6110 # check reachability from my secondary ip to newbie's secondary ip
6111 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6112 source=myself.secondary_ip):
6113 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6114 " based ping to node daemon port",
6115 errors.ECODE_ENVIRON)
6122 if self.op.master_capable:
6123 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6125 self.master_candidate = False
6128 self.new_node = old_node
6130 node_group = cfg.LookupNodeGroup(self.op.group)
6131 self.new_node = objects.Node(name=node,
6132 primary_ip=primary_ip,
6133 secondary_ip=secondary_ip,
6134 master_candidate=self.master_candidate,
6135 offline=False, drained=False,
6136 group=node_group, ndparams={})
6138 if self.op.ndparams:
6139 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6140 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6141 "node", "cluster or group")
6143 if self.op.hv_state:
6144 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6146 if self.op.disk_state:
6147 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6149 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6150 # it a property on the base class.
6151 rpcrunner = rpc.DnsOnlyRunner()
6152 result = rpcrunner.call_version([node])[node]
6153 result.Raise("Can't get version information from node %s" % node)
6154 if constants.PROTOCOL_VERSION == result.payload:
6155 logging.info("Communication to node %s fine, sw version %s match",
6156 node, result.payload)
6158 raise errors.OpPrereqError("Version mismatch master version %s,"
6159 " node version %s" %
6160 (constants.PROTOCOL_VERSION, result.payload),
6161 errors.ECODE_ENVIRON)
6163 vg_name = cfg.GetVGName()
6164 if vg_name is not None:
6165 vparams = {constants.NV_PVLIST: [vg_name]}
6166 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6167 cname = self.cfg.GetClusterName()
6168 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6169 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6171 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6172 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6174 def Exec(self, feedback_fn):
6175 """Adds the new node to the cluster.
6178 new_node = self.new_node
6179 node = new_node.name
6181 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6184 # We are adding a new node, so we assume it's powered
6185 new_node.powered = True
6187 # for re-adds, reset the offline/drained/master-candidate flags;
6188 # we need to reset here, otherwise offline would prevent RPC calls
6189 # later in the procedure; this also means that if the re-add
6190 # fails, we are left with a non-offlined, broken node
6192 new_node.drained = new_node.offline = False # pylint: disable=W0201
6193 self.LogInfo("Readding a node, the offline/drained flags were reset")
6194 # if we demote the node, we do cleanup later in the procedure
6195 new_node.master_candidate = self.master_candidate
6196 if self.changed_primary_ip:
6197 new_node.primary_ip = self.op.primary_ip
6199 # copy the master/vm_capable flags
6200 for attr in self._NFLAGS:
6201 setattr(new_node, attr, getattr(self.op, attr))
6203 # notify the user about any possible mc promotion
6204 if new_node.master_candidate:
6205 self.LogInfo("Node will be a master candidate")
6207 if self.op.ndparams:
6208 new_node.ndparams = self.op.ndparams
6210 new_node.ndparams = {}
6212 if self.op.hv_state:
6213 new_node.hv_state_static = self.new_hv_state
6215 if self.op.disk_state:
6216 new_node.disk_state_static = self.new_disk_state
6218 # Add node to our /etc/hosts, and add key to known_hosts
6219 if self.cfg.GetClusterInfo().modify_etc_hosts:
6220 master_node = self.cfg.GetMasterNode()
6221 result = self.rpc.call_etc_hosts_modify(master_node,
6222 constants.ETC_HOSTS_ADD,
6225 result.Raise("Can't update hosts file with new host data")
6227 if new_node.secondary_ip != new_node.primary_ip:
6228 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6231 node_verify_list = [self.cfg.GetMasterNode()]
6232 node_verify_param = {
6233 constants.NV_NODELIST: ([node], {}),
6234 # TODO: do a node-net-test as well?
6237 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6238 self.cfg.GetClusterName())
6239 for verifier in node_verify_list:
6240 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6241 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6243 for failed in nl_payload:
6244 feedback_fn("ssh/hostname verification failed"
6245 " (checking from %s): %s" %
6246 (verifier, nl_payload[failed]))
6247 raise errors.OpExecError("ssh/hostname verification failed")
6250 _RedistributeAncillaryFiles(self)
6251 self.context.ReaddNode(new_node)
6252 # make sure we redistribute the config
6253 self.cfg.Update(new_node, feedback_fn)
6254 # and make sure the new node will not have old files around
6255 if not new_node.master_candidate:
6256 result = self.rpc.call_node_demote_from_mc(new_node.name)
6257 msg = result.fail_msg
6259 self.LogWarning("Node failed to demote itself from master"
6260 " candidate status: %s" % msg)
6262 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6263 additional_vm=self.op.vm_capable)
6264 self.context.AddNode(new_node, self.proc.GetECId())
6267 class LUNodeSetParams(LogicalUnit):
6268 """Modifies the parameters of a node.
6270 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6271 to the node role (as _ROLE_*)
6272 @cvar _R2F: a dictionary from node role to tuples of flags
6273 @cvar _FLAGS: a list of attribute names corresponding to the flags
6276 HPATH = "node-modify"
6277 HTYPE = constants.HTYPE_NODE
6279 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6281 (True, False, False): _ROLE_CANDIDATE,
6282 (False, True, False): _ROLE_DRAINED,
6283 (False, False, True): _ROLE_OFFLINE,
6284 (False, False, False): _ROLE_REGULAR,
6286 _R2F = dict((v, k) for k, v in _F2R.items())
6287 _FLAGS = ["master_candidate", "drained", "offline"]
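# For illustration only: _F2R maps the (master_candidate, drained, offline)
# flag tuple to a single role and _R2F is its inverse, so for example a
# drained node resolves as:
#
#   >>> _F2R[(False, True, False)] == _ROLE_DRAINED
#   True
#   >>> _R2F[_ROLE_DRAINED]
#   (False, True, False)
#
# Only the four combinations listed in _F2R are valid; anything else trips
# the assertion on old_flags in CheckPrereq.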
6289 def CheckArguments(self):
6290 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6291 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6292 self.op.master_capable, self.op.vm_capable,
6293 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6295 if all_mods.count(None) == len(all_mods):
6296 raise errors.OpPrereqError("Please pass at least one modification",
6298 if all_mods.count(True) > 1:
6299 raise errors.OpPrereqError("Can't set the node into more than one"
6300 " state at the same time",
6303 # Boolean value that tells us whether we might be demoting from MC
6304 self.might_demote = (self.op.master_candidate is False or
6305 self.op.offline is True or
6306 self.op.drained is True or
6307 self.op.master_capable is False)
6309 if self.op.secondary_ip:
6310 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6311 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6312 " address" % self.op.secondary_ip,
6315 self.lock_all = self.op.auto_promote and self.might_demote
6316 self.lock_instances = self.op.secondary_ip is not None
6318 def _InstanceFilter(self, instance):
6319 """Filter for getting affected instances.
6322 return (instance.disk_template in constants.DTS_INT_MIRROR and
6323 self.op.node_name in instance.all_nodes)
6325 def ExpandNames(self):
6327 self.needed_locks = {
6328 locking.LEVEL_NODE: locking.ALL_SET,
6330 # Block allocations when all nodes are locked
6331 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6334 self.needed_locks = {
6335 locking.LEVEL_NODE: self.op.node_name,
6338 # Since modifying a node can have severe effects on currently running
6339 # operations the resource lock is at least acquired in shared mode
6340 self.needed_locks[locking.LEVEL_NODE_RES] = \
6341 self.needed_locks[locking.LEVEL_NODE]
6343 # Get all locks except nodes in shared mode; they are not used for anything
6344 # but read-only access
6345 self.share_locks = _ShareAll()
6346 self.share_locks[locking.LEVEL_NODE] = 0
6347 self.share_locks[locking.LEVEL_NODE_RES] = 0
6348 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6350 if self.lock_instances:
6351 self.needed_locks[locking.LEVEL_INSTANCE] = \
6352 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6354 def BuildHooksEnv(self):
6357 This runs on the master node.
6361 "OP_TARGET": self.op.node_name,
6362 "MASTER_CANDIDATE": str(self.op.master_candidate),
6363 "OFFLINE": str(self.op.offline),
6364 "DRAINED": str(self.op.drained),
6365 "MASTER_CAPABLE": str(self.op.master_capable),
6366 "VM_CAPABLE": str(self.op.vm_capable),
6369 def BuildHooksNodes(self):
6370 """Build hooks nodes.
6373 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6376 def CheckPrereq(self):
6377 """Check prerequisites.
6379 This only checks the instance list against the existing names.
6382 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6384 if self.lock_instances:
6385 affected_instances = \
6386 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6388 # Verify instance locks
6389 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6390 wanted_instances = frozenset(affected_instances.keys())
6391 if wanted_instances - owned_instances:
6392 raise errors.OpPrereqError("Instances affected by changing node %s's"
6393 " secondary IP address have changed since"
6394 " locks were acquired, wanted '%s', have"
6395 " '%s'; retry the operation" %
6397 utils.CommaJoin(wanted_instances),
6398 utils.CommaJoin(owned_instances)),
6401 affected_instances = None
6403 if (self.op.master_candidate is not None or
6404 self.op.drained is not None or
6405 self.op.offline is not None):
6406 # we can't change the master's node flags
6407 if self.op.node_name == self.cfg.GetMasterNode():
6408 raise errors.OpPrereqError("The master role can be changed"
6409 " only via master-failover",
6412 if self.op.master_candidate and not node.master_capable:
6413 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6414 " it a master candidate" % node.name,
6417 if self.op.vm_capable is False:
6418 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6420 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6421 " the vm_capable flag" % node.name,
6424 if node.master_candidate and self.might_demote and not self.lock_all:
6425 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6426 # check if after removing the current node, we're missing master
6428 (mc_remaining, mc_should, _) = \
6429 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6430 if mc_remaining < mc_should:
6431 raise errors.OpPrereqError("Not enough master candidates, please"
6432 " pass auto promote option to allow"
6433 " promotion (--auto-promote or RAPI"
6434 " auto_promote=True)", errors.ECODE_STATE)
6436 self.old_flags = old_flags = (node.master_candidate,
6437 node.drained, node.offline)
6438 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6439 self.old_role = old_role = self._F2R[old_flags]
6441 # Check for ineffective changes
6442 for attr in self._FLAGS:
6443 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6444 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6445 setattr(self.op, attr, None)
6447 # Past this point, any flag change to False means a transition
6448 # away from the respective state, as only real changes are kept
6450 # TODO: We might query the real power state if it supports OOB
6451 if _SupportsOob(self.cfg, node):
6452 if self.op.offline is False and not (node.powered or
6453 self.op.powered is True):
6454 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6455 " offline status can be reset") %
6456 self.op.node_name, errors.ECODE_STATE)
6457 elif self.op.powered is not None:
6458 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6459 " as it does not support out-of-band"
6460 " handling") % self.op.node_name,
6463 # If we're being deofflined/drained, we'll MC ourself if needed
6464 if (self.op.drained is False or self.op.offline is False or
6465 (self.op.master_capable and not node.master_capable)):
6466 if _DecideSelfPromotion(self):
6467 self.op.master_candidate = True
6468 self.LogInfo("Auto-promoting node to master candidate")
6470 # If we're no longer master capable, we'll demote ourselves from MC
6471 if self.op.master_capable is False and node.master_candidate:
6472 self.LogInfo("Demoting from master candidate")
6473 self.op.master_candidate = False
6476 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6477 if self.op.master_candidate:
6478 new_role = self._ROLE_CANDIDATE
6479 elif self.op.drained:
6480 new_role = self._ROLE_DRAINED
6481 elif self.op.offline:
6482 new_role = self._ROLE_OFFLINE
6483 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6484 # False is still in new flags, which means we're un-setting (the
6486 new_role = self._ROLE_REGULAR
6487 else: # no new flags, nothing, keep old role
6490 self.new_role = new_role
6492 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6493 # Trying to transition out of offline status
6494 result = self.rpc.call_version([node.name])[node.name]
6496 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6497 " to report its version: %s" %
6498 (node.name, result.fail_msg),
6501 self.LogWarning("Transitioning node from offline to online state"
6502 " without using re-add. Please make sure the node"
6505 # When changing the secondary ip, verify if this is a single-homed to
6506 # multi-homed transition or vice versa, and apply the relevant restrictions.
6508 if self.op.secondary_ip:
6509 # Ok even without locking, because this can't be changed by any LU
6510 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6511 master_singlehomed = master.secondary_ip == master.primary_ip
6512 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6513 if self.op.force and node.name == master.name:
6514 self.LogWarning("Transitioning from single-homed to multi-homed"
6515 " cluster; all nodes will require a secondary IP"
6518 raise errors.OpPrereqError("Changing the secondary ip on a"
6519 " single-homed cluster requires the"
6520 " --force option to be passed, and the"
6521 " target node to be the master",
6523 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6524 if self.op.force and node.name == master.name:
6525 self.LogWarning("Transitioning from multi-homed to single-homed"
6526 " cluster; secondary IP addresses will have to be"
6529 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6530 " same as the primary IP on a multi-homed"
6531 " cluster, unless the --force option is"
6532 " passed, and the target node is the"
6533 " master", errors.ECODE_INVAL)
6535 assert not (frozenset(affected_instances) -
6536 self.owned_locks(locking.LEVEL_INSTANCE))
6539 if affected_instances:
6540 msg = ("Cannot change secondary IP address: offline node has"
6541 " instances (%s) configured to use it" %
6542 utils.CommaJoin(affected_instances.keys()))
6543 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6545 # On online nodes, check that no instances are running, and that
6546 # the node has the new ip and we can reach it.
6547 for instance in affected_instances.values():
6548 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6549 msg="cannot change secondary ip")
6551 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6552 if master.name != node.name:
6553 # check reachability from master secondary ip to new secondary ip
6554 if not netutils.TcpPing(self.op.secondary_ip,
6555 constants.DEFAULT_NODED_PORT,
6556 source=master.secondary_ip):
6557 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6558 " based ping to node daemon port",
6559 errors.ECODE_ENVIRON)
6561 if self.op.ndparams:
6562 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6563 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6564 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6565 "node", "cluster or group")
6566 self.new_ndparams = new_ndparams
6568 if self.op.hv_state:
6569 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6570 self.node.hv_state_static)
6572 if self.op.disk_state:
6573 self.new_disk_state = \
6574 _MergeAndVerifyDiskState(self.op.disk_state,
6575 self.node.disk_state_static)
6577 def Exec(self, feedback_fn):
6582 old_role = self.old_role
6583 new_role = self.new_role
6587 if self.op.ndparams:
6588 node.ndparams = self.new_ndparams
6590 if self.op.powered is not None:
6591 node.powered = self.op.powered
6593 if self.op.hv_state:
6594 node.hv_state_static = self.new_hv_state
6596 if self.op.disk_state:
6597 node.disk_state_static = self.new_disk_state
6599 for attr in ["master_capable", "vm_capable"]:
6600 val = getattr(self.op, attr)
6602 setattr(node, attr, val)
6603 result.append((attr, str(val)))
6605 if new_role != old_role:
6606 # Tell the node to demote itself, if no longer MC and not offline
6607 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6608 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6610 self.LogWarning("Node failed to demote itself: %s", msg)
6612 new_flags = self._R2F[new_role]
6613 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6615 result.append((desc, str(nf)))
6616 (node.master_candidate, node.drained, node.offline) = new_flags
6618 # we locked all nodes, we adjust the CP before updating this node
6620 _AdjustCandidatePool(self, [node.name])
6622 if self.op.secondary_ip:
6623 node.secondary_ip = self.op.secondary_ip
6624 result.append(("secondary_ip", self.op.secondary_ip))
6626 # this will trigger configuration file update, if needed
6627 self.cfg.Update(node, feedback_fn)
6629 # this will trigger job queue propagation or cleanup if the mc
6631 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6632 self.context.ReaddNode(node)
6637 class LUNodePowercycle(NoHooksLU):
6638 """Powercycles a node.
6643 def CheckArguments(self):
6644 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6645 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6646 raise errors.OpPrereqError("The node is the master and the force"
6647 " parameter was not set",
6650 def ExpandNames(self):
6651 """Locking for PowercycleNode.
6653 This is a last-resort option and shouldn't block on other
6654 jobs. Therefore, we grab no locks.
6657 self.needed_locks = {}
6659 def Exec(self, feedback_fn):
6663 result = self.rpc.call_node_powercycle(self.op.node_name,
6664 self.cfg.GetHypervisorType())
6665 result.Raise("Failed to schedule the reboot")
6666 return result.payload
6669 class LUClusterQuery(NoHooksLU):
6670 """Query cluster configuration.
6675 def ExpandNames(self):
6676 self.needed_locks = {}
6678 def Exec(self, feedback_fn):
6679 """Return cluster config.
6682 cluster = self.cfg.GetClusterInfo()
6685 # Filter just for enabled hypervisors
6686 for os_name, hv_dict in cluster.os_hvp.items():
6687 os_hvp[os_name] = {}
6688 for hv_name, hv_params in hv_dict.items():
6689 if hv_name in cluster.enabled_hypervisors:
6690 os_hvp[os_name][hv_name] = hv_params
6692 # Convert ip_family to ip_version
6693 primary_ip_version = constants.IP4_VERSION
6694 if cluster.primary_ip_family == netutils.IP6Address.family:
6695 primary_ip_version = constants.IP6_VERSION
6698 "software_version": constants.RELEASE_VERSION,
6699 "protocol_version": constants.PROTOCOL_VERSION,
6700 "config_version": constants.CONFIG_VERSION,
6701 "os_api_version": max(constants.OS_API_VERSIONS),
6702 "export_version": constants.EXPORT_VERSION,
6703 "architecture": runtime.GetArchInfo(),
6704 "name": cluster.cluster_name,
6705 "master": cluster.master_node,
6706 "default_hypervisor": cluster.primary_hypervisor,
6707 "enabled_hypervisors": cluster.enabled_hypervisors,
6708 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6709 for hypervisor_name in cluster.enabled_hypervisors]),
6711 "beparams": cluster.beparams,
6712 "osparams": cluster.osparams,
6713 "ipolicy": cluster.ipolicy,
6714 "nicparams": cluster.nicparams,
6715 "ndparams": cluster.ndparams,
6716 "diskparams": cluster.diskparams,
6717 "candidate_pool_size": cluster.candidate_pool_size,
6718 "master_netdev": cluster.master_netdev,
6719 "master_netmask": cluster.master_netmask,
6720 "use_external_mip_script": cluster.use_external_mip_script,
6721 "volume_group_name": cluster.volume_group_name,
6722 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6723 "file_storage_dir": cluster.file_storage_dir,
6724 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6725 "maintain_node_health": cluster.maintain_node_health,
6726 "ctime": cluster.ctime,
6727 "mtime": cluster.mtime,
6728 "uuid": cluster.uuid,
6729 "tags": list(cluster.GetTags()),
6730 "uid_pool": cluster.uid_pool,
6731 "default_iallocator": cluster.default_iallocator,
6732 "reserved_lvs": cluster.reserved_lvs,
6733 "primary_ip_version": primary_ip_version,
6734 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6735 "hidden_os": cluster.hidden_os,
6736 "blacklisted_os": cluster.blacklisted_os,
6742 class LUClusterConfigQuery(NoHooksLU):
6743 """Return configuration values.
6748 def CheckArguments(self):
6749 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6751 def ExpandNames(self):
6752 self.cq.ExpandNames(self)
6754 def DeclareLocks(self, level):
6755 self.cq.DeclareLocks(self, level)
6757 def Exec(self, feedback_fn):
6758 result = self.cq.OldStyleQuery(self)
6760 assert len(result) == 1
6765 class _ClusterQuery(_QueryBase):
6766 FIELDS = query.CLUSTER_FIELDS
6768 #: Do not sort (there is only one item)
6771 def ExpandNames(self, lu):
6772 lu.needed_locks = {}
6774 # The following variables interact with _QueryBase._GetNames
6775 self.wanted = locking.ALL_SET
6776 self.do_locking = self.use_locking
6779 raise errors.OpPrereqError("Can not use locking for cluster queries",
6782 def DeclareLocks(self, lu, level):
6785 def _GetQueryData(self, lu):
6786 """Computes the list of nodes and their attributes.
6789 # Locking is not used
6790 assert not (compat.any(lu.glm.is_owned(level)
6791 for level in locking.LEVELS
6792 if level != locking.LEVEL_CLUSTER) or
6793 self.do_locking or self.use_locking)
6795 if query.CQ_CONFIG in self.requested_data:
6796 cluster = lu.cfg.GetClusterInfo()
6798 cluster = NotImplemented
6800 if query.CQ_QUEUE_DRAINED in self.requested_data:
6801 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6803 drain_flag = NotImplemented
6805 if query.CQ_WATCHER_PAUSE in self.requested_data:
6806 master_name = lu.cfg.GetMasterNode()
6808 result = lu.rpc.call_get_watcher_pause(master_name)
6809 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6812 watcher_pause = result.payload
6814 watcher_pause = NotImplemented
6816 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
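# For illustration only (the strings below are simplified stand-ins for the
# query.CQ_* constants): data sources that were not requested are reported as
# NotImplemented, so the query layer can tell "not gathered" apart from a
# real value:
#
#   >>> requested = set(["config"])
#   >>> drain_flag = True if "queue_drained" in requested else NotImplemented
#   >>> drain_flag
#   NotImplemented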
6819 class LUInstanceActivateDisks(NoHooksLU):
6820 """Bring up an instance's disks.
6825 def ExpandNames(self):
6826 self._ExpandAndLockInstance()
6827 self.needed_locks[locking.LEVEL_NODE] = []
6828 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6830 def DeclareLocks(self, level):
6831 if level == locking.LEVEL_NODE:
6832 self._LockInstancesNodes()
6834 def CheckPrereq(self):
6835 """Check prerequisites.
6837 This checks that the instance is in the cluster.
6840 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6841 assert self.instance is not None, \
6842 "Cannot retrieve locked instance %s" % self.op.instance_name
6843 _CheckNodeOnline(self, self.instance.primary_node)
6845 def Exec(self, feedback_fn):
6846 """Activate the disks.
6849 disks_ok, disks_info = \
6850 _AssembleInstanceDisks(self, self.instance,
6851 ignore_size=self.op.ignore_size)
6853 raise errors.OpExecError("Cannot activate block devices")
6855 if self.op.wait_for_sync:
6856 if not _WaitForSync(self, self.instance):
6857 raise errors.OpExecError("Some disks of the instance are degraded!")
6862 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6864 """Prepare the block devices for an instance.
6866 This sets up the block devices on all nodes.
6868 @type lu: L{LogicalUnit}
6869 @param lu: the logical unit on whose behalf we execute
6870 @type instance: L{objects.Instance}
6871 @param instance: the instance for whose disks we assemble
6872 @type disks: list of L{objects.Disk} or None
6873 @param disks: which disks to assemble (or all, if None)
6874 @type ignore_secondaries: boolean
6875 @param ignore_secondaries: if true, errors on secondary nodes
6876 won't result in an error return from the function
6877 @type ignore_size: boolean
6878 @param ignore_size: if true, the current known size of the disk
6879 will not be used during the disk activation, useful for cases
6880 when the size is wrong
6881 @return: False if the operation failed, otherwise a list of
6882 (host, instance_visible_name, node_visible_name)
6883 with the mapping from node devices to instance devices
6888 iname = instance.name
6889 disks = _ExpandCheckDisks(instance, disks)
6891 # With the two passes mechanism we try to reduce the window of
6892 # opportunity for the race condition of switching DRBD to primary
6893 # before handshaking occurred, but we do not eliminate it
6895 # The proper fix would be to wait (with some limits) until the
6896 # connection has been made and drbd transitions from WFConnection
6897 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6900 # 1st pass, assemble on all nodes in secondary mode
6901 for idx, inst_disk in enumerate(disks):
6902 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6904 node_disk = node_disk.Copy()
6905 node_disk.UnsetSize()
6906 lu.cfg.SetDiskID(node_disk, node)
6907 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6909 msg = result.fail_msg
6911 is_offline_secondary = (node in instance.secondary_nodes and
6913 lu.LogWarning("Could not prepare block device %s on node %s"
6914 " (is_primary=False, pass=1): %s",
6915 inst_disk.iv_name, node, msg)
6916 if not (ignore_secondaries or is_offline_secondary):
6919 # FIXME: race condition on drbd migration to primary
6921 # 2nd pass, do only the primary node
6922 for idx, inst_disk in enumerate(disks):
6925 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6926 if node != instance.primary_node:
6929 node_disk = node_disk.Copy()
6930 node_disk.UnsetSize()
6931 lu.cfg.SetDiskID(node_disk, node)
6932 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6934 msg = result.fail_msg
6936 lu.LogWarning("Could not prepare block device %s on node %s"
6937 " (is_primary=True, pass=2): %s",
6938 inst_disk.iv_name, node, msg)
6941 dev_path = result.payload
6943 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6945 # leave the disks configured for the primary node
6946 # this is a workaround that would be fixed better by
6947 # improving the logical/physical id handling
6949 lu.cfg.SetDiskID(disk, instance.primary_node)
6951 return disks_ok, device_info
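# For illustration only (node and device names are made up): callers unpack
# the (disks_ok, device_info) return value and treat a false first element as
# a fatal assembly error, e.g.:
#
#   >>> disks_ok, device_info = True, [("node1", "disk/0", "/dev/drbd0")]
#   >>> disks_ok
#   True
#   >>> [dev for (_, _, dev) in device_info]
#   ['/dev/drbd0']
#
# device_info only describes the primary node, since only the device paths
# visible there are reported back (see the 2nd pass above).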
6954 def _StartInstanceDisks(lu, instance, force):
6955 """Start the disks of an instance.
6958 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6959 ignore_secondaries=force)
6961 _ShutdownInstanceDisks(lu, instance)
6962 if force is not None and not force:
6964 hint=("If the message above refers to a secondary node,"
6965 " you can retry the operation using '--force'"))
6966 raise errors.OpExecError("Disk consistency error")
6969 class LUInstanceDeactivateDisks(NoHooksLU):
6970 """Shutdown an instance's disks.
6975 def ExpandNames(self):
6976 self._ExpandAndLockInstance()
6977 self.needed_locks[locking.LEVEL_NODE] = []
6978 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6980 def DeclareLocks(self, level):
6981 if level == locking.LEVEL_NODE:
6982 self._LockInstancesNodes()
6984 def CheckPrereq(self):
6985 """Check prerequisites.
6987 This checks that the instance is in the cluster.
6990 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6991 assert self.instance is not None, \
6992 "Cannot retrieve locked instance %s" % self.op.instance_name
6994 def Exec(self, feedback_fn):
6995 """Deactivate the disks
6998 instance = self.instance
7000 _ShutdownInstanceDisks(self, instance)
7002 _SafeShutdownInstanceDisks(self, instance)
7005 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7006 """Shutdown block devices of an instance.
7008 This function checks if an instance is running, before calling
7009 _ShutdownInstanceDisks.
7012 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7013 _ShutdownInstanceDisks(lu, instance, disks=disks)
7016 def _ExpandCheckDisks(instance, disks):
7017 """Return the instance disks selected by the disks list
7019 @type disks: list of L{objects.Disk} or None
7020 @param disks: selected disks
7021 @rtype: list of L{objects.Disk}
7022 @return: selected instance disks to act on
7026 return instance.disks
7028 if not set(disks).issubset(instance.disks):
7029 raise errors.ProgrammerError("Can only act on disks belonging to the"
7034 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7035 """Shutdown block devices of an instance.
7037 This does the shutdown on all nodes of the instance.
7039 If ignore_primary is false, errors on the primary node are ignored.
7044 disks = _ExpandCheckDisks(instance, disks)
7047 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7048 lu.cfg.SetDiskID(top_disk, node)
7049 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7050 msg = result.fail_msg
7052 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7053 disk.iv_name, node, msg)
7054 if ((node == instance.primary_node and not ignore_primary) or
7055 (node != instance.primary_node and not result.offline)):
7060 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7061 """Checks if a node has enough free memory.
7063 This function checks if a given node has the needed amount of free
7064 memory. In case the node has less memory or we cannot get the
7065 information from the node, this function raises an OpPrereqError exception.
7068 @type lu: C{LogicalUnit}
7069 @param lu: a logical unit from which we get configuration data
7071 @param node: the node to check
7072 @type reason: C{str}
7073 @param reason: string to use in the error message
7074 @type requested: C{int}
7075 @param requested: the amount of memory in MiB to check for
7076 @type hypervisor_name: C{str}
7077 @param hypervisor_name: the hypervisor to ask for memory stats
7079 @return: node current free memory
7080 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7081 we cannot check the node
7084 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7085 nodeinfo[node].Raise("Can't get data from node %s" % node,
7086 prereq=True, ecode=errors.ECODE_ENVIRON)
7087 (_, _, (hv_info, )) = nodeinfo[node].payload
7089 free_mem = hv_info.get("memory_free", None)
7090 if not isinstance(free_mem, int):
7091 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7092 " was '%s'" % (node, free_mem),
7093 errors.ECODE_ENVIRON)
7094 if requested > free_mem:
7095 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7096 " needed %s MiB, available %s MiB" %
7097 (node, reason, requested, free_mem),
7102 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7103 """Checks if nodes have enough free disk space in all the VGs.
7105 This function checks if all given nodes have the needed amount of
7106 free disk. In case any node has less disk or we cannot get the
7107 information from the node, this function raises an OpPrereqError exception.
7110 @type lu: C{LogicalUnit}
7111 @param lu: a logical unit from which we get configuration data
7112 @type nodenames: C{list}
7113 @param nodenames: the list of node names to check
7114 @type req_sizes: C{dict}
7115 @param req_sizes: the hash of vg and corresponding amount of disk in
7117 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7118 or we cannot check the node
7121 for vg, req_size in req_sizes.items():
7122 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
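# For illustration only (VG names and sizes are made up): req_sizes maps each
# volume group name to the total space in MiB about to be allocated on it, so
# a caller typically aggregates the new disks per VG first:
#
#   >>> disks = [("xenvg", 1024), ("xenvg", 512), ("fastvg", 256)]
#   >>> req_sizes = {}
#   >>> for (vg, size) in disks:
#   ...   req_sizes[vg] = req_sizes.get(vg, 0) + size
#   >>> req_sizes == {"xenvg": 1536, "fastvg": 256}
#   True
#
# _CheckNodesFreeDiskPerVG then verifies each VG on every node separately.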
7125 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7126 """Checks if nodes have enough free disk space in the specified VG.
7128 This function checks if all given nodes have the needed amount of
7129 free disk. In case any node has less disk or we cannot get the
7130 information from the node, this function raises an OpPrereqError exception.
7133 @type lu: C{LogicalUnit}
7134 @param lu: a logical unit from which we get configuration data
7135 @type nodenames: C{list}
7136 @param nodenames: the list of node names to check
7138 @param vg: the volume group to check
7139 @type requested: C{int}
7140 @param requested: the amount of disk in MiB to check for
7141 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7142 or we cannot check the node
7145 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7146 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7147 for node in nodenames:
7148 info = nodeinfo[node]
7149 info.Raise("Cannot get current information from node %s" % node,
7150 prereq=True, ecode=errors.ECODE_ENVIRON)
7151 (_, (vg_info, ), _) = info.payload
7152 vg_free = vg_info.get("vg_free", None)
7153 if not isinstance(vg_free, int):
7154 raise errors.OpPrereqError("Can't compute free disk space on node"
7155 " %s for vg %s, result was '%s'" %
7156 (node, vg, vg_free), errors.ECODE_ENVIRON)
7157 if requested > vg_free:
7158 raise errors.OpPrereqError("Not enough disk space on target node %s"
7159 " vg %s: required %d MiB, available %d MiB" %
7160 (node, vg, requested, vg_free),
7164 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7165 """Checks if nodes have enough physical CPUs
7167 This function checks if all given nodes have the needed number of
7168 physical CPUs. In case any node has less CPUs or we cannot get the
7169 information from the node, this function raises an OpPrereqError exception.
7172 @type lu: C{LogicalUnit}
7173 @param lu: a logical unit from which we get configuration data
7174 @type nodenames: C{list}
7175 @param nodenames: the list of node names to check
7176 @type requested: C{int}
7177 @param requested: the minimum acceptable number of physical CPUs
7178 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7179 or we cannot check the node
7182 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7183 for node in nodenames:
7184 info = nodeinfo[node]
7185 info.Raise("Cannot get current information from node %s" % node,
7186 prereq=True, ecode=errors.ECODE_ENVIRON)
7187 (_, _, (hv_info, )) = info.payload
7188 num_cpus = hv_info.get("cpu_total", None)
7189 if not isinstance(num_cpus, int):
7190 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7191 " on node %s, result was '%s'" %
7192 (node, num_cpus), errors.ECODE_ENVIRON)
7193 if requested > num_cpus:
7194 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7195 "required" % (node, num_cpus, requested),
7199 class LUInstanceStartup(LogicalUnit):
7200 """Starts an instance.
7203 HPATH = "instance-start"
7204 HTYPE = constants.HTYPE_INSTANCE
7207 def CheckArguments(self):
7209 if self.op.beparams:
7210 # fill the beparams dict
7211 objects.UpgradeBeParams(self.op.beparams)
7212 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7214 def ExpandNames(self):
7215 self._ExpandAndLockInstance()
7216 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7218 def DeclareLocks(self, level):
7219 if level == locking.LEVEL_NODE_RES:
7220 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7222 def BuildHooksEnv(self):
7225 This runs on master, primary and secondary nodes of the instance.
7229 "FORCE": self.op.force,
7232 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7236 def BuildHooksNodes(self):
7237 """Build hooks nodes.
7240 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7243 def CheckPrereq(self):
7244 """Check prerequisites.
7246 This checks that the instance is in the cluster.
7249 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7250 assert self.instance is not None, \
7251 "Cannot retrieve locked instance %s" % self.op.instance_name
7254 if self.op.hvparams:
7255 # check hypervisor parameter syntax (locally)
7256 cluster = self.cfg.GetClusterInfo()
7257 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7258 filled_hvp = cluster.FillHV(instance)
7259 filled_hvp.update(self.op.hvparams)
7260 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7261 hv_type.CheckParameterSyntax(filled_hvp)
7262 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7264 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7266 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7268 if self.primary_offline and self.op.ignore_offline_nodes:
7269 self.LogWarning("Ignoring offline primary node")
7271 if self.op.hvparams or self.op.beparams:
7272 self.LogWarning("Overridden parameters are ignored")
7274 _CheckNodeOnline(self, instance.primary_node)
7276 bep = self.cfg.GetClusterInfo().FillBE(instance)
7277 bep.update(self.op.beparams)
7279 # check bridges existence
7280 _CheckInstanceBridgesExist(self, instance)
7282 remote_info = self.rpc.call_instance_info(instance.primary_node,
7284 instance.hypervisor)
7285 remote_info.Raise("Error checking node %s" % instance.primary_node,
7286 prereq=True, ecode=errors.ECODE_ENVIRON)
7287 if not remote_info.payload: # not running already
7288 _CheckNodeFreeMemory(self, instance.primary_node,
7289 "starting instance %s" % instance.name,
7290 bep[constants.BE_MINMEM], instance.hypervisor)
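# For illustration only (keys are simplified stand-ins for the constants.BE_*
# names): the parameter handling in CheckPrereq above layers the per-opcode
# overrides on top of the cluster defaults before validating, which is a
# plain dict update:
#
#   >>> cluster_be = {"minmem": 128, "maxmem": 256}
#   >>> op_be = {"maxmem": 512}
#   >>> bep = dict(cluster_be)
#   >>> bep.update(op_be)
#   >>> bep == {"minmem": 128, "maxmem": 512}
#   True
#
# The real merge is done by cluster.FillBE/FillHV plus the update() calls
# above.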
7292 def Exec(self, feedback_fn):
7293 """Start the instance.
7296 instance = self.instance
7297 force = self.op.force
7299 if not self.op.no_remember:
7300 self.cfg.MarkInstanceUp(instance.name)
7302 if self.primary_offline:
7303 assert self.op.ignore_offline_nodes
7304 self.LogInfo("Primary node offline, marked instance as started")
7306 node_current = instance.primary_node
7308 _StartInstanceDisks(self, instance, force)
7311 self.rpc.call_instance_start(node_current,
7312 (instance, self.op.hvparams,
7314 self.op.startup_paused)
7315 msg = result.fail_msg
7317 _ShutdownInstanceDisks(self, instance)
7318 raise errors.OpExecError("Could not start instance: %s" % msg)
7321 class LUInstanceReboot(LogicalUnit):
7322 """Reboot an instance.
7325 HPATH = "instance-reboot"
7326 HTYPE = constants.HTYPE_INSTANCE
7329 def ExpandNames(self):
7330 self._ExpandAndLockInstance()
7332 def BuildHooksEnv(self):
7335 This runs on master, primary and secondary nodes of the instance.
7339 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7340 "REBOOT_TYPE": self.op.reboot_type,
7341 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7344 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7348 def BuildHooksNodes(self):
7349 """Build hooks nodes.
7352 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7355 def CheckPrereq(self):
7356 """Check prerequisites.
7358 This checks that the instance is in the cluster.
7361 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7362 assert self.instance is not None, \
7363 "Cannot retrieve locked instance %s" % self.op.instance_name
7364 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7365 _CheckNodeOnline(self, instance.primary_node)
7367 # check bridges existence
7368 _CheckInstanceBridgesExist(self, instance)
7370 def Exec(self, feedback_fn):
7371 """Reboot the instance.
7374 instance = self.instance
7375 ignore_secondaries = self.op.ignore_secondaries
7376 reboot_type = self.op.reboot_type
7378 remote_info = self.rpc.call_instance_info(instance.primary_node,
7380 instance.hypervisor)
7381 remote_info.Raise("Error checking node %s" % instance.primary_node)
7382 instance_running = bool(remote_info.payload)
7384 node_current = instance.primary_node
7386 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7387 constants.INSTANCE_REBOOT_HARD]:
7388 for disk in instance.disks:
7389 self.cfg.SetDiskID(disk, node_current)
7390 result = self.rpc.call_instance_reboot(node_current, instance,
7392 self.op.shutdown_timeout)
7393 result.Raise("Could not reboot instance")
7395 if instance_running:
7396 result = self.rpc.call_instance_shutdown(node_current, instance,
7397 self.op.shutdown_timeout)
7398 result.Raise("Could not shutdown instance for full reboot")
7399 _ShutdownInstanceDisks(self, instance)
7401 self.LogInfo("Instance %s was already stopped, starting now",
7403 _StartInstanceDisks(self, instance, ignore_secondaries)
7404 result = self.rpc.call_instance_start(node_current,
7405 (instance, None, None), False)
7406 msg = result.fail_msg
7408 _ShutdownInstanceDisks(self, instance)
7409 raise errors.OpExecError("Could not start instance for"
7410 " full reboot: %s" % msg)
7412 self.cfg.MarkInstanceUp(instance.name)
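# For illustration only (the strings stand in for the
# constants.INSTANCE_REBOOT_* values): the hypervisor-level reboot is only
# used when the instance is running and a soft/hard reboot was requested;
# every other case falls back to shutdown followed by start:
#
#   >>> def use_hv_reboot(running, reboot_type):
#   ...   return running and reboot_type in ("soft", "hard")
#   >>> (use_hv_reboot(True, "hard"), use_hv_reboot(True, "full"),
#   ...  use_hv_reboot(False, "hard"))
#   (True, False, False)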
7415 class LUInstanceShutdown(LogicalUnit):
7416 """Shutdown an instance.
7419 HPATH = "instance-stop"
7420 HTYPE = constants.HTYPE_INSTANCE
7423 def ExpandNames(self):
7424 self._ExpandAndLockInstance()
7426 def BuildHooksEnv(self):
7429 This runs on master, primary and secondary nodes of the instance.
7432 env = _BuildInstanceHookEnvByObject(self, self.instance)
7433 env["TIMEOUT"] = self.op.timeout
7436 def BuildHooksNodes(self):
7437 """Build hooks nodes.
7440 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7443 def CheckPrereq(self):
7444 """Check prerequisites.
7446 This checks that the instance is in the cluster.
7449 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7450 assert self.instance is not None, \
7451 "Cannot retrieve locked instance %s" % self.op.instance_name
7453 if not self.op.force:
7454 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7456 self.LogWarning("Ignoring offline instance check")
7458 self.primary_offline = \
7459 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7461 if self.primary_offline and self.op.ignore_offline_nodes:
7462 self.LogWarning("Ignoring offline primary node")
7464 _CheckNodeOnline(self, self.instance.primary_node)
7466 def Exec(self, feedback_fn):
7467 """Shutdown the instance.
7470 instance = self.instance
7471 node_current = instance.primary_node
7472 timeout = self.op.timeout
7474 # If the instance is offline we shouldn't mark it as down, as that
7475 # resets the offline flag.
7476 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7477 self.cfg.MarkInstanceDown(instance.name)
7479 if self.primary_offline:
7480 assert self.op.ignore_offline_nodes
7481 self.LogInfo("Primary node offline, marked instance as stopped")
7482     else:
7483       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7484       msg = result.fail_msg
7485       if msg:
7486         self.LogWarning("Could not shutdown instance: %s", msg)
7488       _ShutdownInstanceDisks(self, instance)
7491 class LUInstanceReinstall(LogicalUnit):
7492 """Reinstall an instance.
7495 HPATH = "instance-reinstall"
7496 HTYPE = constants.HTYPE_INSTANCE
7499 def ExpandNames(self):
7500 self._ExpandAndLockInstance()
7502 def BuildHooksEnv(self):
7505 This runs on master, primary and secondary nodes of the instance.
7508 return _BuildInstanceHookEnvByObject(self, self.instance)
7510 def BuildHooksNodes(self):
7511 """Build hooks nodes.
7513     """
7514     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7515     return (nl, nl)
7517 def CheckPrereq(self):
7518 """Check prerequisites.
7520 This checks that the instance is in the cluster and is not running.
7523 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7524 assert instance is not None, \
7525 "Cannot retrieve locked instance %s" % self.op.instance_name
7526 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7527 " offline, cannot reinstall")
7529 if instance.disk_template == constants.DT_DISKLESS:
7530 raise errors.OpPrereqError("Instance '%s' has no disks" %
7531                                  self.op.instance_name,
7532                                  errors.ECODE_INVAL)
7533 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7535 if self.op.os_type is not None:
7537 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7538 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7539       instance_os = self.op.os_type
7540     else:
7541       instance_os = instance.os
7543 nodelist = list(instance.all_nodes)
7545 if self.op.osparams:
7546 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7547 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7548       self.os_inst = i_osdict # the new dict (without defaults)
7549     else:
7550       self.os_inst = {}
7552 self.instance = instance
7554 def Exec(self, feedback_fn):
7555 """Reinstall the instance.
7558 inst = self.instance
7560 if self.op.os_type is not None:
7561 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7562 inst.os = self.op.os_type
7563 # Write to configuration
7564 self.cfg.Update(inst, feedback_fn)
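    # The instance is known to be down at this point (CheckPrereq enforces
    # INSTANCE_DOWN), so its disks are activated only for the duration of the
    # OS create scripts and are always deactivated again afterwards.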
7566 _StartInstanceDisks(self, inst, None)
7567     try:
7568       feedback_fn("Running the instance OS create scripts...")
7569 # FIXME: pass debug option from opcode to backend
7570 result = self.rpc.call_instance_os_add(inst.primary_node,
7571 (inst, self.os_inst), True,
7572 self.op.debug_level)
7573 result.Raise("Could not install OS for instance %s on node %s" %
7574 (inst.name, inst.primary_node))
7575     finally:
7576       _ShutdownInstanceDisks(self, inst)
7579 class LUInstanceRecreateDisks(LogicalUnit):
7580 """Recreate an instance's missing disks.
7583 HPATH = "instance-recreate-disks"
7584 HTYPE = constants.HTYPE_INSTANCE
7587 _MODIFYABLE = compat.UniqueFrozenset([
7588 constants.IDISK_SIZE,
7589     constants.IDISK_MODE,
7590     ])
7592 # New or changed disk parameters may have different semantics
7593 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7594 constants.IDISK_ADOPT,
7596 # TODO: Implement support changing VG while recreating
7597     constants.IDISK_VG,
7598     constants.IDISK_METAVG,
7599     constants.IDISK_PROVIDER,
7600     ]))
7602 def _RunAllocator(self):
7603 """Run the allocator based on input opcode.
7606 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7609 # The allocator should actually run in "relocate" mode, but current
7610 # allocators don't support relocating all the nodes of an instance at
7611 # the same time. As a workaround we use "allocate" mode, but this is
7612 # suboptimal for two reasons:
7613 # - The instance name passed to the allocator is present in the list of
7614 # existing instances, so there could be a conflict within the
7615 # internal structures of the allocator. This doesn't happen with the
7616 # current allocators, but it's a liability.
7617 # - The allocator counts the resources used by the instance twice: once
7618 # because the instance exists already, and once because it tries to
7619 # allocate a new instance.
7620 # The allocator could choose some of the nodes on which the instance is
7621 # running, but that's not a problem. If the instance nodes are broken,
7622     # they should already be marked as drained or offline, and hence
7623 # skipped by the allocator. If instance disks have been lost for other
7624 # reasons, then recreating the disks on the same nodes should be fine.
7625 disk_template = self.instance.disk_template
7626 spindle_use = be_full[constants.BE_SPINDLE_USE]
7627 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7628 disk_template=disk_template,
7629 tags=list(self.instance.GetTags()),
7630 os=self.instance.os,
7631                                         nics=[{}],
7632                                         vcpus=be_full[constants.BE_VCPUS],
7633 memory=be_full[constants.BE_MAXMEM],
7634 spindle_use=spindle_use,
7635 disks=[{constants.IDISK_SIZE: d.size,
7636 constants.IDISK_MODE: d.mode}
7637 for d in self.instance.disks],
7638 hypervisor=self.instance.hypervisor,
7639 node_whitelist=None)
7640 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7642 ial.Run(self.op.iallocator)
7644 assert req.RequiredNodes() == len(self.instance.all_nodes)
7646     if not ial.success:
7647       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7648                                  " %s" % (self.op.iallocator, ial.info),
7649                                  errors.ECODE_NORES)
7651     self.op.nodes = ial.result
7652 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7653 self.op.instance_name, self.op.iallocator,
7654 utils.CommaJoin(ial.result))
7656 def CheckArguments(self):
7657 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7658 # Normalize and convert deprecated list of disk indices
7659 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
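      # e.g. a deprecated "[2, 0]" becomes "[(0, {}), (2, {})]"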
7661 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7662     if duplicates:
7663       raise errors.OpPrereqError("Some disks have been specified more than"
7664                                  " once: %s" % utils.CommaJoin(duplicates),
7665                                  errors.ECODE_INVAL)
7667 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7668 # when neither iallocator nor nodes are specified
7669 if self.op.iallocator or self.op.nodes:
7670 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7672 for (idx, params) in self.op.disks:
7673 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7674 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7675       if unsupported:
7676         raise errors.OpPrereqError("Parameters for disk %s try to change"
7677                                    " unmodifiable parameter(s): %s" %
7678                                    (idx, utils.CommaJoin(unsupported)),
7679                                    errors.ECODE_INVAL)
7681 def ExpandNames(self):
7682 self._ExpandAndLockInstance()
7683 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7685     if self.op.nodes:
7686       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7687       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7688     else:
7689       self.needed_locks[locking.LEVEL_NODE] = []
7690 if self.op.iallocator:
7691 # iallocator will select a new node in the same group
7692 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7693 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7695 self.needed_locks[locking.LEVEL_NODE_RES] = []
7697 def DeclareLocks(self, level):
7698 if level == locking.LEVEL_NODEGROUP:
7699 assert self.op.iallocator is not None
7700 assert not self.op.nodes
7701 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7702 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7703 # Lock the primary group used by the instance optimistically; this
7704 # requires going via the node before it's locked, requiring
7705 # verification later on
7706 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7707 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
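      # The group membership is re-checked in CheckPrereq once the node locks
      # are actually held (see _CheckInstanceNodeGroups there).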
7709 elif level == locking.LEVEL_NODE:
7710 # If an allocator is used, then we lock all the nodes in the current
7711 # instance group, as we don't know yet which ones will be selected;
7712 # if we replace the nodes without using an allocator, locks are
7713 # already declared in ExpandNames; otherwise, we need to lock all the
7714 # instance nodes for disk re-creation
7715 if self.op.iallocator:
7716 assert not self.op.nodes
7717 assert not self.needed_locks[locking.LEVEL_NODE]
7718 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7720 # Lock member nodes of the group of the primary node
7721 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7722 self.needed_locks[locking.LEVEL_NODE].extend(
7723 self.cfg.GetNodeGroup(group_uuid).members)
7725 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7726 elif not self.op.nodes:
7727 self._LockInstancesNodes(primary_only=False)
7728 elif level == locking.LEVEL_NODE_RES:
7730 self.needed_locks[locking.LEVEL_NODE_RES] = \
7731 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7733 def BuildHooksEnv(self):
7736 This runs on master, primary and secondary nodes of the instance.
7739 return _BuildInstanceHookEnvByObject(self, self.instance)
7741 def BuildHooksNodes(self):
7742 """Build hooks nodes.
7744     """
7745     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7746     return (nl, nl)
7748 def CheckPrereq(self):
7749 """Check prerequisites.
7751 This checks that the instance is in the cluster and is not running.
7754 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7755 assert instance is not None, \
7756 "Cannot retrieve locked instance %s" % self.op.instance_name
7757     if self.op.nodes:
7758       if len(self.op.nodes) != len(instance.all_nodes):
7759         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7760                                    " %d replacement nodes were specified" %
7761                                    (instance.name, len(instance.all_nodes),
7762                                     len(self.op.nodes)),
7763                                    errors.ECODE_INVAL)
7764       assert instance.disk_template != constants.DT_DRBD8 or \
7765              len(self.op.nodes) == 2
7766       assert instance.disk_template != constants.DT_PLAIN or \
7767              len(self.op.nodes) == 1
7768       primary_node = self.op.nodes[0]
7769     else:
7770       primary_node = instance.primary_node
7771 if not self.op.iallocator:
7772 _CheckNodeOnline(self, primary_node)
7774 if instance.disk_template == constants.DT_DISKLESS:
7775 raise errors.OpPrereqError("Instance '%s' has no disks" %
7776 self.op.instance_name, errors.ECODE_INVAL)
7778 # Verify if node group locks are still correct
7779 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7780     if owned_groups:
7781       # Node group locks are acquired only for the primary node (and only
7782       # when the allocator is used)
7783       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7784                                primary_only=True)
7786 # if we replace nodes *and* the old primary is offline, we don't
7787 # check the instance state
7788 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7789 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7790 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7791 msg="cannot recreate disks")
7793     if self.op.disks:
7794       self.disks = dict(self.op.disks)
7795     else:
7796       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7798     maxidx = max(self.disks.keys())
7799     if maxidx >= len(instance.disks):
7800       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7801                                  errors.ECODE_INVAL)
7803 if ((self.op.nodes or self.op.iallocator) and
7804 sorted(self.disks.keys()) != range(len(instance.disks))):
7805 raise errors.OpPrereqError("Can't recreate disks partially and"
7806                                  " change the nodes at the same time",
7807                                  errors.ECODE_INVAL)
7809 self.instance = instance
7811 if self.op.iallocator:
7812 self._RunAllocator()
7813 # Release unneeded node and node resource locks
7814 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7815 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7816 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7818 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7820 def Exec(self, feedback_fn):
7821 """Recreate the disks.
7824 instance = self.instance
7826 assert (self.owned_locks(locking.LEVEL_NODE) ==
7827 self.owned_locks(locking.LEVEL_NODE_RES))
7829     to_skip = []
7830     mods = [] # keeps track of needed changes
7832     for idx, disk in enumerate(instance.disks):
7833       try:
7834         changes = self.disks[idx]
7835       except KeyError:
7836         # Disk should not be recreated
7837         to_skip.append(idx)
7838         continue
7840       # update secondaries for disks, if needed
7841 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7842 # need to update the nodes and minors
7843 assert len(self.op.nodes) == 2
7844 assert len(disk.logical_id) == 6 # otherwise disk internals
7846 (_, _, old_port, _, _, old_secret) = disk.logical_id
7847 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7848 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7849 new_minors[0], new_minors[1], old_secret)
7850         assert len(disk.logical_id) == len(new_id)
7851       else:
7852         new_id = None
7854       mods.append((idx, new_id, changes))
7856 # now that we have passed all asserts above, we can apply the mods
7857 # in a single run (to avoid partial changes)
7858 for idx, new_id, changes in mods:
7859 disk = instance.disks[idx]
7860 if new_id is not None:
7861 assert disk.dev_type == constants.LD_DRBD8
7862 disk.logical_id = new_id
7863       if changes:
7864         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7865                     mode=changes.get(constants.IDISK_MODE, None))
7867 # change primary node, if needed
7868     if self.op.nodes:
7869       instance.primary_node = self.op.nodes[0]
7870       self.LogWarning("Changing the instance's nodes, you will have to"
7871                       " remove any disks left on the older nodes manually")
7873     if self.op.nodes:
7874       self.cfg.Update(instance, feedback_fn)
7876 # All touched nodes must be locked
7877 mylocks = self.owned_locks(locking.LEVEL_NODE)
7878 assert mylocks.issuperset(frozenset(instance.all_nodes))
7879 _CreateDisks(self, instance, to_skip=to_skip)
7882 class LUInstanceRename(LogicalUnit):
7883 """Rename an instance.
7886 HPATH = "instance-rename"
7887 HTYPE = constants.HTYPE_INSTANCE
7889 def CheckArguments(self):
7893 if self.op.ip_check and not self.op.name_check:
7894 # TODO: make the ip check more flexible and not depend on the name check
7895       raise errors.OpPrereqError("IP address check requires a name check",
7896                                  errors.ECODE_INVAL)
7898 def BuildHooksEnv(self):
7901 This runs on master, primary and secondary nodes of the instance.
7904 env = _BuildInstanceHookEnvByObject(self, self.instance)
7905     env["INSTANCE_NEW_NAME"] = self.op.new_name
7906     return env
7908 def BuildHooksNodes(self):
7909 """Build hooks nodes.
7911     """
7912     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7913     return (nl, nl)
7915 def CheckPrereq(self):
7916 """Check prerequisites.
7918 This checks that the instance is in the cluster and is not running.
7921 self.op.instance_name = _ExpandInstanceName(self.cfg,
7922 self.op.instance_name)
7923 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7924 assert instance is not None
7925 _CheckNodeOnline(self, instance.primary_node)
7926 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7927 msg="cannot rename")
7928 self.instance = instance
7930 new_name = self.op.new_name
7931 if self.op.name_check:
7932 hostname = _CheckHostnameSane(self, new_name)
7933 new_name = self.op.new_name = hostname.name
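      # The resolved IP is probed on the node daemon port only to detect an
      # address that is already in use; any listener there triggers the error
      # below.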
7934 if (self.op.ip_check and
7935 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7936 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7937 (hostname.ip, new_name),
7938 errors.ECODE_NOTUNIQUE)
7940 instance_list = self.cfg.GetInstanceList()
7941 if new_name in instance_list and new_name != instance.name:
7942 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7943 new_name, errors.ECODE_EXISTS)
7945 def Exec(self, feedback_fn):
7946 """Rename the instance.
7949 inst = self.instance
7950 old_name = inst.name
7952 rename_file_storage = False
7953 if (inst.disk_template in constants.DTS_FILEBASED and
7954 self.op.new_name != inst.name):
7955 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7956 rename_file_storage = True
7958 self.cfg.RenameInstance(inst.name, self.op.new_name)
7959 # Change the instance lock. This is definitely safe while we hold the BGL.
7960 # Otherwise the new lock would have to be added in acquired mode.
7962 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7963 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7964 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7966 # re-read the instance from the configuration after rename
7967 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7969 if rename_file_storage:
7970 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7971 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7972 old_file_storage_dir,
7973 new_file_storage_dir)
7974 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7975 " (but the instance has been renamed in Ganeti)" %
7976 (inst.primary_node, old_file_storage_dir,
7977 new_file_storage_dir))
7979 _StartInstanceDisks(self, inst, None)
7980 # update info on disks
7981 info = _GetInstanceInfoText(inst)
7982 for (idx, disk) in enumerate(inst.disks):
7983 for node in inst.all_nodes:
7984 self.cfg.SetDiskID(disk, node)
7985 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7986         if result.fail_msg:
7987           self.LogWarning("Error setting info on node %s for disk %s: %s",
7988                           node, idx, result.fail_msg)
7989     try:
7990       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7991                                                  old_name, self.op.debug_level)
7992       msg = result.fail_msg
7993       if msg:
7994         msg = ("Could not run OS rename script for instance %s on node %s"
7995                " (but the instance has been renamed in Ganeti): %s" %
7996                (inst.name, inst.primary_node, msg))
7997         self.LogWarning(msg)
7998     finally:
7999       _ShutdownInstanceDisks(self, inst)
8001     return inst.name
8004 class LUInstanceRemove(LogicalUnit):
8005 """Remove an instance.
8008 HPATH = "instance-remove"
8009 HTYPE = constants.HTYPE_INSTANCE
8012 def ExpandNames(self):
8013 self._ExpandAndLockInstance()
8014 self.needed_locks[locking.LEVEL_NODE] = []
8015 self.needed_locks[locking.LEVEL_NODE_RES] = []
8016 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8018 def DeclareLocks(self, level):
8019 if level == locking.LEVEL_NODE:
8020 self._LockInstancesNodes()
8021 elif level == locking.LEVEL_NODE_RES:
8023 self.needed_locks[locking.LEVEL_NODE_RES] = \
8024 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8026 def BuildHooksEnv(self):
8029 This runs on master, primary and secondary nodes of the instance.
8032 env = _BuildInstanceHookEnvByObject(self, self.instance)
8033     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8034     return env
8036 def BuildHooksNodes(self):
8037 """Build hooks nodes.
8040 nl = [self.cfg.GetMasterNode()]
8041 nl_post = list(self.instance.all_nodes) + nl
8042 return (nl, nl_post)
8044 def CheckPrereq(self):
8045 """Check prerequisites.
8047 This checks that the instance is in the cluster.
8050 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8051 assert self.instance is not None, \
8052 "Cannot retrieve locked instance %s" % self.op.instance_name
8054 def Exec(self, feedback_fn):
8055 """Remove the instance.
8058 instance = self.instance
8059 logging.info("Shutting down instance %s on node %s",
8060 instance.name, instance.primary_node)
8062 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8063 self.op.shutdown_timeout)
8064     msg = result.fail_msg
8065     if msg:
8066       if self.op.ignore_failures:
8067         feedback_fn("Warning: can't shutdown instance: %s" % msg)
8068       else:
8069         raise errors.OpExecError("Could not shutdown instance %s on"
8070                                  " node %s: %s" %
8071                                  (instance.name, instance.primary_node, msg))
8073 assert (self.owned_locks(locking.LEVEL_NODE) ==
8074 self.owned_locks(locking.LEVEL_NODE_RES))
8075 assert not (set(instance.all_nodes) -
8076 self.owned_locks(locking.LEVEL_NODE)), \
8077 "Not owning correct locks"
8079 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8082 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8083 """Utility function to remove an instance.
8086 logging.info("Removing block devices for instance %s", instance.name)
8088 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8089 if not ignore_failures:
8090 raise errors.OpExecError("Can't remove instance's disks")
8091 feedback_fn("Warning: can't remove instance's disks")
8093 logging.info("Removing instance %s out of cluster config", instance.name)
8095 lu.cfg.RemoveInstance(instance.name)
8097 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8098 "Instance lock removal conflict"
8100 # Remove lock for the instance
8101 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8104 class LUInstanceQuery(NoHooksLU):
8105 """Logical unit for querying instances.
8108 # pylint: disable=W0142
8111 def CheckArguments(self):
8112 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8113 self.op.output_fields, self.op.use_locking)
8115 def ExpandNames(self):
8116 self.iq.ExpandNames(self)
8118 def DeclareLocks(self, level):
8119 self.iq.DeclareLocks(self, level)
8121 def Exec(self, feedback_fn):
8122 return self.iq.OldStyleQuery(self)
8125 def _ExpandNamesForMigration(lu):
8126 """Expands names for use with L{TLMigrateInstance}.
8128 @type lu: L{LogicalUnit}
8131 if lu.op.target_node is not None:
8132 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8134 lu.needed_locks[locking.LEVEL_NODE] = []
8135 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8137 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8138 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8140 # The node allocation lock is actually only needed for replicated instances
8141 # (e.g. DRBD8) and if an iallocator is used.
8142 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8145 def _DeclareLocksForMigration(lu, level):
8146 """Declares locks for L{TLMigrateInstance}.
8148 @type lu: L{LogicalUnit}
8149 @param level: Lock level
8152 if level == locking.LEVEL_NODE_ALLOC:
8153 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8155 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8157 # Node locks are already declared here rather than at LEVEL_NODE as we need
8158 # the instance object anyway to declare the node allocation lock.
8159 if instance.disk_template in constants.DTS_EXT_MIRROR:
8160 if lu.op.target_node is None:
8161 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8162 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8163       else:
8164         lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8165                                                lu.op.target_node]
8166       del lu.recalculate_locks[locking.LEVEL_NODE]
8167     else:
8168       lu._LockInstancesNodes()  # pylint: disable=W0212
8170 elif level == locking.LEVEL_NODE:
8171 # Node locks are declared together with the node allocation lock
8172 assert (lu.needed_locks[locking.LEVEL_NODE] or
8173 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8175 elif level == locking.LEVEL_NODE_RES:
8177 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8178 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8181 class LUInstanceFailover(LogicalUnit):
8182 """Failover an instance.
8185 HPATH = "instance-failover"
8186 HTYPE = constants.HTYPE_INSTANCE
8189 def CheckArguments(self):
8190 """Check the arguments.
8193 self.iallocator = getattr(self.op, "iallocator", None)
8194 self.target_node = getattr(self.op, "target_node", None)
8196 def ExpandNames(self):
8197 self._ExpandAndLockInstance()
8198 _ExpandNamesForMigration(self)
8200     self._migrater = \
8201       TLMigrateInstance(self, self.op.instance_name, False, True, False,
8202 self.op.ignore_consistency, True,
8203 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8205 self.tasklets = [self._migrater]
8207 def DeclareLocks(self, level):
8208 _DeclareLocksForMigration(self, level)
8210 def BuildHooksEnv(self):
8213 This runs on master, primary and secondary nodes of the instance.
8216 instance = self._migrater.instance
8217 source_node = instance.primary_node
8218 target_node = self.op.target_node
8219     env = {
8220       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8221       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8222       "OLD_PRIMARY": source_node,
8223       "NEW_PRIMARY": target_node,
8224       }
8226     if instance.disk_template in constants.DTS_INT_MIRROR:
8227       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8228       env["NEW_SECONDARY"] = source_node
8229     else:
8230       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8232     env.update(_BuildInstanceHookEnvByObject(self, instance))
8234     return env
8236 def BuildHooksNodes(self):
8237 """Build hooks nodes.
8240 instance = self._migrater.instance
8241 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8242 return (nl, nl + [instance.primary_node])
8245 class LUInstanceMigrate(LogicalUnit):
8246 """Migrate an instance.
8248 This is migration without shutting down, compared to the failover,
8249 which is done with shutdown.
8252 HPATH = "instance-migrate"
8253 HTYPE = constants.HTYPE_INSTANCE
8256 def ExpandNames(self):
8257 self._ExpandAndLockInstance()
8258 _ExpandNamesForMigration(self)
8260     self._migrater = \
8261       TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8262 False, self.op.allow_failover, False,
8263 self.op.allow_runtime_changes,
8264 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8265 self.op.ignore_ipolicy)
8267 self.tasklets = [self._migrater]
8269 def DeclareLocks(self, level):
8270 _DeclareLocksForMigration(self, level)
8272 def BuildHooksEnv(self):
8275 This runs on master, primary and secondary nodes of the instance.
8278 instance = self._migrater.instance
8279 source_node = instance.primary_node
8280 target_node = self.op.target_node
8281 env = _BuildInstanceHookEnvByObject(self, instance)
8282     env.update({
8283       "MIGRATE_LIVE": self._migrater.live,
8284       "MIGRATE_CLEANUP": self.op.cleanup,
8285       "OLD_PRIMARY": source_node,
8286       "NEW_PRIMARY": target_node,
8287       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8288       })
8290     if instance.disk_template in constants.DTS_INT_MIRROR:
8291       env["OLD_SECONDARY"] = target_node
8292       env["NEW_SECONDARY"] = source_node
8293     else:
8294       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8296     return env
8298 def BuildHooksNodes(self):
8299 """Build hooks nodes.
8302 instance = self._migrater.instance
8303 snodes = list(instance.secondary_nodes)
8304     nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8305     return (nl, nl)
8308 class LUInstanceMove(LogicalUnit):
8309   """Move an instance by data-copying.
8311   """
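  # The move works by exporting every block device from the source node to the
  # target node (see Exec), which is why CheckPrereq only accepts plain LVM-
  # and file-based disks.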
8312 HPATH = "instance-move"
8313 HTYPE = constants.HTYPE_INSTANCE
8316 def ExpandNames(self):
8317 self._ExpandAndLockInstance()
8318 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8319 self.op.target_node = target_node
8320 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8321 self.needed_locks[locking.LEVEL_NODE_RES] = []
8322 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8324 def DeclareLocks(self, level):
8325 if level == locking.LEVEL_NODE:
8326 self._LockInstancesNodes(primary_only=True)
8327 elif level == locking.LEVEL_NODE_RES:
8329 self.needed_locks[locking.LEVEL_NODE_RES] = \
8330 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8332 def BuildHooksEnv(self):
8335 This runs on master, primary and secondary nodes of the instance.
8338     env = {
8339       "TARGET_NODE": self.op.target_node,
8340       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8341       }
8342     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8343     return env
8345 def BuildHooksNodes(self):
8346 """Build hooks nodes.
8348     """
8349     nl = [
8350       self.cfg.GetMasterNode(),
8351       self.instance.primary_node,
8352       self.op.target_node,
8353       ]
8354     return (nl, nl)
8356 def CheckPrereq(self):
8357 """Check prerequisites.
8359 This checks that the instance is in the cluster.
8362 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8363 assert self.instance is not None, \
8364 "Cannot retrieve locked instance %s" % self.op.instance_name
8366 node = self.cfg.GetNodeInfo(self.op.target_node)
8367 assert node is not None, \
8368 "Cannot retrieve locked node %s" % self.op.target_node
8370 self.target_node = target_node = node.name
8372 if target_node == instance.primary_node:
8373 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8374                                  (instance.name, target_node),
8375                                  errors.ECODE_STATE)
8377 bep = self.cfg.GetClusterInfo().FillBE(instance)
8379 for idx, dsk in enumerate(instance.disks):
8380 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8381 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8382 " cannot copy" % idx, errors.ECODE_STATE)
8384 _CheckNodeOnline(self, target_node)
8385 _CheckNodeNotDrained(self, target_node)
8386 _CheckNodeVmCapable(self, target_node)
8387 cluster = self.cfg.GetClusterInfo()
8388 group_info = self.cfg.GetNodeGroup(node.group)
8389 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8390 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8391 ignore=self.op.ignore_ipolicy)
8393 if instance.admin_state == constants.ADMINST_UP:
8394 # check memory requirements on the secondary node
8395 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8396 instance.name, bep[constants.BE_MAXMEM],
8397 instance.hypervisor)
8398     else:
8399       self.LogInfo("Not checking memory on the secondary node as"
8400                    " instance will not be started")
8402     # check bridge existence
8403 _CheckInstanceBridgesExist(self, instance, node=target_node)
8405 def Exec(self, feedback_fn):
8406 """Move an instance.
8408 The move is done by shutting it down on its present node, copying
8409 the data over (slow) and starting it on the new node.
8412 instance = self.instance
8414 source_node = instance.primary_node
8415 target_node = self.target_node
8417 self.LogInfo("Shutting down instance %s on source node %s",
8418 instance.name, source_node)
8420 assert (self.owned_locks(locking.LEVEL_NODE) ==
8421 self.owned_locks(locking.LEVEL_NODE_RES))
8423 result = self.rpc.call_instance_shutdown(source_node, instance,
8424 self.op.shutdown_timeout)
8425     msg = result.fail_msg
8426     if msg:
8427       if self.op.ignore_consistency:
8428         self.LogWarning("Could not shutdown instance %s on node %s."
8429                         " Proceeding anyway. Please make sure node"
8430                         " %s is down. Error details: %s",
8431                         instance.name, source_node, source_node, msg)
8432       else:
8433         raise errors.OpExecError("Could not shutdown instance %s on"
8434                                  " node %s: %s" %
8435                                  (instance.name, source_node, msg))
8437 # create the target disks
8438     try:
8439       _CreateDisks(self, instance, target_node=target_node)
8440     except errors.OpExecError:
8441       self.LogWarning("Device creation failed, reverting...")
8442       try:
8443         _RemoveDisks(self, instance, target_node=target_node)
8444       finally:
8445         self.cfg.ReleaseDRBDMinors(instance.name)
8446         raise
8448 cluster_name = self.cfg.GetClusterInfo().cluster_name
8450     errs = []
8451     # activate, get path, copy the data over
8452 for idx, disk in enumerate(instance.disks):
8453 self.LogInfo("Copying data for disk %d", idx)
8454 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8455 instance.name, True, idx)
8456       if result.fail_msg:
8457         self.LogWarning("Can't assemble newly created disk %d: %s",
8458                         idx, result.fail_msg)
8459         errs.append(result.fail_msg)
8460         break
8461       dev_path = result.payload
8462       result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8463                                              target_node, dev_path,
8464                                              cluster_name)
8465       if result.fail_msg:
8466         self.LogWarning("Can't copy data over for disk %d: %s",
8467                         idx, result.fail_msg)
8468         errs.append(result.fail_msg)
8469         break
8471     if errs:
8472       self.LogWarning("Some disks failed to copy, aborting")
8473       try:
8474         _RemoveDisks(self, instance, target_node=target_node)
8475       finally:
8476         self.cfg.ReleaseDRBDMinors(instance.name)
8477         raise errors.OpExecError("Errors during disk copy: %s" %
8478                                  (",".join(errs),))
8480 instance.primary_node = target_node
8481 self.cfg.Update(instance, feedback_fn)
8483 self.LogInfo("Removing the disks on the original node")
8484 _RemoveDisks(self, instance, target_node=source_node)
8486 # Only start the instance if it's marked as up
8487 if instance.admin_state == constants.ADMINST_UP:
8488 self.LogInfo("Starting instance %s on node %s",
8489 instance.name, target_node)
8491       disks_ok, _ = _AssembleInstanceDisks(self, instance,
8492                                            ignore_secondaries=True)
8493       if not disks_ok:
8494         _ShutdownInstanceDisks(self, instance)
8495         raise errors.OpExecError("Can't activate the instance's disks")
8497 result = self.rpc.call_instance_start(target_node,
8498 (instance, None, None), False)
8499       msg = result.fail_msg
8500       if msg:
8501         _ShutdownInstanceDisks(self, instance)
8502 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8503 (instance.name, target_node, msg))
8506 class LUNodeMigrate(LogicalUnit):
8507 """Migrate all instances from a node.
8510 HPATH = "node-migrate"
8511 HTYPE = constants.HTYPE_NODE
8514   def CheckArguments(self):
8515     pass
8517 def ExpandNames(self):
8518 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8520 self.share_locks = _ShareAll()
8521 self.needed_locks = {
8522       locking.LEVEL_NODE: [self.op.node_name],
8523       }
8525 def BuildHooksEnv(self):
8528 This runs on the master, the primary and all the secondaries.
8531     return {
8532       "NODE_NAME": self.op.node_name,
8533       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8534       }
8536 def BuildHooksNodes(self):
8537 """Build hooks nodes.
8539     """
8540     nl = [self.cfg.GetMasterNode()]
8541     return (nl, nl)
8543   def CheckPrereq(self):
8544     pass
8546 def Exec(self, feedback_fn):
8547 # Prepare jobs for migration instances
8548     allow_runtime_changes = self.op.allow_runtime_changes
8549     jobs = [
8550       [opcodes.OpInstanceMigrate(instance_name=inst.name,
8551                                  mode=self.op.mode,
8552                                  live=self.op.live,
8553 iallocator=self.op.iallocator,
8554 target_node=self.op.target_node,
8555 allow_runtime_changes=allow_runtime_changes,
8556 ignore_ipolicy=self.op.ignore_ipolicy)]
8557 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8559 # TODO: Run iallocator in this opcode and pass correct placement options to
8560 # OpInstanceMigrate. Since other jobs can modify the cluster between
8561 # running the iallocator and the actual migration, a good consistency model
8562 # will have to be found.
8564 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8565 frozenset([self.op.node_name]))
8567 return ResultWithJobs(jobs)
8570 class TLMigrateInstance(Tasklet):
8571 """Tasklet class for instance migration.
8574 @ivar live: whether the migration will be done live or non-live;
8575       this variable is initialized only after CheckPrereq has run
8576 @type cleanup: boolean
8577   @ivar cleanup: Whether we are cleaning up after a failed migration
8578 @type iallocator: string
8579 @ivar iallocator: The iallocator used to determine target_node
8580 @type target_node: string
8581 @ivar target_node: If given, the target_node to reallocate the instance to
8582 @type failover: boolean
8583 @ivar failover: Whether operation results in failover or migration
8584 @type fallback: boolean
8585 @ivar fallback: Whether fallback to failover is allowed if migration not
8587 @type ignore_consistency: boolean
8588   @ivar ignore_consistency: Whether we should ignore consistency between source
8589       and target node
8590 @type shutdown_timeout: int
8591 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8592 @type ignore_ipolicy: bool
8593   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8595   """
8598 _MIGRATION_POLL_INTERVAL = 1 # seconds
8599 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
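  # While the memory transfer is running, _ExecMigration polls the hypervisor
  # every _MIGRATION_POLL_INTERVAL seconds and emits a progress message at
  # most every _MIGRATION_FEEDBACK_INTERVAL seconds.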
8601 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8602                ignore_consistency, allow_runtime_changes, shutdown_timeout,
8603                ignore_ipolicy):
8604     """Initializes this class.
8606     """
8607     Tasklet.__init__(self, lu)
8609     # Parameters
8610 self.instance_name = instance_name
8611 self.cleanup = cleanup
8612 self.live = False # will be overridden later
8613 self.failover = failover
8614 self.fallback = fallback
8615 self.ignore_consistency = ignore_consistency
8616 self.shutdown_timeout = shutdown_timeout
8617 self.ignore_ipolicy = ignore_ipolicy
8618 self.allow_runtime_changes = allow_runtime_changes
8620 def CheckPrereq(self):
8621 """Check prerequisites.
8623 This checks that the instance is in the cluster.
8626 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8627 instance = self.cfg.GetInstanceInfo(instance_name)
8628 assert instance is not None
8629 self.instance = instance
8630 cluster = self.cfg.GetClusterInfo()
8632 if (not self.cleanup and
8633 not instance.admin_state == constants.ADMINST_UP and
8634 not self.failover and self.fallback):
8635 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8636 " switching to failover")
8637 self.failover = True
8639     if instance.disk_template not in constants.DTS_MIRRORED:
8640       if self.failover:
8641         text = "failovers"
8642       else:
8643         text = "migrations"
8644       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8645                                  " %s" % (instance.disk_template, text),
8646                                  errors.ECODE_STATE)
8648 if instance.disk_template in constants.DTS_EXT_MIRROR:
8649 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8651 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8653 if self.lu.op.iallocator:
8654 self._RunAllocator()
8655       else:
8656         # We set self.target_node as it is required by
8657         # BuildHooksEnv
8658         self.target_node = self.lu.op.target_node
8660 # Check that the target node is correct in terms of instance policy
8661 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8662 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8663       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8664                                                               group_info)
8665 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8666 ignore=self.ignore_ipolicy)
8668 # self.target_node is already populated, either directly or by the
8670 target_node = self.target_node
8671 if self.target_node == instance.primary_node:
8672 raise errors.OpPrereqError("Cannot migrate instance %s"
8673 " to its primary (%s)" %
8674                                    (instance.name, instance.primary_node),
8675                                    errors.ECODE_STATE)
8677 if len(self.lu.tasklets) == 1:
8678         # It is safe to release locks only when we're the only tasklet
8679         # in this LU
8680         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8681                       keep=[instance.primary_node, self.target_node])
8682         _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8684     else:
8685       assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8687 secondary_nodes = instance.secondary_nodes
8688 if not secondary_nodes:
8689 raise errors.ConfigurationError("No secondary node but using"
8690 " %s disk template" %
8691 instance.disk_template)
8692 target_node = secondary_nodes[0]
8693 if self.lu.op.iallocator or (self.lu.op.target_node and
8694 self.lu.op.target_node != target_node):
8695         if self.failover:
8696           text = "failed over"
8697         else:
8698           text = "migrated"
8699 raise errors.OpPrereqError("Instances with disk template %s cannot"
8700 " be %s to arbitrary nodes"
8701 " (neither an iallocator nor a target"
8702 " node can be passed)" %
8703                                    (instance.disk_template, text),
8704                                    errors.ECODE_INVAL)
8705 nodeinfo = self.cfg.GetNodeInfo(target_node)
8706 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8707       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8708                                                               group_info)
8709 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8710 ignore=self.ignore_ipolicy)
8712 i_be = cluster.FillBE(instance)
8714 # check memory requirements on the secondary node
8715 if (not self.cleanup and
8716 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8717 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8718 "migrating instance %s" %
8719                                                instance.name,
8720                                                i_be[constants.BE_MINMEM],
8721                                                instance.hypervisor)
8722     else:
8723       self.lu.LogInfo("Not checking memory on the secondary node as"
8724 " instance will not be started")
8726 # check if failover must be forced instead of migration
8727 if (not self.cleanup and not self.failover and
8728 i_be[constants.BE_ALWAYS_FAILOVER]):
8729       self.lu.LogInfo("Instance configured to always failover; fallback"
8730                       " to failover")
8731 self.failover = True
8733     # check bridge existence
8734 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8736 if not self.cleanup:
8737 _CheckNodeNotDrained(self.lu, target_node)
8738 if not self.failover:
8739         result = self.rpc.call_instance_migratable(instance.primary_node,
8740                                                    instance)
8741         if result.fail_msg and self.fallback:
8742           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8743                           " failover")
8744           self.failover = True
8745         else:
8746           result.Raise("Can't migrate, please use failover",
8747                        prereq=True, ecode=errors.ECODE_STATE)
8749 assert not (self.failover and self.cleanup)
8751 if not self.failover:
8752 if self.lu.op.live is not None and self.lu.op.mode is not None:
8753 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8754                                    " parameters are accepted",
8755                                    errors.ECODE_INVAL)
8756       if self.lu.op.live is not None:
8757         if self.lu.op.live:
8758           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8759         else:
8760           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8761 # reset the 'live' parameter to None so that repeated
8762 # invocations of CheckPrereq do not raise an exception
8763 self.lu.op.live = None
8764 elif self.lu.op.mode is None:
8765 # read the default value from the hypervisor
8766 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8767 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8769 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8770     else:
8771       # Failover is never live
8772       self.live = False
8774 if not (self.failover or self.cleanup):
8775 remote_info = self.rpc.call_instance_info(instance.primary_node,
8776                                                 instance.name,
8777                                                 instance.hypervisor)
8778 remote_info.Raise("Error checking instance on node %s" %
8779 instance.primary_node)
8780 instance_running = bool(remote_info.payload)
8781 if instance_running:
8782 self.current_mem = int(remote_info.payload["memory"])
8784 def _RunAllocator(self):
8785 """Run the allocator based on input opcode.
8788 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8790 # FIXME: add a self.ignore_ipolicy option
8791 req = iallocator.IAReqRelocate(name=self.instance_name,
8792 relocate_from=[self.instance.primary_node])
8793 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8795 ial.Run(self.lu.op.iallocator)
8797     if not ial.success:
8798       raise errors.OpPrereqError("Can't compute nodes using"
8799                                  " iallocator '%s': %s" %
8800                                  (self.lu.op.iallocator, ial.info),
8801                                  errors.ECODE_NORES)
8802 self.target_node = ial.result[0]
8803 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8804 self.instance_name, self.lu.op.iallocator,
8805 utils.CommaJoin(ial.result))
8807 def _WaitUntilSync(self):
8808 """Poll with custom rpc for disk sync.
8810     This uses our own step-based rpc call.
8812     """
8813     self.feedback_fn("* wait until resync is done")
8814     all_done = False
8815     while not all_done:
8816       all_done = True
8817       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8818                                             self.nodes_ip,
8819                                             (self.instance.disks,
8820                                              self.instance))
8821       min_percent = 100
8822       for node, nres in result.items():
8823         nres.Raise("Cannot resync disks on node %s" % node)
8824         node_done, node_percent = nres.payload
8825         all_done = all_done and node_done
8826         if node_percent is not None:
8827           min_percent = min(min_percent, node_percent)
8828       if not all_done:
8829         if min_percent < 100:
8830           self.feedback_fn(" - progress: %.1f%%" % min_percent)
8831         time.sleep(2)
8833 def _EnsureSecondary(self, node):
8834 """Demote a node to secondary.
8837 self.feedback_fn("* switching node %s to secondary mode" % node)
8839 for dev in self.instance.disks:
8840 self.cfg.SetDiskID(dev, node)
8842 result = self.rpc.call_blockdev_close(node, self.instance.name,
8843 self.instance.disks)
8844 result.Raise("Cannot change disk to secondary on node %s" % node)
8846 def _GoStandalone(self):
8847 """Disconnect from the network.
8850 self.feedback_fn("* changing into standalone mode")
8851 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8852 self.instance.disks)
8853 for node, nres in result.items():
8854 nres.Raise("Cannot disconnect disks node %s" % node)
8856 def _GoReconnect(self, multimaster):
8857     """Reconnect to the network.
8859     """
8860     if multimaster:
8861       msg = "dual-master"
8862     else:
8863       msg = "single-master"
8864 self.feedback_fn("* changing disks into %s mode" % msg)
8865 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8866 (self.instance.disks, self.instance),
8867 self.instance.name, multimaster)
8868 for node, nres in result.items():
8869 nres.Raise("Cannot change disks config on node %s" % node)
8871 def _ExecCleanup(self):
8872 """Try to cleanup after a failed migration.
8874 The cleanup is done by:
8875 - check that the instance is running only on one node
8876 (and update the config if needed)
8877 - change disks on its secondary node to secondary
8878 - wait until disks are fully synchronized
8879 - disconnect from the network
8880 - change disks into single-master mode
8881       - wait again until disks are fully synchronized
8883     """
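    # This path is typically reached via "gnt-instance migrate --cleanup"
    # after an earlier migration was interrupted.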
8884 instance = self.instance
8885 target_node = self.target_node
8886 source_node = self.source_node
8888 # check running on only one node
8889 self.feedback_fn("* checking where the instance actually runs"
8890                      " (if this hangs, the hypervisor might be in"
8891                      " a bad state)")
8892 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8893 for node, result in ins_l.items():
8894 result.Raise("Can't contact node %s" % node)
8896 runningon_source = instance.name in ins_l[source_node].payload
8897 runningon_target = instance.name in ins_l[target_node].payload
8899 if runningon_source and runningon_target:
8900 raise errors.OpExecError("Instance seems to be running on two nodes,"
8901 " or the hypervisor is confused; you will have"
8902 " to ensure manually that it runs only on one"
8903 " and restart this operation")
8905 if not (runningon_source or runningon_target):
8906 raise errors.OpExecError("Instance does not seem to be running at all;"
8907 " in this case it's safer to repair by"
8908 " running 'gnt-instance stop' to ensure disk"
8909 " shutdown, and then restarting it")
8911 if runningon_target:
8912 # the migration has actually succeeded, we need to update the config
8913 self.feedback_fn("* instance running on secondary node (%s),"
8914 " updating config" % target_node)
8915 instance.primary_node = target_node
8916 self.cfg.Update(instance, self.feedback_fn)
8917 demoted_node = source_node
8919 self.feedback_fn("* instance confirmed to be running on its"
8920 " primary node (%s)" % source_node)
8921 demoted_node = target_node
8923 if instance.disk_template in constants.DTS_INT_MIRROR:
8924 self._EnsureSecondary(demoted_node)
8925       try:
8926         self._WaitUntilSync()
8927       except errors.OpExecError:
8928         # we ignore here errors, since if the device is standalone, it
8929         # won't be able to sync
8930         pass
8931       self._GoStandalone()
8932 self._GoReconnect(False)
8933 self._WaitUntilSync()
8935 self.feedback_fn("* done")
8937 def _RevertDiskStatus(self):
8938 """Try to revert the disk status after a failed migration.
8941 target_node = self.target_node
8942     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8943       return
8945     try:
8946       self._EnsureSecondary(target_node)
8947 self._GoStandalone()
8948 self._GoReconnect(False)
8949 self._WaitUntilSync()
8950 except errors.OpExecError, err:
8951 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8952 " please try to recover the instance manually;"
8953 " error '%s'" % str(err))
8955 def _AbortMigration(self):
8956 """Call the hypervisor code to abort a started migration.
8959 instance = self.instance
8960 target_node = self.target_node
8961 source_node = self.source_node
8962 migration_info = self.migration_info
8964     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8965                                                                  instance,
8966                                                                  migration_info,
8967                                                                  False)
8968     abort_msg = abort_result.fail_msg
8969     if abort_msg:
8970       logging.error("Aborting migration failed on target node %s: %s",
8971                     target_node, abort_msg)
8972       # Don't raise an exception here, as we still have to try to revert the
8973 # disk status, even if this step failed.
8975 abort_result = self.rpc.call_instance_finalize_migration_src(
8976 source_node, instance, False, self.live)
8977     abort_msg = abort_result.fail_msg
8978     if abort_msg:
8979       logging.error("Aborting migration failed on source node %s: %s",
8980 source_node, abort_msg)
8982 def _ExecMigration(self):
8983 """Migrate an instance.
8985 The migrate is done by:
8986 - change the disks into dual-master mode
8987 - wait until disks are fully synchronized again
8988 - migrate the instance
8989 - change disks on the new secondary node (the old primary) to secondary
8990 - wait until disks are fully synchronized
8991 - change disks into single-master mode
8994 instance = self.instance
8995 target_node = self.target_node
8996 source_node = self.source_node
8998 # Check for hypervisor version mismatch and warn the user.
8999 nodeinfo = self.rpc.call_node_info([source_node, target_node],
9000 None, [self.instance.hypervisor], False)
9001 for ninfo in nodeinfo.values():
9002       ninfo.Raise("Unable to retrieve node information from node '%s'" %
9003                   ninfo.node)
9004 (_, _, (src_info, )) = nodeinfo[source_node].payload
9005 (_, _, (dst_info, )) = nodeinfo[target_node].payload
9007 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9008 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9009 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9010 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9011 if src_version != dst_version:
9012 self.feedback_fn("* warning: hypervisor version mismatch between"
9013 " source (%s) and target (%s) node" %
9014 (src_version, dst_version))
9016 self.feedback_fn("* checking disk consistency between source and target")
9017 for (idx, dev) in enumerate(instance.disks):
9018 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9019 raise errors.OpExecError("Disk %s is degraded or not fully"
9020 " synchronized on target node,"
9021 " aborting migration" % idx)
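    # If the target node cannot hold the instance's current runtime memory,
    # either fail outright (runtime changes not allowed) or balloon the
    # instance down to the target's free memory before starting the transfer.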
9023 if self.current_mem > self.tgt_free_mem:
9024 if not self.allow_runtime_changes:
9025 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9026 " free memory to fit instance %s on target"
9027 " node %s (have %dMB, need %dMB)" %
9028 (instance.name, target_node,
9029 self.tgt_free_mem, self.current_mem))
9030 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9031       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9032                                                      instance,
9033                                                      self.tgt_free_mem)
9034       rpcres.Raise("Cannot modify instance runtime memory")
9036 # First get the migration information from the remote node
9037 result = self.rpc.call_migration_info(source_node, instance)
9038     msg = result.fail_msg
9039     if msg:
9040       log_err = ("Failed fetching source migration information from %s: %s" %
9041                  (source_node, msg))
9042       logging.error(log_err)
9043 raise errors.OpExecError(log_err)
9045 self.migration_info = migration_info = result.payload
9047 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9048 # Then switch the disks to master/master mode
9049 self._EnsureSecondary(target_node)
9050 self._GoStandalone()
9051 self._GoReconnect(True)
9052 self._WaitUntilSync()
9054 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9055     result = self.rpc.call_accept_instance(target_node,
9056                                            instance,
9057                                            migration_info,
9058                                            self.nodes_ip[target_node])
9060     msg = result.fail_msg
9061     if msg:
9062       logging.error("Instance pre-migration failed, trying to revert"
9063 " disk status: %s", msg)
9064 self.feedback_fn("Pre-migration failed, aborting")
9065 self._AbortMigration()
9066 self._RevertDiskStatus()
9067 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9068 (instance.name, msg))
9070 self.feedback_fn("* migrating instance to %s" % target_node)
9071 result = self.rpc.call_instance_migrate(source_node, instance,
9072                                             self.nodes_ip[target_node],
9073                                             self.live)
9074     msg = result.fail_msg
9075     if msg:
9076       logging.error("Instance migration failed, trying to revert"
9077 " disk status: %s", msg)
9078 self.feedback_fn("Migration failed, aborting")
9079 self._AbortMigration()
9080 self._RevertDiskStatus()
9081 raise errors.OpExecError("Could not migrate instance %s: %s" %
9082 (instance.name, msg))
9084 self.feedback_fn("* starting memory transfer")
9085 last_feedback = time.time()
9086     while True:
9087       result = self.rpc.call_instance_get_migration_status(source_node,
9088                                                            instance)
9089       msg = result.fail_msg
9090 ms = result.payload # MigrationStatus instance
9091 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9092 logging.error("Instance migration failed, trying to revert"
9093 " disk status: %s", msg)
9094 self.feedback_fn("Migration failed, aborting")
9095 self._AbortMigration()
9096         self._RevertDiskStatus()
9097         if not msg:
9098           msg = "hypervisor returned failure"
9099 raise errors.OpExecError("Could not migrate instance %s: %s" %
9100 (instance.name, msg))
9102 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9103         self.feedback_fn("* memory transfer complete")
9104         break
9106 if (utils.TimeoutExpired(last_feedback,
9107 self._MIGRATION_FEEDBACK_INTERVAL) and
9108 ms.transferred_ram is not None):
9109 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9110 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9111 last_feedback = time.time()
9113 time.sleep(self._MIGRATION_POLL_INTERVAL)
9115     result = self.rpc.call_instance_finalize_migration_src(source_node,
9116                                                            instance,
9117                                                            True,
9118                                                            self.live)
9119     msg = result.fail_msg
9120     if msg:
9121       logging.error("Instance migration succeeded, but finalization failed"
9122                     " on the source node: %s", msg)
9123       raise errors.OpExecError("Could not finalize instance migration: %s" %
9124                                msg)
9126 instance.primary_node = target_node
9128 # distribute new instance config to the other nodes
9129 self.cfg.Update(instance, self.feedback_fn)
9131     result = self.rpc.call_instance_finalize_migration_dst(target_node,
9132                                                             instance,
9133                                                             migration_info,
9134                                                             True)
9135     msg = result.fail_msg
9136     if msg:
9137       logging.error("Instance migration succeeded, but finalization failed"
9138                     " on the target node: %s", msg)
9139       raise errors.OpExecError("Could not finalize instance migration: %s" %
9140                                msg)
9142 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9143 self._EnsureSecondary(source_node)
9144 self._WaitUntilSync()
9145 self._GoStandalone()
9146 self._GoReconnect(False)
9147 self._WaitUntilSync()
9149 # If the instance's disk template is `rbd' or `ext' and there was a
9150 # successful migration, unmap the device from the source node.
9151 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9152 disks = _ExpandCheckDisks(instance, instance.disks)
9153 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9154       for disk in disks:
9155         result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9156         msg = result.fail_msg
9157         if msg:
9158           logging.error("Migration was successful, but couldn't unmap the"
9159 " block device %s on source node %s: %s",
9160 disk.iv_name, source_node, msg)
9161 logging.error("You need to unmap the device %s manually on %s",
9162 disk.iv_name, source_node)
9164 self.feedback_fn("* done")
9166 def _ExecFailover(self):
9167 """Failover an instance.
9169 The failover is done by shutting it down on its present node and
9170 starting it on the secondary.
9173 instance = self.instance
9174 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9176 source_node = instance.primary_node
9177 target_node = self.target_node
9179 if instance.admin_state == constants.ADMINST_UP:
9180 self.feedback_fn("* checking disk consistency between source and target")
9181 for (idx, dev) in enumerate(instance.disks):
9182 # for drbd, these are drbd over lvm
9183         if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9184                                      False):
9185           if primary_node.offline:
9186             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9187                              " target node %s" %
9188                              (primary_node.name, idx, target_node))
9189 elif not self.ignore_consistency:
9190 raise errors.OpExecError("Disk %s is degraded on target node,"
9191 " aborting failover" % idx)
9192     else:
9193       self.feedback_fn("* not checking disk consistency as instance is not"
9194                        " running")
9196 self.feedback_fn("* shutting down instance on source node")
9197 logging.info("Shutting down instance %s on node %s",
9198 instance.name, source_node)
9200 result = self.rpc.call_instance_shutdown(source_node, instance,
9201 self.shutdown_timeout)
9202     msg = result.fail_msg
9203     if msg:
9204       if self.ignore_consistency or primary_node.offline:
9205         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9206                            " proceeding anyway; please make sure node"
9207                            " %s is down; error details: %s",
9208                            instance.name, source_node, source_node, msg)
9209       else:
9210         raise errors.OpExecError("Could not shutdown instance %s on"
9211                                  " node %s: %s" %
9212                                  (instance.name, source_node, msg))
9214 self.feedback_fn("* deactivating the instance's disks on source node")
9215 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9216 raise errors.OpExecError("Can't shut down the instance's disks")
9218 instance.primary_node = target_node
9219 # distribute new instance config to the other nodes
9220 self.cfg.Update(instance, self.feedback_fn)
9222 # Only start the instance if it's marked as up
9223 if instance.admin_state == constants.ADMINST_UP:
9224       self.feedback_fn("* activating the instance's disks on target node %s" %
9225                        target_node)
9226       logging.info("Starting instance %s on node %s",
9227                    instance.name, target_node)
9229       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9230                                            ignore_secondaries=True)
9231       if not disks_ok:
9232         _ShutdownInstanceDisks(self.lu, instance)
9233 raise errors.OpExecError("Can't activate the instance's disks")
9235       self.feedback_fn("* starting the instance on the target node %s" %
9236                        target_node)
9237       result = self.rpc.call_instance_start(target_node, (instance, None, None),
9238                                             False)
9239       msg = result.fail_msg
9240       if msg:
9241         _ShutdownInstanceDisks(self.lu, instance)
9242 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9243 (instance.name, target_node, msg))
9245 def Exec(self, feedback_fn):
9246 """Perform the migration.
9249 self.feedback_fn = feedback_fn
9250 self.source_node = self.instance.primary_node
9252 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9253 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9254 self.target_node = self.instance.secondary_nodes[0]
9255 # Otherwise self.target_node has been populated either
9256 # directly, or through an iallocator.
9258 self.all_nodes = [self.source_node, self.target_node]
9259 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9260 in self.cfg.GetMultiNodeInfo(self.all_nodes))
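    # nodes_ip maps each involved node to its secondary IP; the disk
    # reconnection helpers and the migration RPC calls below are given these
    # addresses.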
9262     if self.failover:
9263       feedback_fn("Failover instance %s" % self.instance.name)
9264       self._ExecFailover()
9265     else:
9266       feedback_fn("Migrating instance %s" % self.instance.name)
9268       if self.cleanup:
9269         return self._ExecCleanup()
9270       else:
9271         return self._ExecMigration()
9274 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9275                     force_open):
9276   """Wrapper around L{_CreateBlockDevInner}.
9278   This method annotates the root device first.
9280   """
9281 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9282 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9283 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9284 force_open, excl_stor)
9287 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9288 info, force_open, excl_stor):
9289 """Create a tree of block devices on a given node.
9291 If this device type has to be created on secondaries, create it and
9294 If not, just recurse to children keeping the same 'force' value.
9296 @attention: The device has to be annotated already.
9298 @param lu: the lu on whose behalf we execute
9299 @param node: the node on which to create the device
9300 @type instance: L{objects.Instance}
9301 @param instance: the instance which owns the device
9302 @type device: L{objects.Disk}
9303 @param device: the device to create
9304 @type force_create: boolean
9305 @param force_create: whether to force creation of this device; this
9306     will be changed to True whenever we find a device which has
9307     the CreateOnSecondary() attribute
9308 @param info: the extra 'metadata' we should attach to the device
9309 (this will be represented as a LVM tag)
9310 @type force_open: boolean
9311 @param force_open: this parameter will be passed to the
9312     L{backend.BlockdevCreate} function where it specifies
9313     whether we run on primary or not, and it affects both
9314     the child assembly and the device's own Open() execution
9315 @type excl_stor: boolean
9316 @param excl_stor: Whether exclusive_storage is active for the node
9319 if device.CreateOnSecondary():
9323 for child in device.children:
9324 _CreateBlockDevInner(lu, node, instance, child, force_create,
9325 info, force_open, excl_stor)
9327 if not force_create:
9330 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9334 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9336 """Create a single block device on a given node.
9338 This will not recurse over children of the device, so they must be
9341 @param lu: the lu on whose behalf we execute
9342 @param node: the node on which to create the device
9343 @type instance: L{objects.Instance}
9344 @param instance: the instance which owns the device
9345 @type device: L{objects.Disk}
9346 @param device: the device to create
9347 @param info: the extra 'metadata' we should attach to the device
9348 (this will be represented as a LVM tag)
9349 @type force_open: boolean
9350 @param force_open: this parameter will be passed to the
9351     L{backend.BlockdevCreate} function where it specifies
9352     whether we run on primary or not, and it affects both
9353     the child assembly and the device's own Open() execution
9354 @type excl_stor: boolean
9355 @param excl_stor: Whether exclusive_storage is active for the node
9358 lu.cfg.SetDiskID(device, node)
9359 result = lu.rpc.call_blockdev_create(node, device, device.size,
9360 instance.name, force_open, info,
9362 result.Raise("Can't create block device %s on"
9363 " node %s for instance %s" % (device, node, instance.name))
9364 if device.physical_id is None:
9365 device.physical_id = result.payload
9368 def _GenerateUniqueNames(lu, exts):
9369 """Generate a suitable LV name.
9371 This will generate a logical volume name for the given instance.
9376 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9377 results.append("%s%s" % (new_id, val))
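# Illustrative sketch (not from the original code): with exts such as
# [".disk0_data", ".disk0_meta"] and a generated unique ID like
# "a1b2c3d4-example" (made-up value), the returned names would be
# "a1b2c3d4-example.disk0_data" and "a1b2c3d4-example.disk0_meta".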
9381 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9382 iv_name, p_minor, s_minor):
9383 """Generate a drbd8 device complete with its children.
9386 assert len(vgnames) == len(names) == 2
9387 port = lu.cfg.AllocatePort()
9388 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9390 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9391 logical_id=(vgnames[0], names[0]),
9393 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9394 size=constants.DRBD_META_SIZE,
9395 logical_id=(vgnames[1], names[1]),
9397 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9398 logical_id=(primary, secondary, port,
9401 children=[dev_data, dev_meta],
9402 iv_name=iv_name, params={})
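# Illustrative sketch (assumed layout, values made up): the returned drbd_dev
# is an LD_DRBD8 disk of the requested size whose two children are the data
# LV (logical_id e.g. ("xenvg", "<name>_data")) and the metadata LV of
# constants.DRBD_META_SIZE (logical_id e.g. ("xenvg", "<name>_meta")).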
9406 _DISK_TEMPLATE_NAME_PREFIX = {
9407 constants.DT_PLAIN: "",
9408 constants.DT_RBD: ".rbd",
9409 constants.DT_EXT: ".ext",
9413 _DISK_TEMPLATE_DEVICE_TYPE = {
9414 constants.DT_PLAIN: constants.LD_LV,
9415 constants.DT_FILE: constants.LD_FILE,
9416 constants.DT_SHARED_FILE: constants.LD_FILE,
9417 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9418 constants.DT_RBD: constants.LD_RBD,
9419 constants.DT_EXT: constants.LD_EXT,
9423 def _GenerateDiskTemplate(
9424 lu, template_name, instance_name, primary_node, secondary_nodes,
9425 disk_info, file_storage_dir, file_driver, base_index,
9426 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9427 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9428 """Generate the entire disk layout for a given template type.
9431 vgname = lu.cfg.GetVGName()
9432 disk_count = len(disk_info)
9435 if template_name == constants.DT_DISKLESS:
9437 elif template_name == constants.DT_DRBD8:
9438 if len(secondary_nodes) != 1:
9439 raise errors.ProgrammerError("Wrong template configuration")
9440 remote_node = secondary_nodes[0]
9441 minors = lu.cfg.AllocateDRBDMinor(
9442 [primary_node, remote_node] * len(disk_info), instance_name)
9444 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9446 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9449 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9450 for i in range(disk_count)]):
9451 names.append(lv_prefix + "_data")
9452 names.append(lv_prefix + "_meta")
9453 for idx, disk in enumerate(disk_info):
9454 disk_index = idx + base_index
9455 data_vg = disk.get(constants.IDISK_VG, vgname)
9456 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9457 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9458 disk[constants.IDISK_SIZE],
9460 names[idx * 2:idx * 2 + 2],
9461 "disk/%d" % disk_index,
9462 minors[idx * 2], minors[idx * 2 + 1])
9463 disk_dev.mode = disk[constants.IDISK_MODE]
9464 disks.append(disk_dev)
9467 raise errors.ProgrammerError("Wrong template configuration")
9469 if template_name == constants.DT_FILE:
9471 elif template_name == constants.DT_SHARED_FILE:
9472 _req_shr_file_storage()
9474 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9475 if name_prefix is None:
9478 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9479 (name_prefix, base_index + i)
9480 for i in range(disk_count)])
9482 if template_name == constants.DT_PLAIN:
9484 def logical_id_fn(idx, _, disk):
9485 vg = disk.get(constants.IDISK_VG, vgname)
9486 return (vg, names[idx])
9488 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9490 lambda _, disk_index, disk: (file_driver,
9491 "%s/disk%d" % (file_storage_dir,
9493 elif template_name == constants.DT_BLOCK:
9495 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9496 disk[constants.IDISK_ADOPT])
9497 elif template_name == constants.DT_RBD:
9498 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9499 elif template_name == constants.DT_EXT:
9500 def logical_id_fn(idx, _, disk):
9501 provider = disk.get(constants.IDISK_PROVIDER, None)
9502 if provider is None:
9503 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9504 " not found", constants.DT_EXT,
9505 constants.IDISK_PROVIDER)
9506 return (provider, names[idx])
9508 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9510 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9512 for idx, disk in enumerate(disk_info):
9514 # Only for the Ext template add disk_info to params
9515 if template_name == constants.DT_EXT:
9516 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9518 if key not in constants.IDISK_PARAMS:
9519 params[key] = disk[key]
9520 disk_index = idx + base_index
9521 size = disk[constants.IDISK_SIZE]
9522 feedback_fn("* disk %s, size %s" %
9523 (disk_index, utils.FormatUnit(size, "h")))
9524 disks.append(objects.Disk(dev_type=dev_type, size=size,
9525 logical_id=logical_id_fn(idx, disk_index, disk),
9526 iv_name="disk/%d" % disk_index,
9527 mode=disk[constants.IDISK_MODE],
9533 def _GetInstanceInfoText(instance):
9534 """Compute that text that should be added to the disk's metadata.
9537 return "originstname+%s" % instance.name
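# Illustrative example: for an instance named "instance1.example.com" the
# returned text (later attached to disks as an LVM tag) is
# "originstname+instance1.example.com".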
9540 def _CalcEta(time_taken, written, total_size):
9541 """Calculates the ETA based on size written and total size.
9543 @param time_taken: The time taken so far
9544 @param written: amount written so far
9545 @param total_size: The total size of data to be written
9546 @return: The remaining time in seconds
9549 avg_time = time_taken / float(written)
9550 return (total_size - written) * avg_time
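# Worked example (illustrative only): if 300 seconds were spent writing
# 1024 MiB out of 4096 MiB, avg_time is 300 / 1024.0 ~ 0.29 s/MiB, so the
# estimated remaining time is (4096 - 1024) * 0.29 ~ 900 seconds.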
9553 def _WipeDisks(lu, instance, disks=None):
9554 """Wipes instance disks.
9556 @type lu: L{LogicalUnit}
9557 @param lu: the logical unit on whose behalf we execute
9558 @type instance: L{objects.Instance}
9559 @param instance: the instance whose disks we should wipe
9560 @return: the success of the wipe
9563 node = instance.primary_node
9566 disks = [(idx, disk, 0)
9567 for (idx, disk) in enumerate(instance.disks)]
9569 for (_, device, _) in disks:
9570 lu.cfg.SetDiskID(device, node)
9572 logging.info("Pausing synchronization of disks of instance '%s'",
9574 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9575 (map(compat.snd, disks),
9578 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9580 for idx, success in enumerate(result.payload):
9582 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9583 " failed", idx, instance.name)
9586 for (idx, device, offset) in disks:
9587 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9588 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9590 int(min(constants.MAX_WIPE_CHUNK,
9591 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
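    # Worked example (illustrative, assuming MIN_WIPE_CHUNK_PERCENT is 10 and
    # MAX_WIPE_CHUNK is 1024 MiB): a 2048 MiB disk is wiped in chunks of
    # int(min(1024, 2048 * 0.10)) = 204 MiB, while a 20480 MiB disk is capped
    # at 1024 MiB per chunk.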
9595 start_time = time.time()
9600 info_text = (" (from %s to %s)" %
9601 (utils.FormatUnit(offset, "h"),
9602 utils.FormatUnit(size, "h")))
9604 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9606 logging.info("Wiping disk %d for instance %s on node %s using"
9607 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9609 while offset < size:
9610 wipe_size = min(wipe_chunk_size, size - offset)
9612 logging.debug("Wiping disk %d, offset %s, chunk %s",
9613 idx, offset, wipe_size)
9615 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9617 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9618 (idx, offset, wipe_size))
9622 if now - last_output >= 60:
9623 eta = _CalcEta(now - start_time, offset, size)
9624 lu.LogInfo(" - done: %.1f%% ETA: %s",
9625 offset / float(size) * 100, utils.FormatSeconds(eta))
9628 logging.info("Resuming synchronization of disks for instance '%s'",
9631 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9632 (map(compat.snd, disks),
9637 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9638 node, result.fail_msg)
9640 for idx, success in enumerate(result.payload):
9642 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9643 " failed", idx, instance.name)
9646 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9647 """Create all disks for an instance.
9649 This abstracts away some work from AddInstance.
9651 @type lu: L{LogicalUnit}
9652 @param lu: the logical unit on whose behalf we execute
9653 @type instance: L{objects.Instance}
9654 @param instance: the instance whose disks we should create
9656 @param to_skip: list of indices to skip
9657 @type target_node: string
9658 @param target_node: if passed, overrides the target node for creation
9660 @return: the success of the creation
9663 info = _GetInstanceInfoText(instance)
9664 if target_node is None:
9665 pnode = instance.primary_node
9666 all_nodes = instance.all_nodes
9671 if instance.disk_template in constants.DTS_FILEBASED:
9672 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9673 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9675 result.Raise("Failed to create directory '%s' on"
9676 " node %s" % (file_storage_dir, pnode))
9678 # Note: this needs to be kept in sync with adding of disks in
9679 # LUInstanceSetParams
9680 for idx, device in enumerate(instance.disks):
9681 if to_skip and idx in to_skip:
9683 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9685 for node in all_nodes:
9686 f_create = node == pnode
9687 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9690 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9691 """Remove all disks for an instance.
9693 This abstracts away some work from `AddInstance()` and
9694 `RemoveInstance()`. Note that in case some of the devices couldn't
9695 be removed, the removal will continue with the other ones (compare
9696 with `_CreateDisks()`).
9698 @type lu: L{LogicalUnit}
9699 @param lu: the logical unit on whose behalf we execute
9700 @type instance: L{objects.Instance}
9701 @param instance: the instance whose disks we should remove
9702 @type target_node: string
9703 @param target_node: used to override the node on which to remove the disks
9705 @return: the success of the removal
9708 logging.info("Removing block devices for instance %s", instance.name)
9711 ports_to_release = set()
9712 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9713 for (idx, device) in enumerate(anno_disks):
9715 edata = [(target_node, device)]
9717 edata = device.ComputeNodeTree(instance.primary_node)
9718 for node, disk in edata:
9719 lu.cfg.SetDiskID(disk, node)
9720 result = lu.rpc.call_blockdev_remove(node, disk)
9722 lu.LogWarning("Could not remove disk %s on node %s,"
9723 " continuing anyway: %s", idx, node, result.fail_msg)
9724 if not (result.offline and node != instance.primary_node):
9727 # if this is a DRBD disk, return its port to the pool
9728 if device.dev_type in constants.LDS_DRBD:
9729 ports_to_release.add(device.logical_id[2])
9731 if all_result or ignore_failures:
9732 for port in ports_to_release:
9733 lu.cfg.AddTcpUdpPort(port)
9735 if instance.disk_template in constants.DTS_FILEBASED:
9736 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9740 tgt = instance.primary_node
9741 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9743 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9744 file_storage_dir, instance.primary_node, result.fail_msg)
9750 def _ComputeDiskSizePerVG(disk_template, disks):
9751 """Compute disk size requirements in the volume group
9754 def _compute(disks, payload):
9755 """Universal algorithm.
9760 vgs[disk[constants.IDISK_VG]] = \
9761         vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9765 # Required free disk space as a function of disk and swap space
9767 constants.DT_DISKLESS: {},
9768 constants.DT_PLAIN: _compute(disks, 0),
9769 # 128 MB are added for drbd metadata for each disk
9770 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9771 constants.DT_FILE: {},
9772 constants.DT_SHARED_FILE: {},
9775 if disk_template not in req_size_dict:
9776 raise errors.ProgrammerError("Disk template '%s' size requirement"
9777 " is unknown" % disk_template)
9779 return req_size_dict[disk_template]
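# Illustrative sketch (values made up): for two DRBD8 disks of 1024 MiB and
# 2048 MiB, both in volume group "xenvg", the result is a single-entry dict
# roughly of the form {"xenvg": 1024 + 2048 + 2 * constants.DRBD_META_SIZE},
# i.e. one entry per volume group with the per-disk metadata overhead added.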
9782 def _FilterVmNodes(lu, nodenames):
9783 """Filters out non-vm_capable nodes from a list.
9785 @type lu: L{LogicalUnit}
9786 @param lu: the logical unit for which we check
9787 @type nodenames: list
9788 @param nodenames: the list of nodes on which we should check
9790 @return: the list of vm-capable nodes
9793 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9794 return [name for name in nodenames if name not in vm_nodes]
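# Illustrative example: if "node3" is the only non-vm_capable node in the
# cluster, _FilterVmNodes(lu, ["node1", "node2", "node3"]) returns
# ["node1", "node2"].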
9797 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9798 """Hypervisor parameter validation.
9800 This function abstracts the hypervisor parameter validation to be
9801 used in both instance create and instance modify.
9803 @type lu: L{LogicalUnit}
9804 @param lu: the logical unit for which we check
9805 @type nodenames: list
9806 @param nodenames: the list of nodes on which we should check
9807 @type hvname: string
9808 @param hvname: the name of the hypervisor we should use
9809 @type hvparams: dict
9810 @param hvparams: the parameters which we need to check
9811 @raise errors.OpPrereqError: if the parameters are not valid
9814 nodenames = _FilterVmNodes(lu, nodenames)
9816 cluster = lu.cfg.GetClusterInfo()
9817 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9819 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9820 for node in nodenames:
9824 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9827 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9828 """OS parameters validation.
9830 @type lu: L{LogicalUnit}
9831 @param lu: the logical unit for which we check
9832 @type required: boolean
9833 @param required: whether the validation should fail if the OS is not found
9835 @type nodenames: list
9836 @param nodenames: the list of nodes on which we should check
9837 @type osname: string
9838 @param osname: the name of the OS we should use
9839 @type osparams: dict
9840 @param osparams: the parameters which we need to check
9841 @raise errors.OpPrereqError: if the parameters are not valid
9844 nodenames = _FilterVmNodes(lu, nodenames)
9845 result = lu.rpc.call_os_validate(nodenames, required, osname,
9846 [constants.OS_VALIDATE_PARAMETERS],
9848 for node, nres in result.items():
9849 # we don't check for offline cases since this should be run only
9850 # against the master node and/or an instance's nodes
9851 nres.Raise("OS Parameters validation failed on node %s" % node)
9852 if not nres.payload:
9853 lu.LogInfo("OS %s not found on node %s, validation skipped",
9857 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9858 """Wrapper around IAReqInstanceAlloc.
9860 @param op: The instance opcode
9861 @param disks: The computed disks
9862 @param nics: The computed nics
9863 @param beparams: The fully filled beparams
9864 @param node_whitelist: List of nodes which should appear as online to the
9865 allocator (unless the node is already marked offline)
9867 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9870 spindle_use = beparams[constants.BE_SPINDLE_USE]
9871 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9872 disk_template=op.disk_template,
9875 vcpus=beparams[constants.BE_VCPUS],
9876 memory=beparams[constants.BE_MAXMEM],
9877 spindle_use=spindle_use,
9879 nics=[n.ToDict() for n in nics],
9880 hypervisor=op.hypervisor,
9881 node_whitelist=node_whitelist)
9884 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9885 """Computes the nics.
9887 @param op: The instance opcode
9888 @param cluster: Cluster configuration object
9889 @param default_ip: The default ip to assign
9890 @param cfg: An instance of the configuration object
9891 @param ec_id: Execution context ID
9893 @returns: The built-up NICs
9898 nic_mode_req = nic.get(constants.INIC_MODE, None)
9899 nic_mode = nic_mode_req
9900 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9901 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9903 net = nic.get(constants.INIC_NETWORK, None)
9904 link = nic.get(constants.NIC_LINK, None)
9905 ip = nic.get(constants.INIC_IP, None)
9907 if net is None or net.lower() == constants.VALUE_NONE:
9910 if nic_mode_req is not None or link is not None:
9911 raise errors.OpPrereqError("If network is given, no mode or link"
9912 " is allowed to be passed",
9915 # ip validity checks
9916 if ip is None or ip.lower() == constants.VALUE_NONE:
9918 elif ip.lower() == constants.VALUE_AUTO:
9919 if not op.name_check:
9920 raise errors.OpPrereqError("IP address set to auto but name checks"
9921 " have been skipped",
9925 # We defer pool operations until later, so that the iallocator has
9926       # filled in the instance's node(s)
9927 if ip.lower() == constants.NIC_IP_POOL:
9929 raise errors.OpPrereqError("if ip=pool, parameter network"
9930 " must be passed too",
9933 elif not netutils.IPAddress.IsValid(ip):
9934 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9939 # TODO: check the ip address for uniqueness
9940 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9941 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9944 # MAC address verification
9945 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9946 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9947 mac = utils.NormalizeAndValidateMac(mac)
9950 # TODO: We need to factor this out
9951 cfg.ReserveMAC(mac, ec_id)
9952 except errors.ReservationError:
9953 raise errors.OpPrereqError("MAC address %s already in use"
9954 " in cluster" % mac,
9955 errors.ECODE_NOTUNIQUE)
9957 # Build nic parameters
9960 nicparams[constants.NIC_MODE] = nic_mode
9962 nicparams[constants.NIC_LINK] = link
9964 check_params = cluster.SimpleFillNIC(nicparams)
9965 objects.NIC.CheckParameterSyntax(check_params)
9966 net_uuid = cfg.LookupNetwork(net)
9967 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9968 network=net_uuid, nicparams=nicparams))
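# Illustrative sketch (assumed input): a nic spec such as
# {constants.INIC_MAC: constants.VALUE_AUTO} inherits mode and link from the
# cluster-level nicparams defaults and ends up as an objects.NIC whose MAC is
# still "auto" at this point; the real MAC is only generated later, in
# CheckPrereq of LUInstanceCreate.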
9973 def _ComputeDisks(op, default_vg):
9974 """Computes the instance disks.
9976 @param op: The instance opcode
9977 @param default_vg: The default_vg to assume
9979 @return: The computed disks
9983 for disk in op.disks:
9984 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9985 if mode not in constants.DISK_ACCESS_SET:
9986 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9987 mode, errors.ECODE_INVAL)
9988 size = disk.get(constants.IDISK_SIZE, None)
9990 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9993 except (TypeError, ValueError):
9994 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9997 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9998 if ext_provider and op.disk_template != constants.DT_EXT:
9999 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10000 " disk template, not %s" %
10001 (constants.IDISK_PROVIDER, constants.DT_EXT,
10002 op.disk_template), errors.ECODE_INVAL)
10004 data_vg = disk.get(constants.IDISK_VG, default_vg)
10006 constants.IDISK_SIZE: size,
10007 constants.IDISK_MODE: mode,
10008 constants.IDISK_VG: data_vg,
10011 if constants.IDISK_METAVG in disk:
10012 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10013 if constants.IDISK_ADOPT in disk:
10014 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10016 # For extstorage, demand the `provider' option and add any
10017 # additional parameters (ext-params) to the dict
10018 if op.disk_template == constants.DT_EXT:
10020 new_disk[constants.IDISK_PROVIDER] = ext_provider
10022 if key not in constants.IDISK_PARAMS:
10023 new_disk[key] = disk[key]
10025 raise errors.OpPrereqError("Missing provider for template '%s'" %
10026 constants.DT_EXT, errors.ECODE_INVAL)
10028 disks.append(new_disk)
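# Illustrative sketch (assumed input): a disk spec such as
# {constants.IDISK_SIZE: 10240} becomes {IDISK_SIZE: 10240,
# IDISK_MODE: constants.DISK_RDWR (the default), IDISK_VG: default_vg};
# METAVG, ADOPT and extstorage provider entries are only copied over when
# present in the input spec.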
10033 def _ComputeFullBeParams(op, cluster):
10034 """Computes the full beparams.
10036 @param op: The instance opcode
10037 @param cluster: The cluster config object
10039 @return: The fully filled beparams
10042 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10043 for param, value in op.beparams.iteritems():
10044 if value == constants.VALUE_AUTO:
10045 op.beparams[param] = default_beparams[param]
10046 objects.UpgradeBeParams(op.beparams)
10047 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10048 return cluster.SimpleFillBE(op.beparams)
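# Illustrative sketch (assumed values): with op.beparams containing
# {constants.BE_VCPUS: constants.VALUE_AUTO}, the VCPU count is first replaced
# by the cluster default and SimpleFillBE() then returns a dict with every
# backend parameter (e.g. BE_MAXMEM, BE_MINMEM, BE_SPINDLE_USE) filled in.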
10051 def _CheckOpportunisticLocking(op):
10052 """Generate error if opportunistic locking is not possible.
10055 if op.opportunistic_locking and not op.iallocator:
10056 raise errors.OpPrereqError("Opportunistic locking is only available in"
10057 " combination with an instance allocator",
10058 errors.ECODE_INVAL)
10061 class LUInstanceCreate(LogicalUnit):
10062 """Create an instance.
10065 HPATH = "instance-add"
10066 HTYPE = constants.HTYPE_INSTANCE
10069 def CheckArguments(self):
10070 """Check arguments.
10073 # do not require name_check to ease forward/backward compatibility
10075 if self.op.no_install and self.op.start:
10076 self.LogInfo("No-installation mode selected, disabling startup")
10077 self.op.start = False
10078 # validate/normalize the instance name
10079 self.op.instance_name = \
10080 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10082 if self.op.ip_check and not self.op.name_check:
10083 # TODO: make the ip check more flexible and not depend on the name check
10084 raise errors.OpPrereqError("Cannot do IP address check without a name"
10085 " check", errors.ECODE_INVAL)
10087 # check nics' parameter names
10088 for nic in self.op.nics:
10089 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10091 # check disks. parameter names and consistent adopt/no-adopt strategy
10092 has_adopt = has_no_adopt = False
10093 for disk in self.op.disks:
10094 if self.op.disk_template != constants.DT_EXT:
10095 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10096 if constants.IDISK_ADOPT in disk:
10099 has_no_adopt = True
10100 if has_adopt and has_no_adopt:
10101 raise errors.OpPrereqError("Either all disks are adopted or none is",
10102 errors.ECODE_INVAL)
10104 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10105 raise errors.OpPrereqError("Disk adoption is not supported for the"
10106 " '%s' disk template" %
10107 self.op.disk_template,
10108 errors.ECODE_INVAL)
10109 if self.op.iallocator is not None:
10110 raise errors.OpPrereqError("Disk adoption not allowed with an"
10111 " iallocator script", errors.ECODE_INVAL)
10112 if self.op.mode == constants.INSTANCE_IMPORT:
10113 raise errors.OpPrereqError("Disk adoption not allowed for"
10114 " instance import", errors.ECODE_INVAL)
10116 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10117 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10118 " but no 'adopt' parameter given" %
10119 self.op.disk_template,
10120 errors.ECODE_INVAL)
10122 self.adopt_disks = has_adopt
10124 # instance name verification
10125 if self.op.name_check:
10126 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10127 self.op.instance_name = self.hostname1.name
10128 # used in CheckPrereq for ip ping check
10129 self.check_ip = self.hostname1.ip
10131 self.check_ip = None
10133 # file storage checks
10134 if (self.op.file_driver and
10135 not self.op.file_driver in constants.FILE_DRIVER):
10136 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10137 self.op.file_driver, errors.ECODE_INVAL)
10139 if self.op.disk_template == constants.DT_FILE:
10140 opcodes.RequireFileStorage()
10141 elif self.op.disk_template == constants.DT_SHARED_FILE:
10142 opcodes.RequireSharedFileStorage()
10144 ### Node/iallocator related checks
10145 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10147 if self.op.pnode is not None:
10148 if self.op.disk_template in constants.DTS_INT_MIRROR:
10149 if self.op.snode is None:
10150 raise errors.OpPrereqError("The networked disk templates need"
10151 " a mirror node", errors.ECODE_INVAL)
10152 elif self.op.snode:
10153 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10155 self.op.snode = None
10157 _CheckOpportunisticLocking(self.op)
10159 self._cds = _GetClusterDomainSecret()
10161 if self.op.mode == constants.INSTANCE_IMPORT:
10162 # On import force_variant must be True, because if we forced it at
10163 # initial install, our only chance when importing it back is that it
10165 self.op.force_variant = True
10167 if self.op.no_install:
10168 self.LogInfo("No-installation mode has no effect during import")
10170 elif self.op.mode == constants.INSTANCE_CREATE:
10171 if self.op.os_type is None:
10172 raise errors.OpPrereqError("No guest OS specified",
10173 errors.ECODE_INVAL)
10174 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10175 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10176 " installation" % self.op.os_type,
10177 errors.ECODE_STATE)
10178 if self.op.disk_template is None:
10179 raise errors.OpPrereqError("No disk template specified",
10180 errors.ECODE_INVAL)
10182 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10183 # Check handshake to ensure both clusters have the same domain secret
10184 src_handshake = self.op.source_handshake
10185 if not src_handshake:
10186 raise errors.OpPrereqError("Missing source handshake",
10187 errors.ECODE_INVAL)
10189 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10192 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10193 errors.ECODE_INVAL)
10195 # Load and check source CA
10196 self.source_x509_ca_pem = self.op.source_x509_ca
10197 if not self.source_x509_ca_pem:
10198 raise errors.OpPrereqError("Missing source X509 CA",
10199 errors.ECODE_INVAL)
10202 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10204 except OpenSSL.crypto.Error, err:
10205 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10206 (err, ), errors.ECODE_INVAL)
10208 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10209 if errcode is not None:
10210 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10211 errors.ECODE_INVAL)
10213 self.source_x509_ca = cert
10215 src_instance_name = self.op.source_instance_name
10216 if not src_instance_name:
10217 raise errors.OpPrereqError("Missing source instance name",
10218 errors.ECODE_INVAL)
10220 self.source_instance_name = \
10221 netutils.GetHostname(name=src_instance_name).name
10224 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10225 self.op.mode, errors.ECODE_INVAL)
10227 def ExpandNames(self):
10228 """ExpandNames for CreateInstance.
10230 Figure out the right locks for instance creation.
10233 self.needed_locks = {}
10235 instance_name = self.op.instance_name
10236 # this is just a preventive check, but someone might still add this
10237 # instance in the meantime, and creation will fail at lock-add time
10238 if instance_name in self.cfg.GetInstanceList():
10239 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10240 instance_name, errors.ECODE_EXISTS)
10242 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10244 if self.op.iallocator:
10245 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10246 # specifying a group on instance creation and then selecting nodes from
10248 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10249 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10251 if self.op.opportunistic_locking:
10252 self.opportunistic_locks[locking.LEVEL_NODE] = True
10253 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10255 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10256 nodelist = [self.op.pnode]
10257 if self.op.snode is not None:
10258 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10259 nodelist.append(self.op.snode)
10260 self.needed_locks[locking.LEVEL_NODE] = nodelist
10262 # in case of import lock the source node too
10263 if self.op.mode == constants.INSTANCE_IMPORT:
10264 src_node = self.op.src_node
10265 src_path = self.op.src_path
10267 if src_path is None:
10268 self.op.src_path = src_path = self.op.instance_name
10270 if src_node is None:
10271 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10272 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10273 self.op.src_node = None
10274 if os.path.isabs(src_path):
10275 raise errors.OpPrereqError("Importing an instance from a path"
10276 " requires a source node option",
10277 errors.ECODE_INVAL)
10279 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10280 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10281 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10282 if not os.path.isabs(src_path):
10283 self.op.src_path = src_path = \
10284 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10286 self.needed_locks[locking.LEVEL_NODE_RES] = \
10287 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10289 def _RunAllocator(self):
10290 """Run the allocator based on input opcode.
10293 if self.op.opportunistic_locking:
10294 # Only consider nodes for which a lock is held
10295 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10297 node_whitelist = None
10299 #TODO Export network to iallocator so that it chooses a pnode
10300 # in a nodegroup that has the desired network connected to
10301 req = _CreateInstanceAllocRequest(self.op, self.disks,
10302 self.nics, self.be_full,
10304 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10306 ial.Run(self.op.iallocator)
10308 if not ial.success:
10309 # When opportunistic locks are used only a temporary failure is generated
10310 if self.op.opportunistic_locking:
10311 ecode = errors.ECODE_TEMP_NORES
10313 ecode = errors.ECODE_NORES
10315 raise errors.OpPrereqError("Can't compute nodes using"
10316 " iallocator '%s': %s" %
10317 (self.op.iallocator, ial.info),
10320 self.op.pnode = ial.result[0]
10321 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10322 self.op.instance_name, self.op.iallocator,
10323 utils.CommaJoin(ial.result))
10325 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10327 if req.RequiredNodes() == 2:
10328 self.op.snode = ial.result[1]
10330 def BuildHooksEnv(self):
10331 """Build hooks env.
10333 This runs on master, primary and secondary nodes of the instance.
10337 "ADD_MODE": self.op.mode,
10339 if self.op.mode == constants.INSTANCE_IMPORT:
10340 env["SRC_NODE"] = self.op.src_node
10341 env["SRC_PATH"] = self.op.src_path
10342 env["SRC_IMAGES"] = self.src_images
10344 env.update(_BuildInstanceHookEnv(
10345 name=self.op.instance_name,
10346 primary_node=self.op.pnode,
10347 secondary_nodes=self.secondaries,
10348 status=self.op.start,
10349 os_type=self.op.os_type,
10350 minmem=self.be_full[constants.BE_MINMEM],
10351 maxmem=self.be_full[constants.BE_MAXMEM],
10352 vcpus=self.be_full[constants.BE_VCPUS],
10353 nics=_NICListToTuple(self, self.nics),
10354 disk_template=self.op.disk_template,
10355 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10356 for d in self.disks],
10359 hypervisor_name=self.op.hypervisor,
10365 def BuildHooksNodes(self):
10366 """Build hooks nodes.
10369 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10372 def _ReadExportInfo(self):
10373 """Reads the export information from disk.
10375 It will override the opcode source node and path with the actual
10376 information, if these two were not specified before.
10378 @return: the export information
10381 assert self.op.mode == constants.INSTANCE_IMPORT
10383 src_node = self.op.src_node
10384 src_path = self.op.src_path
10386 if src_node is None:
10387 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10388 exp_list = self.rpc.call_export_list(locked_nodes)
10390 for node in exp_list:
10391 if exp_list[node].fail_msg:
10393 if src_path in exp_list[node].payload:
10395 self.op.src_node = src_node = node
10396 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10400 raise errors.OpPrereqError("No export found for relative path %s" %
10401 src_path, errors.ECODE_INVAL)
10403 _CheckNodeOnline(self, src_node)
10404 result = self.rpc.call_export_info(src_node, src_path)
10405 result.Raise("No export or invalid export found in dir %s" % src_path)
10407 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10408 if not export_info.has_section(constants.INISECT_EXP):
10409 raise errors.ProgrammerError("Corrupted export config",
10410 errors.ECODE_ENVIRON)
10412 ei_version = export_info.get(constants.INISECT_EXP, "version")
10413 if (int(ei_version) != constants.EXPORT_VERSION):
10414 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10415 (ei_version, constants.EXPORT_VERSION),
10416 errors.ECODE_ENVIRON)
10419 def _ReadExportParams(self, einfo):
10420 """Use export parameters as defaults.
10422 If the opcode doesn't specify (i.e. override) some instance
10423 parameters, try to use them from the export information, if
10424 that declares them.
10427 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10429 if self.op.disk_template is None:
10430 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10431 self.op.disk_template = einfo.get(constants.INISECT_INS,
10433 if self.op.disk_template not in constants.DISK_TEMPLATES:
10434 raise errors.OpPrereqError("Disk template specified in configuration"
10435 " file is not one of the allowed values:"
10437 " ".join(constants.DISK_TEMPLATES),
10438 errors.ECODE_INVAL)
10440 raise errors.OpPrereqError("No disk template specified and the export"
10441 " is missing the disk_template information",
10442 errors.ECODE_INVAL)
10444 if not self.op.disks:
10446 # TODO: import the disk iv_name too
10447 for idx in range(constants.MAX_DISKS):
10448 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10449 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10450 disks.append({constants.IDISK_SIZE: disk_sz})
10451 self.op.disks = disks
10452 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10453 raise errors.OpPrereqError("No disk info specified and the export"
10454 " is missing the disk information",
10455 errors.ECODE_INVAL)
10457 if not self.op.nics:
10459 for idx in range(constants.MAX_NICS):
10460 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10462 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10463 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10468 self.op.nics = nics
10470 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10471 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10473 if (self.op.hypervisor is None and
10474 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10475 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10477 if einfo.has_section(constants.INISECT_HYP):
10478 # use the export parameters but do not override the ones
10479 # specified by the user
10480 for name, value in einfo.items(constants.INISECT_HYP):
10481 if name not in self.op.hvparams:
10482 self.op.hvparams[name] = value
10484 if einfo.has_section(constants.INISECT_BEP):
10485 # use the parameters, without overriding
10486 for name, value in einfo.items(constants.INISECT_BEP):
10487 if name not in self.op.beparams:
10488 self.op.beparams[name] = value
10489 # Compatibility for the old "memory" be param
10490 if name == constants.BE_MEMORY:
10491 if constants.BE_MAXMEM not in self.op.beparams:
10492 self.op.beparams[constants.BE_MAXMEM] = value
10493 if constants.BE_MINMEM not in self.op.beparams:
10494 self.op.beparams[constants.BE_MINMEM] = value
10496 # try to read the parameters old style, from the main section
10497 for name in constants.BES_PARAMETERS:
10498 if (name not in self.op.beparams and
10499 einfo.has_option(constants.INISECT_INS, name)):
10500 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10502 if einfo.has_section(constants.INISECT_OSP):
10503 # use the parameters, without overriding
10504 for name, value in einfo.items(constants.INISECT_OSP):
10505 if name not in self.op.osparams:
10506 self.op.osparams[name] = value
10508 def _RevertToDefaults(self, cluster):
10509 """Revert the instance parameters to the default values.
10513 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10514 for name in self.op.hvparams.keys():
10515 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10516 del self.op.hvparams[name]
10518 be_defs = cluster.SimpleFillBE({})
10519 for name in self.op.beparams.keys():
10520 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10521 del self.op.beparams[name]
10523 nic_defs = cluster.SimpleFillNIC({})
10524 for nic in self.op.nics:
10525 for name in constants.NICS_PARAMETERS:
10526 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10529 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10530 for name in self.op.osparams.keys():
10531 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10532 del self.op.osparams[name]
10534 def _CalculateFileStorageDir(self):
10535 """Calculate final instance file storage dir.
10538 # file storage dir calculation/check
10539 self.instance_file_storage_dir = None
10540 if self.op.disk_template in constants.DTS_FILEBASED:
10541 # build the full file storage dir path
10544 if self.op.disk_template == constants.DT_SHARED_FILE:
10545 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10547 get_fsd_fn = self.cfg.GetFileStorageDir
10549 cfg_storagedir = get_fsd_fn()
10550 if not cfg_storagedir:
10551 raise errors.OpPrereqError("Cluster file storage dir not defined",
10552 errors.ECODE_STATE)
10553 joinargs.append(cfg_storagedir)
10555 if self.op.file_storage_dir is not None:
10556 joinargs.append(self.op.file_storage_dir)
10558 joinargs.append(self.op.instance_name)
10560 # pylint: disable=W0142
10561 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10563 def CheckPrereq(self): # pylint: disable=R0914
10564 """Check prerequisites.
10567 self._CalculateFileStorageDir()
10569 if self.op.mode == constants.INSTANCE_IMPORT:
10570 export_info = self._ReadExportInfo()
10571 self._ReadExportParams(export_info)
10572 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10574 self._old_instance_name = None
10576 if (not self.cfg.GetVGName() and
10577 self.op.disk_template not in constants.DTS_NOT_LVM):
10578 raise errors.OpPrereqError("Cluster does not support lvm-based"
10579 " instances", errors.ECODE_STATE)
10581 if (self.op.hypervisor is None or
10582 self.op.hypervisor == constants.VALUE_AUTO):
10583 self.op.hypervisor = self.cfg.GetHypervisorType()
10585 cluster = self.cfg.GetClusterInfo()
10586 enabled_hvs = cluster.enabled_hypervisors
10587 if self.op.hypervisor not in enabled_hvs:
10588 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10590 (self.op.hypervisor, ",".join(enabled_hvs)),
10591 errors.ECODE_STATE)
10593 # Check tag validity
10594 for tag in self.op.tags:
10595 objects.TaggableObject.ValidateTag(tag)
10597 # check hypervisor parameter syntax (locally)
10598 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10599 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10601 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10602 hv_type.CheckParameterSyntax(filled_hvp)
10603 self.hv_full = filled_hvp
10604 # check that we don't specify global parameters on an instance
10605 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10606 "instance", "cluster")
10608 # fill and remember the beparams dict
10609 self.be_full = _ComputeFullBeParams(self.op, cluster)
10611 # build os parameters
10612 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10614 # now that hvp/bep are in final format, let's reset to defaults,
10616 if self.op.identify_defaults:
10617 self._RevertToDefaults(cluster)
10620 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10621 self.proc.GetECId())
10623 # disk checks/pre-build
10624 default_vg = self.cfg.GetVGName()
10625 self.disks = _ComputeDisks(self.op, default_vg)
10627 if self.op.mode == constants.INSTANCE_IMPORT:
10629 for idx in range(len(self.disks)):
10630 option = "disk%d_dump" % idx
10631 if export_info.has_option(constants.INISECT_INS, option):
10632 # FIXME: are the old OSes, disk sizes, etc. useful?
10633 export_name = export_info.get(constants.INISECT_INS, option)
10634 image = utils.PathJoin(self.op.src_path, export_name)
10635 disk_images.append(image)
10637 disk_images.append(False)
10639 self.src_images = disk_images
10641 if self.op.instance_name == self._old_instance_name:
10642 for idx, nic in enumerate(self.nics):
10643 if nic.mac == constants.VALUE_AUTO:
10644 nic_mac_ini = "nic%d_mac" % idx
10645 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10647 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10649 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10650 if self.op.ip_check:
10651 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10652 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10653 (self.check_ip, self.op.instance_name),
10654 errors.ECODE_NOTUNIQUE)
10656 #### mac address generation
10657 # By generating here the mac address both the allocator and the hooks get
10658 # the real final mac address rather than the 'auto' or 'generate' value.
10659 # There is a race condition between the generation and the instance object
10660 # creation, which means that we know the mac is valid now, but we're not
10661 # sure it will be when we actually add the instance. If things go bad
10662 # adding the instance will abort because of a duplicate mac, and the
10663 # creation job will fail.
10664 for nic in self.nics:
10665 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10666 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10670 if self.op.iallocator is not None:
10671 self._RunAllocator()
10673 # Release all unneeded node locks
10674 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10675 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10676 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10677 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10679 assert (self.owned_locks(locking.LEVEL_NODE) ==
10680 self.owned_locks(locking.LEVEL_NODE_RES)), \
10681 "Node locks differ from node resource locks"
10683 #### node related checks
10685 # check primary node
10686 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10687 assert self.pnode is not None, \
10688 "Cannot retrieve locked node %s" % self.op.pnode
10690 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10691 pnode.name, errors.ECODE_STATE)
10693 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10694 pnode.name, errors.ECODE_STATE)
10695 if not pnode.vm_capable:
10696 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10697 " '%s'" % pnode.name, errors.ECODE_STATE)
10699 self.secondaries = []
10701 # Fill in any IPs from IP pools. This must happen here, because we need to
10702 # know the nic's primary node, as specified by the iallocator
10703 for idx, nic in enumerate(self.nics):
10704 net_uuid = nic.network
10705 if net_uuid is not None:
10706 nobj = self.cfg.GetNetwork(net_uuid)
10707 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
10708 if netparams is None:
10709 raise errors.OpPrereqError("No netparams found for network"
10710                                      " %s. Probably not connected to"
10711                                      " node %s's nodegroup" %
10712 (nobj.name, self.pnode.name),
10713 errors.ECODE_INVAL)
10714 self.LogInfo("NIC/%d inherits netparams %s" %
10715 (idx, netparams.values()))
10716 nic.nicparams = dict(netparams)
10717 if nic.ip is not None:
10718 if nic.ip.lower() == constants.NIC_IP_POOL:
10720 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
10721 except errors.ReservationError:
10722 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10723 " from the address pool" % idx,
10724 errors.ECODE_STATE)
10725 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
10728 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
10729 except errors.ReservationError:
10730 raise errors.OpPrereqError("IP address %s already in use"
10731 " or does not belong to network %s" %
10732 (nic.ip, nobj.name),
10733 errors.ECODE_NOTUNIQUE)
10735 # net is None, ip None or given
10736 elif self.op.conflicts_check:
10737 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10739 # mirror node verification
10740 if self.op.disk_template in constants.DTS_INT_MIRROR:
10741 if self.op.snode == pnode.name:
10742 raise errors.OpPrereqError("The secondary node cannot be the"
10743 " primary node", errors.ECODE_INVAL)
10744 _CheckNodeOnline(self, self.op.snode)
10745 _CheckNodeNotDrained(self, self.op.snode)
10746 _CheckNodeVmCapable(self, self.op.snode)
10747 self.secondaries.append(self.op.snode)
10749 snode = self.cfg.GetNodeInfo(self.op.snode)
10750 if pnode.group != snode.group:
10751 self.LogWarning("The primary and secondary nodes are in two"
10752 " different node groups; the disk parameters"
10753 " from the first disk's node group will be"
10756 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10758 if self.op.disk_template in constants.DTS_INT_MIRROR:
10759 nodes.append(snode)
10760 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10761 if compat.any(map(has_es, nodes)):
10762 raise errors.OpPrereqError("Disk template %s not supported with"
10763 " exclusive storage" % self.op.disk_template,
10764 errors.ECODE_STATE)
10766 nodenames = [pnode.name] + self.secondaries
10768 if not self.adopt_disks:
10769 if self.op.disk_template == constants.DT_RBD:
10770 # _CheckRADOSFreeSpace() is just a placeholder.
10771 # Any function that checks prerequisites can be placed here.
10772 # Check if there is enough space on the RADOS cluster.
10773 _CheckRADOSFreeSpace()
10774 elif self.op.disk_template == constants.DT_EXT:
10775 # FIXME: Function that checks prereqs if needed
10778 # Check lv size requirements, if not adopting
10779 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10780 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10782 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10783 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10784 disk[constants.IDISK_ADOPT])
10785 for disk in self.disks])
10786 if len(all_lvs) != len(self.disks):
10787 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10788 errors.ECODE_INVAL)
10789 for lv_name in all_lvs:
10791 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10792 # to ReserveLV use the same syntax
10793 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10794 except errors.ReservationError:
10795 raise errors.OpPrereqError("LV named %s used by another instance" %
10796 lv_name, errors.ECODE_NOTUNIQUE)
10798 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10799 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10801 node_lvs = self.rpc.call_lv_list([pnode.name],
10802 vg_names.payload.keys())[pnode.name]
10803 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10804 node_lvs = node_lvs.payload
10806 delta = all_lvs.difference(node_lvs.keys())
10808 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10809 utils.CommaJoin(delta),
10810 errors.ECODE_INVAL)
10811 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10813 raise errors.OpPrereqError("Online logical volumes found, cannot"
10814 " adopt: %s" % utils.CommaJoin(online_lvs),
10815 errors.ECODE_STATE)
10816 # update the size of disk based on what is found
10817 for dsk in self.disks:
10818 dsk[constants.IDISK_SIZE] = \
10819 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10820 dsk[constants.IDISK_ADOPT])][0]))
10822 elif self.op.disk_template == constants.DT_BLOCK:
10823 # Normalize and de-duplicate device paths
10824 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10825 for disk in self.disks])
10826 if len(all_disks) != len(self.disks):
10827 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10828 errors.ECODE_INVAL)
10829 baddisks = [d for d in all_disks
10830 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10832 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10833 " cannot be adopted" %
10834 (utils.CommaJoin(baddisks),
10835 constants.ADOPTABLE_BLOCKDEV_ROOT),
10836 errors.ECODE_INVAL)
10838 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10839 list(all_disks))[pnode.name]
10840 node_disks.Raise("Cannot get block device information from node %s" %
10842 node_disks = node_disks.payload
10843 delta = all_disks.difference(node_disks.keys())
10845 raise errors.OpPrereqError("Missing block device(s): %s" %
10846 utils.CommaJoin(delta),
10847 errors.ECODE_INVAL)
10848 for dsk in self.disks:
10849 dsk[constants.IDISK_SIZE] = \
10850 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10852 # Verify instance specs
10853 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10855 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10856 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10857 constants.ISPEC_DISK_COUNT: len(self.disks),
10858 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10859 for disk in self.disks],
10860 constants.ISPEC_NIC_COUNT: len(self.nics),
10861 constants.ISPEC_SPINDLE_USE: spindle_use,
10864 group_info = self.cfg.GetNodeGroup(pnode.group)
10865 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10866 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10867 if not self.op.ignore_ipolicy and res:
10868 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10869 (pnode.group, group_info.name, utils.CommaJoin(res)))
10870 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10872 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10874 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10875 # check OS parameters (remotely)
10876 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10878 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10880 #TODO: _CheckExtParams (remotely)
10881 # Check parameters for extstorage
10883 # memory check on primary node
10884 #TODO(dynmem): use MINMEM for checking
10886 _CheckNodeFreeMemory(self, self.pnode.name,
10887 "creating instance %s" % self.op.instance_name,
10888 self.be_full[constants.BE_MAXMEM],
10889 self.op.hypervisor)
10891 self.dry_run_result = list(nodenames)
10893 def Exec(self, feedback_fn):
10894 """Create and add the instance to the cluster.
10897 instance = self.op.instance_name
10898 pnode_name = self.pnode.name
10900 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10901 self.owned_locks(locking.LEVEL_NODE)), \
10902 "Node locks differ from node resource locks"
10903 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10905 ht_kind = self.op.hypervisor
10906 if ht_kind in constants.HTS_REQ_PORT:
10907 network_port = self.cfg.AllocatePort()
10909 network_port = None
10911 # This is ugly, but we have a chicken-and-egg problem here
10912 # We can only take the group disk parameters, as the instance
10913 # has no disks yet (we are generating them right here).
10914 node = self.cfg.GetNodeInfo(pnode_name)
10915 nodegroup = self.cfg.GetNodeGroup(node.group)
10916 disks = _GenerateDiskTemplate(self,
10917 self.op.disk_template,
10918 instance, pnode_name,
10921 self.instance_file_storage_dir,
10922 self.op.file_driver,
10925 self.cfg.GetGroupDiskParams(nodegroup))
10927 iobj = objects.Instance(name=instance, os=self.op.os_type,
10928 primary_node=pnode_name,
10929 nics=self.nics, disks=disks,
10930 disk_template=self.op.disk_template,
10931 admin_state=constants.ADMINST_DOWN,
10932 network_port=network_port,
10933 beparams=self.op.beparams,
10934 hvparams=self.op.hvparams,
10935 hypervisor=self.op.hypervisor,
10936 osparams=self.op.osparams,
10940 for tag in self.op.tags:
10943 if self.adopt_disks:
10944 if self.op.disk_template == constants.DT_PLAIN:
10945 # rename LVs to the newly-generated names; we need to construct
10946 # 'fake' LV disks with the old data, plus the new unique_id
10947 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10949 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10950 rename_to.append(t_dsk.logical_id)
10951 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10952 self.cfg.SetDiskID(t_dsk, pnode_name)
10953 result = self.rpc.call_blockdev_rename(pnode_name,
10954 zip(tmp_disks, rename_to))
10955 result.Raise("Failed to rename adopted LVs")
10957 feedback_fn("* creating instance disks...")
10959 _CreateDisks(self, iobj)
10960 except errors.OpExecError:
10961 self.LogWarning("Device creation failed, reverting...")
10963 _RemoveDisks(self, iobj)
10965 self.cfg.ReleaseDRBDMinors(instance)
10968 feedback_fn("adding instance %s to cluster config" % instance)
10970 self.cfg.AddInstance(iobj, self.proc.GetECId())
10972 # Declare that we don't want to remove the instance lock anymore, as we've
10973 # added the instance to the config
10974 del self.remove_locks[locking.LEVEL_INSTANCE]
10976 if self.op.mode == constants.INSTANCE_IMPORT:
10977 # Release unused nodes
10978 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10980 # Release all nodes
10981 _ReleaseLocks(self, locking.LEVEL_NODE)
10984 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10985 feedback_fn("* wiping instance disks...")
10987 _WipeDisks(self, iobj)
10988 except errors.OpExecError, err:
10989 logging.exception("Wiping disks failed")
10990 self.LogWarning("Wiping instance disks failed (%s)", err)
10994 # Something is already wrong with the disks, don't do anything else
10996 elif self.op.wait_for_sync:
10997 disk_abort = not _WaitForSync(self, iobj)
10998 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10999 # make sure the disks are not degraded (still sync-ing is ok)
11000 feedback_fn("* checking mirrors status")
11001 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11006 _RemoveDisks(self, iobj)
11007 self.cfg.RemoveInstance(iobj.name)
11008 # Make sure the instance lock gets removed
11009 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11010 raise errors.OpExecError("There are some degraded disks for"
11013 # Release all node resource locks
11014 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11016 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11017 # we need to set the disks ID to the primary node, since the
11018 # preceding code might or might not have done it, depending on
11019 # disk template and other options
11020 for disk in iobj.disks:
11021 self.cfg.SetDiskID(disk, pnode_name)
11022 if self.op.mode == constants.INSTANCE_CREATE:
11023 if not self.op.no_install:
11024 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11025 not self.op.wait_for_sync)
11027 feedback_fn("* pausing disk sync to install instance OS")
11028 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11031 for idx, success in enumerate(result.payload):
11033 logging.warn("pause-sync of instance %s for disk %d failed",
11036 feedback_fn("* running the instance OS create scripts...")
11037 # FIXME: pass debug option from opcode to backend
11039 os_add_result = self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11040 self.op.debug_level)
11042 feedback_fn("* resuming disk sync")
11043 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11046 for idx, success in enumerate(result.payload):
11048 logging.warn("resume-sync of instance %s for disk %d failed",
11051 os_add_result.Raise("Could not add os for instance %s"
11052 " on node %s" % (instance, pnode_name))
11055 if self.op.mode == constants.INSTANCE_IMPORT:
11056 feedback_fn("* running the instance OS import scripts...")
11060 for idx, image in enumerate(self.src_images):
11064 # FIXME: pass debug option from opcode to backend
11065 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11066 constants.IEIO_FILE, (image, ),
11067 constants.IEIO_SCRIPT,
11068 (iobj.disks[idx], idx),
11070 transfers.append(dt)
11073 masterd.instance.TransferInstanceData(self, feedback_fn,
11074 self.op.src_node, pnode_name,
11075 self.pnode.secondary_ip,
11077 if not compat.all(import_result):
11078 self.LogWarning("Some disks for instance %s on node %s were not"
11079 " imported successfully" % (instance, pnode_name))
11081 rename_from = self._old_instance_name
11083 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11084 feedback_fn("* preparing remote import...")
11085 # The source cluster will stop the instance before attempting to make
11086 # a connection. In some cases stopping an instance can take a long
11087 # time, hence the shutdown timeout is added to the connection timeout.
11089 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11090 self.op.source_shutdown_timeout)
11091 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
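# Illustrative arithmetic for connect_timeout (hypothetical values, not
# taken from constants.py): if RIE_CONNECT_TIMEOUT were 180 seconds and the
# opcode's source_shutdown_timeout were 120 seconds, connect_timeout would
# be 300 seconds, i.e. the remote import allows the source cluster time to
# both stop the instance and open the connection before giving up.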
11093 assert iobj.primary_node == self.pnode.name
11095 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11096 self.source_x509_ca,
11097 self._cds, timeouts)
11098 if not compat.all(disk_results):
11099 # TODO: Should the instance still be started, even if some disks
11100 # failed to import (valid for local imports, too)?
11101 self.LogWarning("Some disks for instance %s on node %s were not"
11102 " imported successfully" % (instance, pnode_name))
11104 rename_from = self.source_instance_name
11107 # also checked in the prereq part
11108 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11111 # Run rename script on newly imported instance
11112 assert iobj.name == instance
11113 feedback_fn("Running rename script for %s" % instance)
11114 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11116 self.op.debug_level)
11117 if result.fail_msg:
11118 self.LogWarning("Failed to run rename script for %s on node"
11119 " %s: %s" % (instance, pnode_name, result.fail_msg))
11121 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11124 iobj.admin_state = constants.ADMINST_UP
11125 self.cfg.Update(iobj, feedback_fn)
11126 logging.info("Starting instance %s on node %s", instance, pnode_name)
11127 feedback_fn("* starting instance...")
11128 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11130 result.Raise("Could not start instance")
11132 return list(iobj.all_nodes)
11135 class LUInstanceMultiAlloc(NoHooksLU):
11136 """Allocates multiple instances at the same time.
11141 def CheckArguments(self):
11142 """Check arguments.
11146 for inst in self.op.instances:
11147 if inst.iallocator is not None:
11148 raise errors.OpPrereqError("iallocator is not allowed to be set on"
11149 " instance objects", errors.ECODE_INVAL)
11150 nodes.append(bool(inst.pnode))
11151 if inst.disk_template in constants.DTS_INT_MIRROR:
11152 nodes.append(bool(inst.snode))
11154 has_nodes = compat.any(nodes)
11155 if compat.all(nodes) ^ has_nodes:
11156 raise errors.OpPrereqError("There are instance objects providing"
11157 " pnode/snode while others do not",
11158 errors.ECODE_INVAL)
11160 if self.op.iallocator is None:
11161 default_iallocator = self.cfg.GetDefaultIAllocator()
11162 if default_iallocator and has_nodes:
11163 self.op.iallocator = default_iallocator
11165 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11166 " given and no cluster-wide default"
11167 " iallocator found; please specify either"
11168 " an iallocator or nodes on the instances"
11169 " or set a cluster-wide default iallocator",
11170 errors.ECODE_INVAL)
11172 _CheckOpportunisticLocking(self.op)
11174 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11176 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11177 utils.CommaJoin(dups), errors.ECODE_INVAL)
11179 def ExpandNames(self):
11180 """Calculate the locks.
11183 self.share_locks = _ShareAll()
11184 self.needed_locks = {
11185 # iallocator will select nodes and even if no iallocator is used,
11186 # collisions with LUInstanceCreate should be avoided
11187 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11190 if self.op.iallocator:
11191 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11192 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11194 if self.op.opportunistic_locking:
11195 self.opportunistic_locks[locking.LEVEL_NODE] = True
11196 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11199 for inst in self.op.instances:
11200 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11201 nodeslist.append(inst.pnode)
11202 if inst.snode is not None:
11203 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11204 nodeslist.append(inst.snode)
11206 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11207 # Lock resources of instance's primary and secondary nodes (copy to
11208 # prevent accidental modification)
11209 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11211 def CheckPrereq(self):
11212 """Check prerequisite.
11215 cluster = self.cfg.GetClusterInfo()
11216 default_vg = self.cfg.GetVGName()
11217 ec_id = self.proc.GetECId()
11219 if self.op.opportunistic_locking:
11220 # Only consider nodes for which a lock is held
11221 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11223 node_whitelist = None
11225 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11226 _ComputeNics(op, cluster, None,
11228 _ComputeFullBeParams(op, cluster),
11230 for op in self.op.instances]
11232 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11233 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11235 ial.Run(self.op.iallocator)
11237 if not ial.success:
11238 raise errors.OpPrereqError("Can't compute nodes using"
11239 " iallocator '%s': %s" %
11240 (self.op.iallocator, ial.info),
11241 errors.ECODE_NORES)
11243 self.ia_result = ial.result
11245 if self.op.dry_run:
11246 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11247 constants.JOB_IDS_KEY: [],
11250 def _ConstructPartialResult(self):
11251 """Contructs the partial result.
11254 (allocatable, failed) = self.ia_result
11256 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11257 map(compat.fst, allocatable),
11258 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11261 def Exec(self, feedback_fn):
11262 """Executes the opcode.
11265 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11266 (allocatable, failed) = self.ia_result
11269 for (name, nodes) in allocatable:
11270 op = op2inst.pop(name)
11273 (op.pnode, op.snode) = nodes
11275 (op.pnode,) = nodes
11279 missing = set(op2inst.keys()) - set(failed)
11280 assert not missing, \
11281 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11283 return ResultWithJobs(jobs, **self._ConstructPartialResult())
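# Rough sketch of the value returned above (hypothetical instance names;
# the real dictionary keys come from constants.JOB_IDS_KEY and
# opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY/FAILED_KEY): the caller
# would see something along the lines of
#   {"jobs": [[job_id, ...], ...],
#    "allocatable": ["inst1", "inst2"],
#    "failed": ["inst3"]}
# where the allocatable list holds the instances the iallocator could place
# and the failed list the ones it could not.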
11286 def _CheckRADOSFreeSpace():
11287 """Compute disk size requirements inside the RADOS cluster.
11290 # For the RADOS cluster we assume there is always enough space.
11294 class LUInstanceConsole(NoHooksLU):
11295 """Connect to an instance's console.
11297 This is somewhat special in that it returns the command line that
11298 you need to run on the master node in order to connect to the console.
11304 def ExpandNames(self):
11305 self.share_locks = _ShareAll()
11306 self._ExpandAndLockInstance()
11308 def CheckPrereq(self):
11309 """Check prerequisites.
11311 This checks that the instance is in the cluster.
11314 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11315 assert self.instance is not None, \
11316 "Cannot retrieve locked instance %s" % self.op.instance_name
11317 _CheckNodeOnline(self, self.instance.primary_node)
11319 def Exec(self, feedback_fn):
11320 """Connect to the console of an instance
11323 instance = self.instance
11324 node = instance.primary_node
11326 node_insts = self.rpc.call_instance_list([node],
11327 [instance.hypervisor])[node]
11328 node_insts.Raise("Can't get node information from %s" % node)
11330 if instance.name not in node_insts.payload:
11331 if instance.admin_state == constants.ADMINST_UP:
11332 state = constants.INSTST_ERRORDOWN
11333 elif instance.admin_state == constants.ADMINST_DOWN:
11334 state = constants.INSTST_ADMINDOWN
11336 state = constants.INSTST_ADMINOFFLINE
11337 raise errors.OpExecError("Instance %s is not running (state %s)" %
11338 (instance.name, state))
11340 logging.debug("Connecting to console of %s on %s", instance.name, node)
11342 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11345 def _GetInstanceConsole(cluster, instance):
11346 """Returns console information for an instance.
11348 @type cluster: L{objects.Cluster}
11349 @type instance: L{objects.Instance}
11353 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11354 # beparams and hvparams are passed separately, to avoid editing the
11355 # instance and then saving the defaults in the instance itself.
11356 hvparams = cluster.FillHV(instance)
11357 beparams = cluster.FillBE(instance)
11358 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11360 assert console.instance == instance.name
11361 assert console.Validate()
11363 return console.ToDict()
11366 class LUInstanceReplaceDisks(LogicalUnit):
11367 """Replace the disks of an instance.
11370 HPATH = "mirrors-replace"
11371 HTYPE = constants.HTYPE_INSTANCE
11374 def CheckArguments(self):
11375 """Check arguments.
11378 remote_node = self.op.remote_node
11379 ialloc = self.op.iallocator
11380 if self.op.mode == constants.REPLACE_DISK_CHG:
11381 if remote_node is None and ialloc is None:
11382 raise errors.OpPrereqError("When changing the secondary either an"
11383 " iallocator script must be used or the"
11384 " new node given", errors.ECODE_INVAL)
11386 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11388 elif remote_node is not None or ialloc is not None:
11389 # Not replacing the secondary
11390 raise errors.OpPrereqError("The iallocator and new node options can"
11391 " only be used when changing the"
11392 " secondary node", errors.ECODE_INVAL)
11394 def ExpandNames(self):
11395 self._ExpandAndLockInstance()
11397 assert locking.LEVEL_NODE not in self.needed_locks
11398 assert locking.LEVEL_NODE_RES not in self.needed_locks
11399 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11401 assert self.op.iallocator is None or self.op.remote_node is None, \
11402 "Conflicting options"
11404 if self.op.remote_node is not None:
11405 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11407 # Warning: do not remove the locking of the new secondary here
11408 # unless DRBD8.AddChildren is changed to work in parallel;
11409 # currently it doesn't since parallel invocations of
11410 # FindUnusedMinor will conflict
11411 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11412 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11414 self.needed_locks[locking.LEVEL_NODE] = []
11415 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11417 if self.op.iallocator is not None:
11418 # iallocator will select a new node in the same group
11419 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11420 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11422 self.needed_locks[locking.LEVEL_NODE_RES] = []
11424 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11425 self.op.iallocator, self.op.remote_node,
11426 self.op.disks, self.op.early_release,
11427 self.op.ignore_ipolicy)
11429 self.tasklets = [self.replacer]
11431 def DeclareLocks(self, level):
11432 if level == locking.LEVEL_NODEGROUP:
11433 assert self.op.remote_node is None
11434 assert self.op.iallocator is not None
11435 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11437 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11438 # Lock all groups used by instance optimistically; this requires going
11439 # via the node before it's locked, requiring verification later on
11440 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11441 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11443 elif level == locking.LEVEL_NODE:
11444 if self.op.iallocator is not None:
11445 assert self.op.remote_node is None
11446 assert not self.needed_locks[locking.LEVEL_NODE]
11447 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11449 # Lock member nodes of all locked groups
11450 self.needed_locks[locking.LEVEL_NODE] = \
11452 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11453 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11455 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11457 self._LockInstancesNodes()
11459 elif level == locking.LEVEL_NODE_RES:
11461 self.needed_locks[locking.LEVEL_NODE_RES] = \
11462 self.needed_locks[locking.LEVEL_NODE]
11464 def BuildHooksEnv(self):
11465 """Build hooks env.
11467 This runs on the master, the primary and all the secondaries.
11470 instance = self.replacer.instance
11472 "MODE": self.op.mode,
11473 "NEW_SECONDARY": self.op.remote_node,
11474 "OLD_SECONDARY": instance.secondary_nodes[0],
11476 env.update(_BuildInstanceHookEnvByObject(self, instance))
11479 def BuildHooksNodes(self):
11480 """Build hooks nodes.
11483 instance = self.replacer.instance
11485 self.cfg.GetMasterNode(),
11486 instance.primary_node,
11488 if self.op.remote_node is not None:
11489 nl.append(self.op.remote_node)
11492 def CheckPrereq(self):
11493 """Check prerequisites.
11496 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11497 self.op.iallocator is None)
11499 # Verify if node group locks are still correct
11500 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11502 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11504 return LogicalUnit.CheckPrereq(self)
11507 class TLReplaceDisks(Tasklet):
11508 """Replaces disks for an instance.
11510 Note: Locking is not within the scope of this class.
11513 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11514 disks, early_release, ignore_ipolicy):
11515 """Initializes this class.
11518 Tasklet.__init__(self, lu)
11521 self.instance_name = instance_name
11523 self.iallocator_name = iallocator_name
11524 self.remote_node = remote_node
11526 self.early_release = early_release
11527 self.ignore_ipolicy = ignore_ipolicy
11530 self.instance = None
11531 self.new_node = None
11532 self.target_node = None
11533 self.other_node = None
11534 self.remote_node_info = None
11535 self.node_secondary_ip = None
11538 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11539 """Compute a new secondary node using an IAllocator.
11542 req = iallocator.IAReqRelocate(name=instance_name,
11543 relocate_from=list(relocate_from))
11544 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11546 ial.Run(iallocator_name)
11548 if not ial.success:
11549 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11550 " %s" % (iallocator_name, ial.info),
11551 errors.ECODE_NORES)
11553 remote_node_name = ial.result[0]
11555 lu.LogInfo("Selected new secondary for instance '%s': %s",
11556 instance_name, remote_node_name)
11558 return remote_node_name
11560 def _FindFaultyDisks(self, node_name):
11561 """Wrapper for L{_FindFaultyInstanceDisks}.
11564 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11567 def _CheckDisksActivated(self, instance):
11568 """Checks if the instance disks are activated.
11570 @param instance: The instance to check disks
11571 @return: True if they are activated, False otherwise
11574 nodes = instance.all_nodes
11576 for idx, dev in enumerate(instance.disks):
11578 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11579 self.cfg.SetDiskID(dev, node)
11581 result = _BlockdevFind(self, node, dev, instance)
11585 elif result.fail_msg or not result.payload:
11590 def CheckPrereq(self):
11591 """Check prerequisites.
11593 This checks that the instance is in the cluster.
11596 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11597 assert instance is not None, \
11598 "Cannot retrieve locked instance %s" % self.instance_name
11600 if instance.disk_template != constants.DT_DRBD8:
11601 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11602 " instances", errors.ECODE_INVAL)
11604 if len(instance.secondary_nodes) != 1:
11605 raise errors.OpPrereqError("The instance has a strange layout,"
11606 " expected one secondary but found %d" %
11607 len(instance.secondary_nodes),
11608 errors.ECODE_FAULT)
11610 instance = self.instance
11611 secondary_node = instance.secondary_nodes[0]
11613 if self.iallocator_name is None:
11614 remote_node = self.remote_node
11616 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11617 instance.name, instance.secondary_nodes)
11619 if remote_node is None:
11620 self.remote_node_info = None
11622 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11623 "Remote node '%s' is not locked" % remote_node
11625 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11626 assert self.remote_node_info is not None, \
11627 "Cannot retrieve locked node %s" % remote_node
11629 if remote_node == self.instance.primary_node:
11630 raise errors.OpPrereqError("The specified node is the primary node of"
11631 " the instance", errors.ECODE_INVAL)
11633 if remote_node == secondary_node:
11634 raise errors.OpPrereqError("The specified node is already the"
11635 " secondary node of the instance",
11636 errors.ECODE_INVAL)
11638 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11639 constants.REPLACE_DISK_CHG):
11640 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11641 errors.ECODE_INVAL)
11643 if self.mode == constants.REPLACE_DISK_AUTO:
11644 if not self._CheckDisksActivated(instance):
11645 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11646 " first" % self.instance_name,
11647 errors.ECODE_STATE)
11648 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11649 faulty_secondary = self._FindFaultyDisks(secondary_node)
11651 if faulty_primary and faulty_secondary:
11652 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11653 " one node and can not be repaired"
11654 " automatically" % self.instance_name,
11655 errors.ECODE_STATE)
11658 self.disks = faulty_primary
11659 self.target_node = instance.primary_node
11660 self.other_node = secondary_node
11661 check_nodes = [self.target_node, self.other_node]
11662 elif faulty_secondary:
11663 self.disks = faulty_secondary
11664 self.target_node = secondary_node
11665 self.other_node = instance.primary_node
11666 check_nodes = [self.target_node, self.other_node]
11672 # Non-automatic modes
11673 if self.mode == constants.REPLACE_DISK_PRI:
11674 self.target_node = instance.primary_node
11675 self.other_node = secondary_node
11676 check_nodes = [self.target_node, self.other_node]
11678 elif self.mode == constants.REPLACE_DISK_SEC:
11679 self.target_node = secondary_node
11680 self.other_node = instance.primary_node
11681 check_nodes = [self.target_node, self.other_node]
11683 elif self.mode == constants.REPLACE_DISK_CHG:
11684 self.new_node = remote_node
11685 self.other_node = instance.primary_node
11686 self.target_node = secondary_node
11687 check_nodes = [self.new_node, self.other_node]
11689 _CheckNodeNotDrained(self.lu, remote_node)
11690 _CheckNodeVmCapable(self.lu, remote_node)
11692 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11693 assert old_node_info is not None
11694 if old_node_info.offline and not self.early_release:
11695 # doesn't make sense to delay the release
11696 self.early_release = True
11697 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11698 " early-release mode", secondary_node)
11701 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11704 # If not specified all disks should be replaced
11706 self.disks = range(len(self.instance.disks))
11708 # TODO: This is ugly, but right now we can't distinguish between an
11709 # internally submitted opcode and an external one. We should fix that.
11710 if self.remote_node_info:
11711 # We change the node, let's verify it still meets instance policy
11712 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11713 cluster = self.cfg.GetClusterInfo()
11714 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11716 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11717 ignore=self.ignore_ipolicy)
11719 for node in check_nodes:
11720 _CheckNodeOnline(self.lu, node)
11722 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11725 if node_name is not None)
11727 # Release unneeded node and node resource locks
11728 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11729 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11730 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11732 # Release any owned node group
11733 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11735 # Check whether disks are valid
11736 for disk_idx in self.disks:
11737 instance.FindDisk(disk_idx)
11739 # Get secondary node IP addresses
11740 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11741 in self.cfg.GetMultiNodeInfo(touched_nodes))
11743 def Exec(self, feedback_fn):
11744 """Execute disk replacement.
11746 This dispatches the disk replacement to the appropriate handler.
11750 # Verify owned locks before starting operation
11751 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11752 assert set(owned_nodes) == set(self.node_secondary_ip), \
11753 ("Incorrect node locks, owning %s, expected %s" %
11754 (owned_nodes, self.node_secondary_ip.keys()))
11755 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11756 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11757 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11759 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11760 assert list(owned_instances) == [self.instance_name], \
11761 "Instance '%s' not locked" % self.instance_name
11763 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11764 "Should not own any node group lock at this point"
11767 feedback_fn("No disks need replacement for instance '%s'" %
11768 self.instance.name)
11771 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11772 (utils.CommaJoin(self.disks), self.instance.name))
11773 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11774 feedback_fn("Current secondary node: %s" %
11775 utils.CommaJoin(self.instance.secondary_nodes))
11777 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11779 # Activate the instance disks if we're replacing them on a down instance
11781 _StartInstanceDisks(self.lu, self.instance, True)
11784 # Should we replace the secondary node?
11785 if self.new_node is not None:
11786 fn = self._ExecDrbd8Secondary
11788 fn = self._ExecDrbd8DiskOnly
11790 result = fn(feedback_fn)
11792 # Deactivate the instance disks if we're replacing them on a
11795 _SafeShutdownInstanceDisks(self.lu, self.instance)
11797 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11800 # Verify owned locks
11801 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11802 nodes = frozenset(self.node_secondary_ip)
11803 assert ((self.early_release and not owned_nodes) or
11804 (not self.early_release and not (set(owned_nodes) - nodes))), \
11805 ("Not owning the correct locks, early_release=%s, owned=%r,"
11806 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11810 def _CheckVolumeGroup(self, nodes):
11811 self.lu.LogInfo("Checking volume groups")
11813 vgname = self.cfg.GetVGName()
11815 # Make sure volume group exists on all involved nodes
11816 results = self.rpc.call_vg_list(nodes)
11818 raise errors.OpExecError("Can't list volume groups on the nodes")
11821 res = results[node]
11822 res.Raise("Error checking node %s" % node)
11823 if vgname not in res.payload:
11824 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11827 def _CheckDisksExistence(self, nodes):
11828 # Check disk existence
11829 for idx, dev in enumerate(self.instance.disks):
11830 if idx not in self.disks:
11834 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11835 self.cfg.SetDiskID(dev, node)
11837 result = _BlockdevFind(self, node, dev, self.instance)
11839 msg = result.fail_msg
11840 if msg or not result.payload:
11842 msg = "disk not found"
11843 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11846 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11847 for idx, dev in enumerate(self.instance.disks):
11848 if idx not in self.disks:
11851 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11854 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11855 on_primary, ldisk=ldisk):
11856 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11857 " replace disks for instance %s" %
11858 (node_name, self.instance.name))
11860 def _CreateNewStorage(self, node_name):
11861 """Create new storage on the primary or secondary node.
11863 This is only used for same-node replaces, not for changing the
11864 secondary node, hence we don't want to modify the existing disk.
11869 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11870 for idx, dev in enumerate(disks):
11871 if idx not in self.disks:
11874 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11876 self.cfg.SetDiskID(dev, node_name)
11878 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11879 names = _GenerateUniqueNames(self.lu, lv_names)
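# Illustration (hypothetical unique prefix): for idx == 0, lv_names is
# [".disk0_data", ".disk0_meta"], and _GenerateUniqueNames is expected to
# turn those into cluster-unique LV names such as "<uuid>.disk0_data" and
# "<uuid>.disk0_meta", which become names[0] and names[1] below.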
11881 (data_disk, meta_disk) = dev.children
11882 vg_data = data_disk.logical_id[0]
11883 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11884 logical_id=(vg_data, names[0]),
11885 params=data_disk.params)
11886 vg_meta = meta_disk.logical_id[0]
11887 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11888 size=constants.DRBD_META_SIZE,
11889 logical_id=(vg_meta, names[1]),
11890 params=meta_disk.params)
11892 new_lvs = [lv_data, lv_meta]
11893 old_lvs = [child.Copy() for child in dev.children]
11894 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11895 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11897 # we pass force_create=True to force the LVM creation
11898 for new_lv in new_lvs:
11899 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11900 _GetInstanceInfoText(self.instance), False,
11905 def _CheckDevices(self, node_name, iv_names):
11906 for name, (dev, _, _) in iv_names.iteritems():
11907 self.cfg.SetDiskID(dev, node_name)
11909 result = _BlockdevFind(self, node_name, dev, self.instance)
11911 msg = result.fail_msg
11912 if msg or not result.payload:
11914 msg = "disk not found"
11915 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11918 if result.payload.is_degraded:
11919 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11921 def _RemoveOldStorage(self, node_name, iv_names):
11922 for name, (_, old_lvs, _) in iv_names.iteritems():
11923 self.lu.LogInfo("Remove logical volumes for %s", name)
11926 self.cfg.SetDiskID(lv, node_name)
11928 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11930 self.lu.LogWarning("Can't remove old LV: %s", msg,
11931 hint="remove unused LVs manually")
11933 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11934 """Replace a disk on the primary or secondary for DRBD 8.
11936 The algorithm for replace is quite complicated:
11938 1. for each disk to be replaced:
11940 1. create new LVs on the target node with unique names
11941 1. detach old LVs from the drbd device
11942 1. rename old LVs to name_replaced.<time_t>
11943 1. rename new LVs to old LVs
11944 1. attach the new LVs (with the old names now) to the drbd device
11946 1. wait for sync across all devices
11948 1. for each modified disk:
11950 1. remove old LVs (which have the name name_replaced.<time_t>)
11952 Failures are not very well handled.

"""
steps_total = 6
11957 # Step: check device activation
11958 self.lu.LogStep(1, steps_total, "Check device existence")
11959 self._CheckDisksExistence([self.other_node, self.target_node])
11960 self._CheckVolumeGroup([self.target_node, self.other_node])
11962 # Step: check other node consistency
11963 self.lu.LogStep(2, steps_total, "Check peer consistency")
11964 self._CheckDisksConsistency(self.other_node,
11965 self.other_node == self.instance.primary_node,
11968 # Step: create new storage
11969 self.lu.LogStep(3, steps_total, "Allocate new storage")
11970 iv_names = self._CreateNewStorage(self.target_node)
11972 # Step: for each lv, detach+rename*2+attach
11973 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11974 for dev, old_lvs, new_lvs in iv_names.itervalues():
11975 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11977 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11979 result.Raise("Can't detach drbd from local storage on node"
11980 " %s for device %s" % (self.target_node, dev.iv_name))
11982 #cfg.Update(instance)
11984 # ok, we created the new LVs, so now we know we have the needed
11985 # storage; as such, we proceed on the target node to rename
11986 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11987 # using the assumption that logical_id == physical_id (which in
11988 # turn is the unique_id on that node)
11990 # FIXME(iustin): use a better name for the replaced LVs
11991 temp_suffix = int(time.time())
11992 ren_fn = lambda d, suff: (d.physical_id[0],
11993 d.physical_id[1] + "_replaced-%s" % suff)
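# Hypothetical example of ren_fn: an old data LV with physical_id
# ("xenvg", "abc123.disk0_data") and temp_suffix 1360000000 would get the
# rename target ("xenvg", "abc123.disk0_data_replaced-1360000000"); the
# volume group name and timestamp are made up for illustration.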
11995 # Build the rename list based on what LVs exist on the node
11996 rename_old_to_new = []
11997 for to_ren in old_lvs:
11998 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11999 if not result.fail_msg and result.payload:
12001 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12003 self.lu.LogInfo("Renaming the old LVs on the target node")
12004 result = self.rpc.call_blockdev_rename(self.target_node,
12006 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12008 # Now we rename the new LVs to the old LVs
12009 self.lu.LogInfo("Renaming the new LVs on the target node")
12010 rename_new_to_old = [(new, old.physical_id)
12011 for old, new in zip(old_lvs, new_lvs)]
12012 result = self.rpc.call_blockdev_rename(self.target_node,
12014 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12016 # Intermediate steps of in memory modifications
12017 for old, new in zip(old_lvs, new_lvs):
12018 new.logical_id = old.logical_id
12019 self.cfg.SetDiskID(new, self.target_node)
12021 # We need to modify old_lvs so that removal later removes the
12022 # right LVs, not the newly added ones; note that old_lvs is a
12024 for disk in old_lvs:
12025 disk.logical_id = ren_fn(disk, temp_suffix)
12026 self.cfg.SetDiskID(disk, self.target_node)
12028 # Now that the new lvs have the old name, we can add them to the device
12029 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12030 result = self.rpc.call_blockdev_addchildren(self.target_node,
12031 (dev, self.instance), new_lvs)
12032 msg = result.fail_msg
12034 for new_lv in new_lvs:
12035 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12038 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12039 hint=("cleanup manually the unused logical"
12041 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12043 cstep = itertools.count(5)
12045 if self.early_release:
12046 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12047 self._RemoveOldStorage(self.target_node, iv_names)
12048 # TODO: Check if releasing locks early still makes sense
12049 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12051 # Release all resource locks except those used by the instance
12052 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12053 keep=self.node_secondary_ip.keys())
12055 # Release all node locks while waiting for sync
12056 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12058 # TODO: Can the instance lock be downgraded here? Take the optional disk
12059 # shutdown in the caller into consideration.
12062 # This can fail as the old devices are degraded and _WaitForSync
12063 # does a combined result over all disks, so we don't check its return value
12064 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12065 _WaitForSync(self.lu, self.instance)
12067 # Check all devices manually
12068 self._CheckDevices(self.instance.primary_node, iv_names)
12070 # Step: remove old storage
12071 if not self.early_release:
12072 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12073 self._RemoveOldStorage(self.target_node, iv_names)
12075 def _ExecDrbd8Secondary(self, feedback_fn):
12076 """Replace the secondary node for DRBD 8.
12078 The algorithm for replace is quite complicated:
12079 - for all disks of the instance:
12080 - create new LVs on the new node with same names
12081 - shutdown the drbd device on the old secondary
12082 - disconnect the drbd network on the primary
12083 - create the drbd device on the new secondary
12084 - network attach the drbd on the primary, using an artifice:
12085 the drbd code for Attach() will connect to the network if it
12086 finds a device which is connected to the good local disks but
12087 not network enabled
12088 - wait for sync across all devices
12089 - remove all disks from the old secondary
12091 Failures are not very well handled.

"""
steps_total = 6
12096 pnode = self.instance.primary_node
12098 # Step: check device activation
12099 self.lu.LogStep(1, steps_total, "Check device existence")
12100 self._CheckDisksExistence([self.instance.primary_node])
12101 self._CheckVolumeGroup([self.instance.primary_node])
12103 # Step: check other node consistency
12104 self.lu.LogStep(2, steps_total, "Check peer consistency")
12105 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12107 # Step: create new storage
12108 self.lu.LogStep(3, steps_total, "Allocate new storage")
12109 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12110 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12111 for idx, dev in enumerate(disks):
12112 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12113 (self.new_node, idx))
12114 # we pass force_create=True to force LVM creation
12115 for new_lv in dev.children:
12116 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12117 True, _GetInstanceInfoText(self.instance), False,
12120 # Step 4: drbd minors and drbd setup changes
12121 # after this, we must manually remove the drbd minors on both the
12122 # error and the success paths
12123 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12124 minors = self.cfg.AllocateDRBDMinor([self.new_node
12125 for dev in self.instance.disks],
12126 self.instance.name)
12127 logging.debug("Allocated minors %r", minors)
12130 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12131 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12132 (self.new_node, idx))
12133 # create new devices on new_node; note that we create two IDs:
12134 # one without port, so the drbd will be activated without
12135 # networking information on the new node at this stage, and one
12136 # with network, for the latter activation in step 4
12137 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12138 if self.instance.primary_node == o_node1:
12141 assert self.instance.primary_node == o_node2, "Three-node instance?"
12144 new_alone_id = (self.instance.primary_node, self.new_node, None,
12145 p_minor, new_minor, o_secret)
12146 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12147 p_minor, new_minor, o_secret)
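# Sketch with made-up values: if the old logical_id were
# ("node1.example.com", "node2.example.com", 11050, 0, 3, "secret") and the
# newly allocated minor were 7, then with node1 as the primary p_minor
# would be 0, new_alone_id would be
# ("node1.example.com", self.new_node, None, 0, 7, "secret") and new_net_id
# the same tuple with 11050 in place of None.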
12149 iv_names[idx] = (dev, dev.children, new_net_id)
12150 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12152 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12153 logical_id=new_alone_id,
12154 children=dev.children,
12157 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12160 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12162 _GetInstanceInfoText(self.instance), False,
12164 except errors.GenericError:
12165 self.cfg.ReleaseDRBDMinors(self.instance.name)
12168 # We have new devices, shutdown the drbd on the old secondary
12169 for idx, dev in enumerate(self.instance.disks):
12170 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12171 self.cfg.SetDiskID(dev, self.target_node)
12172 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12173 (dev, self.instance)).fail_msg
12175 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12176 "node: %s" % (idx, msg),
12177 hint=("Please cleanup this device manually as"
12178 " soon as possible"))
12180 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12181 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12182 self.instance.disks)[pnode]
12184 msg = result.fail_msg
12186 # detaches didn't succeed (unlikely)
12187 self.cfg.ReleaseDRBDMinors(self.instance.name)
12188 raise errors.OpExecError("Can't detach the disks from the network on"
12189 " old node: %s" % (msg,))
12191 # if we managed to detach at least one, we update all the disks of
12192 # the instance to point to the new secondary
12193 self.lu.LogInfo("Updating instance configuration")
12194 for dev, _, new_logical_id in iv_names.itervalues():
12195 dev.logical_id = new_logical_id
12196 self.cfg.SetDiskID(dev, self.instance.primary_node)
12198 self.cfg.Update(self.instance, feedback_fn)
12200 # Release all node locks (the configuration has been updated)
12201 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12203 # and now perform the drbd attach
12204 self.lu.LogInfo("Attaching primary drbds to new secondary"
12205 " (standalone => connected)")
12206 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12208 self.node_secondary_ip,
12209 (self.instance.disks, self.instance),
12210 self.instance.name,
12212 for to_node, to_result in result.items():
12213 msg = to_result.fail_msg
12215 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12217 hint=("please do a gnt-instance info to see the"
12218 " status of disks"))
12220 cstep = itertools.count(5)
12222 if self.early_release:
12223 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12224 self._RemoveOldStorage(self.target_node, iv_names)
12225 # TODO: Check if releasing locks early still makes sense
12226 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12228 # Release all resource locks except those used by the instance
12229 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12230 keep=self.node_secondary_ip.keys())
12232 # TODO: Can the instance lock be downgraded here? Take the optional disk
12233 # shutdown in the caller into consideration.
12236 # This can fail as the old devices are degraded and _WaitForSync
12237 # does a combined result over all disks, so we don't check its return value
12238 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12239 _WaitForSync(self.lu, self.instance)
12241 # Check all devices manually
12242 self._CheckDevices(self.instance.primary_node, iv_names)
12244 # Step: remove old storage
12245 if not self.early_release:
12246 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12247 self._RemoveOldStorage(self.target_node, iv_names)
12250 class LURepairNodeStorage(NoHooksLU):
12251 """Repairs the volume group on a node.
12256 def CheckArguments(self):
12257 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12259 storage_type = self.op.storage_type
12261 if (constants.SO_FIX_CONSISTENCY not in
12262 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12263 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12264 " repaired" % storage_type,
12265 errors.ECODE_INVAL)
12267 def ExpandNames(self):
12268 self.needed_locks = {
12269 locking.LEVEL_NODE: [self.op.node_name],
12272 def _CheckFaultyDisks(self, instance, node_name):
12273 """Ensure faulty disks abort the opcode or at least warn."""
12275 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12277 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12278 " node '%s'" % (instance.name, node_name),
12279 errors.ECODE_STATE)
12280 except errors.OpPrereqError, err:
12281 if self.op.ignore_consistency:
12282 self.LogWarning(str(err.args[0]))
12286 def CheckPrereq(self):
12287 """Check prerequisites.
12290 # Check whether any instance on this node has faulty disks
12291 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12292 if inst.admin_state != constants.ADMINST_UP:
12294 check_nodes = set(inst.all_nodes)
12295 check_nodes.discard(self.op.node_name)
12296 for inst_node_name in check_nodes:
12297 self._CheckFaultyDisks(inst, inst_node_name)
12299 def Exec(self, feedback_fn):
12300 feedback_fn("Repairing storage unit '%s' on %s ..." %
12301 (self.op.name, self.op.node_name))
12303 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12304 result = self.rpc.call_storage_execute(self.op.node_name,
12305 self.op.storage_type, st_args,
12307 constants.SO_FIX_CONSISTENCY)
12308 result.Raise("Failed to repair storage unit '%s' on %s" %
12309 (self.op.name, self.op.node_name))
12312 class LUNodeEvacuate(NoHooksLU):
12313 """Evacuates instances off a list of nodes.
12318 _MODE2IALLOCATOR = {
12319 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12320 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12321 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12323 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12324 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12325 constants.IALLOCATOR_NEVAC_MODES)
12327 def CheckArguments(self):
12328 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12330 def ExpandNames(self):
12331 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12333 if self.op.remote_node is not None:
12334 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12335 assert self.op.remote_node
12337 if self.op.remote_node == self.op.node_name:
12338 raise errors.OpPrereqError("Can not use evacuated node as a new"
12339 " secondary node", errors.ECODE_INVAL)
12341 if self.op.mode != constants.NODE_EVAC_SEC:
12342 raise errors.OpPrereqError("Without the use of an iallocator only"
12343 " secondary instances can be evacuated",
12344 errors.ECODE_INVAL)
12347 self.share_locks = _ShareAll()
12348 self.needed_locks = {
12349 locking.LEVEL_INSTANCE: [],
12350 locking.LEVEL_NODEGROUP: [],
12351 locking.LEVEL_NODE: [],
12354 # Determine nodes (via group) optimistically, needs verification once locks
12355 # have been acquired
12356 self.lock_nodes = self._DetermineNodes()
12358 def _DetermineNodes(self):
12359 """Gets the list of nodes to operate on.
12362 if self.op.remote_node is None:
12363 # Iallocator will choose any node(s) in the same group
12364 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12366 group_nodes = frozenset([self.op.remote_node])
12368 # Determine nodes to be locked
12369 return set([self.op.node_name]) | group_nodes
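# Sketch with hypothetical node names: evacuating "node1" without a remote
# node would lock node1 plus every member of node1's group(s), e.g.
# set(["node1", "node2", "node3"]); with remote_node="node9" only
# set(["node1", "node9"]) would be locked.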
12371 def _DetermineInstances(self):
12372 """Builds list of instances to operate on.
12375 assert self.op.mode in constants.NODE_EVAC_MODES
12377 if self.op.mode == constants.NODE_EVAC_PRI:
12378 # Primary instances only
12379 inst_fn = _GetNodePrimaryInstances
12380 assert self.op.remote_node is None, \
12381 "Evacuating primary instances requires iallocator"
12382 elif self.op.mode == constants.NODE_EVAC_SEC:
12383 # Secondary instances only
12384 inst_fn = _GetNodeSecondaryInstances
12387 assert self.op.mode == constants.NODE_EVAC_ALL
12388 inst_fn = _GetNodeInstances
12389 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12391 raise errors.OpPrereqError("Due to an issue with the iallocator"
12392 " interface it is not possible to evacuate"
12393 " all instances at once; specify explicitly"
12394 " whether to evacuate primary or secondary"
12396 errors.ECODE_INVAL)
12398 return inst_fn(self.cfg, self.op.node_name)
12400 def DeclareLocks(self, level):
12401 if level == locking.LEVEL_INSTANCE:
12402 # Lock instances optimistically, needs verification once node and group
12403 # locks have been acquired
12404 self.needed_locks[locking.LEVEL_INSTANCE] = \
12405 set(i.name for i in self._DetermineInstances())
12407 elif level == locking.LEVEL_NODEGROUP:
12408 # Lock node groups for all potential target nodes optimistically, needs
12409 # verification once nodes have been acquired
12410 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12411 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12413 elif level == locking.LEVEL_NODE:
12414 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12416 def CheckPrereq(self):
12418 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12419 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12420 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12422 need_nodes = self._DetermineNodes()
12424 if not owned_nodes.issuperset(need_nodes):
12425 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12426 " locks were acquired, current nodes are"
12427 " are '%s', used to be '%s'; retry the"
12429 (self.op.node_name,
12430 utils.CommaJoin(need_nodes),
12431 utils.CommaJoin(owned_nodes)),
12432 errors.ECODE_STATE)
12434 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12435 if owned_groups != wanted_groups:
12436 raise errors.OpExecError("Node groups changed since locks were acquired,"
12437 " current groups are '%s', used to be '%s';"
12438 " retry the operation" %
12439 (utils.CommaJoin(wanted_groups),
12440 utils.CommaJoin(owned_groups)))
12442 # Determine affected instances
12443 self.instances = self._DetermineInstances()
12444 self.instance_names = [i.name for i in self.instances]
12446 if set(self.instance_names) != owned_instances:
12447 raise errors.OpExecError("Instances on node '%s' changed since locks"
12448 " were acquired, current instances are '%s',"
12449 " used to be '%s'; retry the operation" %
12450 (self.op.node_name,
12451 utils.CommaJoin(self.instance_names),
12452 utils.CommaJoin(owned_instances)))
12454 if self.instance_names:
12455 self.LogInfo("Evacuating instances from node '%s': %s",
12457 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12459 self.LogInfo("No instances to evacuate from node '%s'",
12462 if self.op.remote_node is not None:
12463 for i in self.instances:
12464 if i.primary_node == self.op.remote_node:
12465 raise errors.OpPrereqError("Node %s is the primary node of"
12466 " instance %s, cannot use it as"
12468 (self.op.remote_node, i.name),
12469 errors.ECODE_INVAL)
12471 def Exec(self, feedback_fn):
12472 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12474 if not self.instance_names:
12475 # No instances to evacuate
12478 elif self.op.iallocator is not None:
12479 # TODO: Implement relocation to other group
12480 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12481 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12482 instances=list(self.instance_names))
12483 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12485 ial.Run(self.op.iallocator)
12487 if not ial.success:
12488 raise errors.OpPrereqError("Can't compute node evacuation using"
12489 " iallocator '%s': %s" %
12490 (self.op.iallocator, ial.info),
12491 errors.ECODE_NORES)
12493 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12495 elif self.op.remote_node is not None:
12496 assert self.op.mode == constants.NODE_EVAC_SEC
12498 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12499 remote_node=self.op.remote_node,
12501 mode=constants.REPLACE_DISK_CHG,
12502 early_release=self.op.early_release)]
12503 for instance_name in self.instance_names]
12506 raise errors.ProgrammerError("No iallocator or remote node")
12508 return ResultWithJobs(jobs)
12511 def _SetOpEarlyRelease(early_release, op):
12512 """Sets C{early_release} flag on opcodes if available.
12516 op.early_release = early_release
12517 except AttributeError:
12518 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12523 def _NodeEvacDest(use_nodes, group, nodes):
12524 """Returns group or nodes depending on caller's choice.
12528 return utils.CommaJoin(nodes)
12533 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12534 """Unpacks the result of change-group and node-evacuate iallocator requests.
12536 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12537 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12539 @type lu: L{LogicalUnit}
12540 @param lu: Logical unit instance
12541 @type alloc_result: tuple/list
12542 @param alloc_result: Result from iallocator
12543 @type early_release: bool
12544 @param early_release: Whether to release locks early if possible
12545 @type use_nodes: bool
12546 @param use_nodes: Whether to display node names instead of groups
12549 (moved, failed, jobs) = alloc_result
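# Shape of alloc_result, sketched with hypothetical entries as can be read
# off from how the three parts are used below:
#   moved  = [("inst1", "group1", ["nodeA", "nodeB"]), ...]
#   failed = [("inst2", "not enough memory"), ...]
#   jobs   = [[<serialized opcode dict>, ...], ...]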
12552 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12553 for (name, reason) in failed)
12554 lu.LogWarning("Unable to evacuate instances %s", failreason)
12555 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12558 lu.LogInfo("Instances to be moved: %s",
12559 utils.CommaJoin("%s (to %s)" %
12560 (name, _NodeEvacDest(use_nodes, group, nodes))
12561 for (name, group, nodes) in moved))
12563 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12564 map(opcodes.OpCode.LoadOpCode, ops))
for ops in jobs]
12568 def _DiskSizeInBytesToMebibytes(lu, size):
12569 """Converts a disk size in bytes to mebibytes.
12571 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12574 (mib, remainder) = divmod(size, 1024 * 1024)
if remainder != 0:
12577 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12578 " to not overwrite existing data (%s bytes will not be"
12579 " wiped)", (1024 * 1024) - remainder)
mib += 1

return mib
12585 class LUInstanceGrowDisk(LogicalUnit):
12586 """Grow a disk of an instance.
12589 HPATH = "disk-grow"
12590 HTYPE = constants.HTYPE_INSTANCE
12593 def ExpandNames(self):
12594 self._ExpandAndLockInstance()
12595 self.needed_locks[locking.LEVEL_NODE] = []
12596 self.needed_locks[locking.LEVEL_NODE_RES] = []
12597 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12598 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12600 def DeclareLocks(self, level):
12601 if level == locking.LEVEL_NODE:
12602 self._LockInstancesNodes()
12603 elif level == locking.LEVEL_NODE_RES:
12605 self.needed_locks[locking.LEVEL_NODE_RES] = \
12606 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12608 def BuildHooksEnv(self):
12609 """Build hooks env.
12611 This runs on the master, the primary and all the secondaries.
12615 "DISK": self.op.disk,
12616 "AMOUNT": self.op.amount,
12617 "ABSOLUTE": self.op.absolute,
12619 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12622 def BuildHooksNodes(self):
12623 """Build hooks nodes.
12626 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12629 def CheckPrereq(self):
12630 """Check prerequisites.
12632 This checks that the instance is in the cluster.
12635 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12636 assert instance is not None, \
12637 "Cannot retrieve locked instance %s" % self.op.instance_name
12638 nodenames = list(instance.all_nodes)
12639 for node in nodenames:
12640 _CheckNodeOnline(self, node)
12642 self.instance = instance
12644 if instance.disk_template not in constants.DTS_GROWABLE:
12645 raise errors.OpPrereqError("Instance's disk layout does not support"
12646 " growing", errors.ECODE_INVAL)
12648 self.disk = instance.FindDisk(self.op.disk)
12650 if self.op.absolute:
12651 self.target = self.op.amount
12652 self.delta = self.target - self.disk.size
12654 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12655 "current disk size (%s)" %
12656 (utils.FormatUnit(self.target, "h"),
12657 utils.FormatUnit(self.disk.size, "h")),
12658 errors.ECODE_STATE)
12660 self.delta = self.op.amount
12661 self.target = self.disk.size + self.delta
12663 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12664 utils.FormatUnit(self.delta, "h"),
12665 errors.ECODE_INVAL)
12667 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
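# Illustration of the two growth modes above (hypothetical sizes, in MiB):
# growing a 10240 MiB disk with amount=2048 and absolute=False gives
# delta=2048 and target=12288; with absolute=True and amount=20480 it gives
# target=20480 and delta=10240. A negative delta is rejected in both cases.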
12669 def _CheckDiskSpace(self, nodenames, req_vgspace):
12670 template = self.instance.disk_template
12671 if template not in (constants.DTS_NO_FREE_SPACE_CHECK):
12672 # TODO: check the free disk space for file, when that feature will be
12674 nodes = map(self.cfg.GetNodeInfo, nodenames)
12675 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12678 # With exclusive storage we need to do something smarter than just looking
12679 # at free space; for now, let's simply abort the operation.
12680 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12681 " is enabled", errors.ECODE_STATE)
12682 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12684 def Exec(self, feedback_fn):
12685 """Execute disk grow.
12688 instance = self.instance
12691 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12692 assert (self.owned_locks(locking.LEVEL_NODE) ==
12693 self.owned_locks(locking.LEVEL_NODE_RES))
12695 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12697 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12699 raise errors.OpExecError("Cannot activate block device to grow")
12701 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12702 (self.op.disk, instance.name,
12703 utils.FormatUnit(self.delta, "h"),
12704 utils.FormatUnit(self.target, "h")))
12706 # First run all grow ops in dry-run mode
12707 for node in instance.all_nodes:
12708 self.cfg.SetDiskID(disk, node)
12709 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12711 result.Raise("Dry-run grow request failed to node %s" % node)
12713     if wipe_disks:
12714       # Get disk size from primary node for wiping
12715 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12716 result.Raise("Failed to retrieve disk size from node '%s'" %
12717 instance.primary_node)
12719 (disk_size_in_bytes, ) = result.payload
12721 if disk_size_in_bytes is None:
12722 raise errors.OpExecError("Failed to retrieve disk size from primary"
12723 " node '%s'" % instance.primary_node)
12725 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12727 assert old_disk_size >= disk.size, \
12728 ("Retrieved disk size too small (got %s, should be at least %s)" %
12729 (old_disk_size, disk.size))
12730     else:
12731       old_disk_size = None
12733 # We know that (as far as we can test) operations across different
12734 # nodes will succeed, time to run it for real on the backing storage
12735 for node in instance.all_nodes:
12736 self.cfg.SetDiskID(disk, node)
12737 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12739 result.Raise("Grow request failed to node %s" % node)
12741 # And now execute it for logical storage, on the primary node
12742 node = instance.primary_node
12743 self.cfg.SetDiskID(disk, node)
12744 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12746 result.Raise("Grow request failed to node %s" % node)
12748 disk.RecordGrow(self.delta)
12749 self.cfg.Update(instance, feedback_fn)
12751 # Changes have been recorded, release node lock
12752 _ReleaseLocks(self, locking.LEVEL_NODE)
12754 # Downgrade lock while waiting for sync
12755 self.glm.downgrade(locking.LEVEL_INSTANCE)
12757 assert wipe_disks ^ (old_disk_size is None)
12759     if wipe_disks:
12760       assert instance.disks[self.op.disk] == disk
12762 # Wipe newly added disk space
12763 _WipeDisks(self, instance,
12764 disks=[(self.op.disk, disk, old_disk_size)])
12766 if self.op.wait_for_sync:
12767 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12768       if disk_abort:
12769         self.LogWarning("Disk syncing has not returned a good status; check"
12770                         " the instance")
12771         if instance.admin_state != constants.ADMINST_UP:
12772           _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12773 elif instance.admin_state != constants.ADMINST_UP:
12774 self.LogWarning("Not shutting down the disk even if the instance is"
12775 " not supposed to be running because no wait for"
12776 " sync mode was requested")
12778 assert self.owned_locks(locking.LEVEL_NODE_RES)
12779 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12782 class LUInstanceQueryData(NoHooksLU):
12783 """Query runtime instance data.
12788 def ExpandNames(self):
12789 self.needed_locks = {}
12791 # Use locking if requested or when non-static information is wanted
12792 if not (self.op.static or self.op.use_locking):
12793 self.LogWarning("Non-static data requested, locks need to be acquired")
12794 self.op.use_locking = True
12796 if self.op.instances or not self.op.use_locking:
12797 # Expand instance names right here
12798 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12799     else:
12800       # Will use acquired locks
12801 self.wanted_names = None
12803 if self.op.use_locking:
12804 self.share_locks = _ShareAll()
12806 if self.wanted_names is None:
12807 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12808       else:
12809         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12811 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12812 self.needed_locks[locking.LEVEL_NODE] = []
12813 self.needed_locks[locking.LEVEL_NETWORK] = []
12814 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12816 def DeclareLocks(self, level):
12817 if self.op.use_locking:
12818 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12819 if level == locking.LEVEL_NODEGROUP:
12821 # Lock all groups used by instances optimistically; this requires going
12822 # via the node before it's locked, requiring verification later on
12823 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12824 frozenset(group_uuid
12825                     for instance_name in owned_instances
12826                     for group_uuid in
12827                       self.cfg.GetInstanceNodeGroups(instance_name))
12829 elif level == locking.LEVEL_NODE:
12830 self._LockInstancesNodes()
12832 elif level == locking.LEVEL_NETWORK:
12833         self.needed_locks[locking.LEVEL_NETWORK] = \
12834           frozenset(net_uuid
12835                     for instance_name in owned_instances
12836                     for net_uuid in
12837                       self.cfg.GetInstanceNetworks(instance_name))
12839 def CheckPrereq(self):
12840 """Check prerequisites.
12842 This only checks the optional instance list against the existing names.
12845 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12846 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12847 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12848 owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
12850 if self.wanted_names is None:
12851 assert self.op.use_locking, "Locking was not used"
12852 self.wanted_names = owned_instances
12854 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12856 if self.op.use_locking:
12857       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12858                                 None)
12859     else:
12860       assert not (owned_instances or owned_groups or
12861 owned_nodes or owned_networks)
12863 self.wanted_instances = instances.values()
12865 def _ComputeBlockdevStatus(self, node, instance, dev):
12866     """Returns the status of a block device
12868     """
12869     if self.op.static or not node:
12870       return None
12872     self.cfg.SetDiskID(dev, node)
12874     result = self.rpc.call_blockdev_find(node, dev)
12875     if result.offline:
12876       return None
12878     result.Raise("Can't compute disk status for %s" % instance.name)
12880     status = result.payload
12881     if status is None:
12882       return None
12884     return (status.dev_path, status.major, status.minor,
12885 status.sync_percent, status.estimated_time,
12886 status.is_degraded, status.ldisk_status)
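  # Editor's note (illustrative): the tuple built above mirrors the payload of
  # call_blockdev_find. A healthy, fully synced device might look roughly like
  #   ("/dev/drbd0", 147, 0, 100.0, 0, False, constants.LDS_OKAY)
  # i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
  # ldisk_status); the concrete values here are hypothetical.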
12888 def _ComputeDiskStatus(self, instance, snode, dev):
12889 """Compute block device status.
12892 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12894 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12896 def _ComputeDiskStatusInner(self, instance, snode, dev):
12897 """Compute block device status.
12899 @attention: The device has to be annotated already.
12902 if dev.dev_type in constants.LDS_DRBD:
12903 # we change the snode then (otherwise we use the one passed in)
12904 if dev.logical_id[0] == instance.primary_node:
12905 snode = dev.logical_id[1]
12906       else:
12907         snode = dev.logical_id[0]
12909     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12910                                               instance, dev)
12911 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12914     dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12915                                       instance, snode),
12916                        dev.children)
12918     return {
12921 "iv_name": dev.iv_name,
12922 "dev_type": dev.dev_type,
12923 "logical_id": dev.logical_id,
12924 "physical_id": dev.physical_id,
12925 "pstatus": dev_pstatus,
12926 "sstatus": dev_sstatus,
12927       "children": dev_children,
12928       "mode": dev.mode,
12929       "size": dev.size,
12930       }
12932 def Exec(self, feedback_fn):
12933     """Gather and return data"""
12934     result = {}
12936 cluster = self.cfg.GetClusterInfo()
12938 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12939 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12941 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12942 for node in nodes.values()))
12944 group2name_fn = lambda uuid: groups[uuid].name
12945 for instance in self.wanted_instances:
12946 pnode = nodes[instance.primary_node]
12948 if self.op.static or pnode.offline:
12949         remote_state = None
12950         if pnode.offline:
12951           self.LogWarning("Primary node %s is marked offline, returning static"
12952                           " information only for instance %s" %
12953                           (pnode.name, instance.name))
12954       else:
12955         remote_info = self.rpc.call_instance_info(instance.primary_node,
12956                                                   instance.name,
12957                                                   instance.hypervisor)
12958 remote_info.Raise("Error checking node %s" % instance.primary_node)
12959 remote_info = remote_info.payload
12960 if remote_info and "state" in remote_info:
12961 remote_state = "up"
12962         else:
12963           if instance.admin_state == constants.ADMINST_UP:
12964             remote_state = "down"
12965           else:
12966             remote_state = instance.admin_state
12968       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12969                   instance.disks)
12971 snodes_group_uuids = [nodes[snode_name].group
12972 for snode_name in instance.secondary_nodes]
12974 result[instance.name] = {
12975 "name": instance.name,
12976 "config_state": instance.admin_state,
12977 "run_state": remote_state,
12978 "pnode": instance.primary_node,
12979 "pnode_group_uuid": pnode.group,
12980 "pnode_group_name": group2name_fn(pnode.group),
12981 "snodes": instance.secondary_nodes,
12982 "snodes_group_uuids": snodes_group_uuids,
12983 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12985 # this happens to be the same format used for hooks
12986 "nics": _NICListToTuple(self, instance.nics),
12987         "disk_template": instance.disk_template,
12988         "disks": disks,
12989 "hypervisor": instance.hypervisor,
12990 "network_port": instance.network_port,
12991 "hv_instance": instance.hvparams,
12992 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12993 "be_instance": instance.beparams,
12994 "be_actual": cluster.FillBE(instance),
12995 "os_instance": instance.osparams,
12996 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12997 "serial_no": instance.serial_no,
12998 "mtime": instance.mtime,
12999 "ctime": instance.ctime,
13000         "uuid": instance.uuid,
13001         }
13003     return result
13006 def PrepareContainerMods(mods, private_fn):
13007 """Prepares a list of container modifications by adding a private data field.
13009 @type mods: list of tuples; (operation, index, parameters)
13010 @param mods: List of modifications
13011 @type private_fn: callable or None
13012   @param private_fn: Callable for constructing a private data field for a
13013     modification
13015   """
13017   if private_fn is None:
13018     fn = lambda: None
13019   else:
13020     fn = private_fn
13022   return [(op, idx, params, fn()) for (op, idx, params) in mods]
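# Editor's note (illustrative sketch, not part of the original code): "mods"
# is the normalized (operation, index, parameters) list, e.g.
#   [(constants.DDM_ADD, -1, {"size": 1024}),
#    (constants.DDM_MODIFY, 0, {"mode": "ro"})]
# and the return value appends one private object per modification:
#   [(constants.DDM_ADD, -1, {"size": 1024}, _InstNicModPrivate()), ...]
# (assuming private_fn=_InstNicModPrivate, as used for NIC changes below).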
13025 #: Type description for changes as returned by L{ApplyContainerMods}'s
13027 _TApplyContModsCbChanges = \
13028 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
13029     ht.TNonEmptyString,
13030     ht.TAny,
13031     ])))
13034 def ApplyContainerMods(kind, container, chgdesc, mods,
13035 create_fn, modify_fn, remove_fn):
13036 """Applies descriptions in C{mods} to C{container}.
13039 @param kind: One-word item description
13040 @type container: list
13041 @param container: Container to modify
13042 @type chgdesc: None or list
13043 @param chgdesc: List of applied changes
13045 @param mods: Modifications as returned by L{PrepareContainerMods}
13046 @type create_fn: callable
13047 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13048 receives absolute item index, parameters and private data object as added
13049 by L{PrepareContainerMods}, returns tuple containing new item and changes
13051 @type modify_fn: callable
13052 @param modify_fn: Callback for modifying an existing item
13053 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13054 and private data object as added by L{PrepareContainerMods}, returns
13056 @type remove_fn: callable
13057 @param remove_fn: Callback on removing item; receives absolute item index,
13058     item and private data object as added by L{PrepareContainerMods}
13060   """
13061   for (op, idx, params, private) in mods:
13062     if idx == -1:
13063       # Append
13064       absidx = len(container) - 1
13065     elif idx < 0:
13066       raise IndexError("Not accepting negative indices other than -1")
13067     elif idx > len(container):
13068       raise IndexError("Got %s index %s, but there are only %s" %
13069                        (kind, idx, len(container)))
13070     else:
13071       absidx = idx
13073     changes = None
13075     if op == constants.DDM_ADD:
13076       # Calculate where item will be added
13077       if idx == -1:
13078         addidx = len(container)
13079       else:
13080         addidx = idx
13082       if create_fn is None:
13083         item = params
13084       else:
13085         (item, changes) = create_fn(addidx, params, private)
13087       if idx == -1:
13088         container.append(item)
13089       else:
13091         assert idx <= len(container)
13092         # list.insert does so before the specified index
13093         container.insert(idx, item)
13094     else:
13095       # Retrieve existing item
13096       try:
13097         item = container[absidx]
13098       except IndexError:
13099         raise IndexError("Invalid %s index %s" % (kind, idx))
13101       if op == constants.DDM_REMOVE:
13102         assert not params
13104         if remove_fn is not None:
13105           remove_fn(absidx, item, private)
13107         changes = [("%s/%s" % (kind, absidx), "remove")]
13109         assert container[absidx] == item
13110         del container[absidx]
13111       elif op == constants.DDM_MODIFY:
13112         if modify_fn is not None:
13113           changes = modify_fn(absidx, item, params, private)
13114       else:
13115         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13117     assert _TApplyContModsCbChanges(changes)
13119     if not (chgdesc is None or changes is None):
13120       chgdesc.extend(changes)
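# Editor's note (illustrative usage sketch): LUInstanceSetParams drives this
# helper for both validation and the real changes, for example
#   chgdesc = []
#   ApplyContainerMods("disk", instance.disks, chgdesc, self.diskmod,
#                      self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
# after which "chgdesc" holds (name, value) pairs such as
#   ("disk/1", "add:size=1024,mode=rw")
# that end up in the opcode result.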
13123 def _UpdateIvNames(base_index, disks):
13124 """Updates the C{iv_name} attribute of disks.
13126 @type disks: list of L{objects.Disk}
13129 for (idx, disk) in enumerate(disks):
13130 disk.iv_name = "disk/%s" % (base_index + idx, )
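# Editor's note (illustrative): LUInstanceSetParams.Exec below calls
# _UpdateIvNames(0, instance.disks) after applying disk changes, so that e.g.
# removing the middle disk of a three-disk instance leaves the remaining
# volumes named "disk/0" and "disk/1" again, with no gap in the numbering.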
13133 class _InstNicModPrivate:
13134 """Data structure for network interface modifications.
13136   Used by L{LUInstanceSetParams}.
13138   """
13139   def __init__(self):
13140     self.params = None
13141     self.filled = None
13144 class LUInstanceSetParams(LogicalUnit):
13145 """Modifies an instances's parameters.
13148 HPATH = "instance-modify"
13149 HTYPE = constants.HTYPE_INSTANCE
13152   @staticmethod
13153   def _UpgradeDiskNicMods(kind, mods, verify_fn):
13154     assert ht.TList(mods)
13155     assert not mods or len(mods[0]) in (2, 3)
13157     if mods and len(mods[0]) == 2:
13158       result = []
13159       addremove = 0
13161       for op, params in mods:
13162         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
13163           result.append((op, -1, params))
13164           addremove += 1
13166           if addremove > 1:
13167             raise errors.OpPrereqError("Only one %s add or remove operation is"
13168                                        " supported at a time" % kind,
13169                                        errors.ECODE_INVAL)
13170         else:
13171           result.append((constants.DDM_MODIFY, op, params))
13173       assert verify_fn(result)
13174     else:
13175       result = mods
13177     return result
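  # Editor's note (illustrative): this upgrades the legacy two-element format
  # to the indexed three-element one, e.g.
  #   [(constants.DDM_ADD, {"size": 1024})] -> [(constants.DDM_ADD, -1, {"size": 1024})]
  #   [(0, {"mode": "ro"})]                 -> [(constants.DDM_MODIFY, 0, {"mode": "ro"})]
  # with only one add/remove per request accepted, as enforced above.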
13179   @staticmethod
13180   def _CheckMods(kind, mods, key_types, item_fn):
13181     """Ensures requested disk/NIC modifications are valid.
13183     """
13184 for (op, _, params) in mods:
13185 assert ht.TDict(params)
13187 # If 'key_types' is an empty dict, we assume we have an
13188 # 'ext' template and thus do not ForceDictType
13189       if key_types:
13190         utils.ForceDictType(params, key_types)
13192       if op == constants.DDM_REMOVE:
13193         if params:
13194           raise errors.OpPrereqError("No settings should be passed when"
13195 " removing a %s" % kind,
13196 errors.ECODE_INVAL)
13197 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
13198 item_fn(op, params)
13199       else:
13200         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13202   @staticmethod
13203   def _VerifyDiskModification(op, params):
13204     """Verifies a disk modification.
13206     """
13207 if op == constants.DDM_ADD:
13208 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
13209 if mode not in constants.DISK_ACCESS_SET:
13210 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
13211 errors.ECODE_INVAL)
13213       size = params.get(constants.IDISK_SIZE, None)
13214       if size is None:
13215 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
13216 constants.IDISK_SIZE, errors.ECODE_INVAL)
13218       try:
13219         size = int(size)
13220       except (TypeError, ValueError), err:
13221 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
13222 errors.ECODE_INVAL)
13224 params[constants.IDISK_SIZE] = size
13226 elif op == constants.DDM_MODIFY:
13227 if constants.IDISK_SIZE in params:
13228 raise errors.OpPrereqError("Disk size change not possible, use"
13229 " grow-disk", errors.ECODE_INVAL)
13230 if constants.IDISK_MODE not in params:
13231 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
13232 " modification supported, but missing",
13233 errors.ECODE_NOENT)
13234 if len(params) > 1:
13235 raise errors.OpPrereqError("Disk modification doesn't support"
13236 " additional arbitrary parameters",
13237 errors.ECODE_INVAL)
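  # Editor's note (illustrative): a valid DDM_ADD parameter dict therefore
  # looks like {constants.IDISK_SIZE: 1024, constants.IDISK_MODE: "rw"}, with
  # the mode defaulting to DISK_RDWR when omitted, while DDM_MODIFY only
  # accepts {constants.IDISK_MODE: "ro"}; size changes must use grow-disk.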
13239   @staticmethod
13240   def _VerifyNicModification(op, params):
13241     """Verifies a network interface modification.
13243     """
13244 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
13245 ip = params.get(constants.INIC_IP, None)
13246 req_net = params.get(constants.INIC_NETWORK, None)
13247 link = params.get(constants.NIC_LINK, None)
13248 mode = params.get(constants.NIC_MODE, None)
13249 if req_net is not None:
13250 if req_net.lower() == constants.VALUE_NONE:
13251           params[constants.INIC_NETWORK] = None
13252           req_net = None
13253         elif link is not None or mode is not None:
13254 raise errors.OpPrereqError("If network is given"
13255 " mode or link should not",
13256 errors.ECODE_INVAL)
13258 if op == constants.DDM_ADD:
13259 macaddr = params.get(constants.INIC_MAC, None)
13260 if macaddr is None:
13261           params[constants.INIC_MAC] = constants.VALUE_AUTO
13263       if ip is not None:
13264         if ip.lower() == constants.VALUE_NONE:
13265           params[constants.INIC_IP] = None
13266         else:
13267           if ip.lower() == constants.NIC_IP_POOL:
13268 if op == constants.DDM_ADD and req_net is None:
13269 raise errors.OpPrereqError("If ip=pool, parameter network"
13270                                          " must be passed too",
13271                                          errors.ECODE_INVAL)
13272           else:
13273             if not netutils.IPAddress.IsValid(ip):
13274 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
13275 errors.ECODE_INVAL)
13277 if constants.INIC_MAC in params:
13278 macaddr = params[constants.INIC_MAC]
13279 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13280 macaddr = utils.NormalizeAndValidateMac(macaddr)
13282 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
13283 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
13284 " modifying an existing NIC",
13285 errors.ECODE_INVAL)
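  # Editor's note (illustrative): for a NIC addition the parameters might be
  #   {constants.INIC_IP: "pool", constants.INIC_NETWORK: "net1",
  #    constants.INIC_MAC: constants.VALUE_AUTO}
  # where "pool" requires a network (as checked above) and VALUE_AUTO asks the
  # cluster to generate a MAC, which is rejected for DDM_MODIFY on an
  # existing NIC.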
13287 def CheckArguments(self):
13288 if not (self.op.nics or self.op.disks or self.op.disk_template or
13289 self.op.hvparams or self.op.beparams or self.op.os_name or
13290 self.op.offline is not None or self.op.runtime_mem):
13291 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13293 if self.op.hvparams:
13294 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
13295 "hypervisor", "instance", "cluster")
13297 self.op.disks = self._UpgradeDiskNicMods(
13298 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13299 self.op.nics = self._UpgradeDiskNicMods(
13300 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13302 if self.op.disks and self.op.disk_template is not None:
13303 raise errors.OpPrereqError("Disk template conversion and other disk"
13304 " changes not supported at the same time",
13305 errors.ECODE_INVAL)
13307 if (self.op.disk_template and
13308 self.op.disk_template in constants.DTS_INT_MIRROR and
13309 self.op.remote_node is None):
13310 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13311 " one requires specifying a secondary node",
13312 errors.ECODE_INVAL)
13314 # Check NIC modifications
13315 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13316 self._VerifyNicModification)
13318 def ExpandNames(self):
13319 self._ExpandAndLockInstance()
13320 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13321 # Can't even acquire node locks in shared mode as upcoming changes in
13322 # Ganeti 2.6 will start to modify the node object on disk conversion
13323 self.needed_locks[locking.LEVEL_NODE] = []
13324 self.needed_locks[locking.LEVEL_NODE_RES] = []
13325 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
13326 # Look node group to look up the ipolicy
13327 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13329 def DeclareLocks(self, level):
13330 if level == locking.LEVEL_NODEGROUP:
13331 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13332 # Acquire locks for the instance's nodegroups optimistically. Needs
13333 # to be verified in CheckPrereq
13334 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13335 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13336 elif level == locking.LEVEL_NODE:
13337 self._LockInstancesNodes()
13338 if self.op.disk_template and self.op.remote_node:
13339 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13340 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13341 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13343 self.needed_locks[locking.LEVEL_NODE_RES] = \
13344 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13346 def BuildHooksEnv(self):
13347 """Build hooks env.
13349     This runs on the master, primary and secondaries.
13351     """
13352     args = {}
13353 if constants.BE_MINMEM in self.be_new:
13354 args["minmem"] = self.be_new[constants.BE_MINMEM]
13355 if constants.BE_MAXMEM in self.be_new:
13356 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13357 if constants.BE_VCPUS in self.be_new:
13358 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13359 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13360 # information at all.
13362     if self._new_nics is not None:
13363       nics = []
13365 for nic in self._new_nics:
13366 n = copy.deepcopy(nic)
13367 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13368 n.nicparams = nicparams
13369 nics.append(_NICToTuple(self, n))
13371 args["nics"] = nics
13373 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13374 if self.op.disk_template:
13375 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13376 if self.op.runtime_mem:
13377       env["RUNTIME_MEMORY"] = self.op.runtime_mem
13379     return env
13381 def BuildHooksNodes(self):
13382     """Build hooks nodes.
13384     """
13385     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13386     return (nl, nl)
13388 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
13389 old_params, cluster, pnode):
13391 update_params_dict = dict([(key, params[key])
13392                                for key in constants.NICS_PARAMETERS
13393                                if key in params])
13395 req_link = update_params_dict.get(constants.NIC_LINK, None)
13396 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13398 new_net_uuid = None
13399 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
13400 if new_net_uuid_or_name:
13401 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
13402 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
13404     if old_net_uuid:
13405       old_net_obj = self.cfg.GetNetwork(old_net_uuid)
13407     if new_net_uuid:
13408       netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
13409       if not netparams:
13410         raise errors.OpPrereqError("No netparams found for the network"
13411                                    " %s, probably not connected" %
13412                                    new_net_obj.name, errors.ECODE_INVAL)
13413       new_params = dict(netparams)
13414     else:
13415       new_params = _GetUpdatedParams(old_params, update_params_dict)
13417 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13419 new_filled_params = cluster.SimpleFillNIC(new_params)
13420 objects.NIC.CheckParameterSyntax(new_filled_params)
13422 new_mode = new_filled_params[constants.NIC_MODE]
13423 if new_mode == constants.NIC_MODE_BRIDGED:
13424 bridge = new_filled_params[constants.NIC_LINK]
13425 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13426       if msg:
13427         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13428         if self.op.force:
13429           self.warn.append(msg)
13430         else:
13431           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13433 elif new_mode == constants.NIC_MODE_ROUTED:
13434 ip = params.get(constants.INIC_IP, old_ip)
13435       if ip is None:
13436         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13437 " on a routed NIC", errors.ECODE_INVAL)
13439 elif new_mode == constants.NIC_MODE_OVS:
13440 # TODO: check OVS link
13441 self.LogInfo("OVS links are currently not checked for correctness")
13443 if constants.INIC_MAC in params:
13444 mac = params[constants.INIC_MAC]
13446 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13447 errors.ECODE_INVAL)
13448 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13449 # otherwise generate the MAC address
13450 params[constants.INIC_MAC] = \
13451 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13452       else:
13453         # or validate/reserve the current one
13454         try:
13455           self.cfg.ReserveMAC(mac, self.proc.GetECId())
13456 except errors.ReservationError:
13457 raise errors.OpPrereqError("MAC address '%s' already in use"
13458 " in cluster" % mac,
13459 errors.ECODE_NOTUNIQUE)
13460 elif new_net_uuid != old_net_uuid:
13462       def get_net_prefix(net_uuid):
13463         mac_prefix = None
13464         if net_uuid:
13465           nobj = self.cfg.GetNetwork(net_uuid)
13466           mac_prefix = nobj.mac_prefix
13468         return mac_prefix
13470       new_prefix = get_net_prefix(new_net_uuid)
13471 old_prefix = get_net_prefix(old_net_uuid)
13472 if old_prefix != new_prefix:
13473 params[constants.INIC_MAC] = \
13474 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13476 # if there is a change in (ip, network) tuple
13477 new_ip = params.get(constants.INIC_IP, old_ip)
13478 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
13479       if new_ip:
13480         # if IP is pool then require a network and generate one IP
13481         if new_ip.lower() == constants.NIC_IP_POOL:
13482           if new_net_uuid:
13483             try:
13484               new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
13485             except errors.ReservationError:
13486               raise errors.OpPrereqError("Unable to get a free IP"
13487                                          " from the address pool",
13488                                          errors.ECODE_STATE)
13489             self.LogInfo("Chose IP %s from network %s",
13490                          new_ip,
13491                          new_net_obj.name)
13492             params[constants.INIC_IP] = new_ip
13493           else:
13494             raise errors.OpPrereqError("ip=pool, but no network found",
13495                                        errors.ECODE_INVAL)
13496         # Reserve the new IP if it belongs to the new network, if any
13497         elif new_net_uuid:
13498           try:
13499             self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
13500             self.LogInfo("Reserving IP %s in network %s",
13501                          new_ip, new_net_obj.name)
13502           except errors.ReservationError:
13503             raise errors.OpPrereqError("IP %s not available in network %s" %
13504                                        (new_ip, new_net_obj.name),
13505                                        errors.ECODE_NOTUNIQUE)
13506         # new network is None so check if new IP is a conflicting IP
13507         elif self.op.conflicts_check:
13508           _CheckForConflictingIp(self, new_ip, pnode)
13510       # release old IP if old network is not None
13511       if old_ip and old_net_uuid:
13512         try:
13513           self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
13514         except errors.AddressPoolError:
13515           logging.warning("Release IP %s not contained in network %s",
13516                           old_ip, old_net_obj.name)
13518 # there are no changes in (ip, network) tuple and old network is not None
13519 elif (old_net_uuid is not None and
13520 (req_link is not None or req_mode is not None)):
13521 raise errors.OpPrereqError("Not allowed to change link or mode of"
13522 " a NIC that is connected to a network",
13523 errors.ECODE_INVAL)
13525 private.params = new_params
13526 private.filled = new_filled_params
13528 def _PreCheckDiskTemplate(self, pnode_info):
13529 """CheckPrereq checks related to a new disk template."""
13530 # Arguments are passed to avoid configuration lookups
13531 instance = self.instance
13532 pnode = instance.primary_node
13533 cluster = self.cluster
13534 if instance.disk_template == self.op.disk_template:
13535 raise errors.OpPrereqError("Instance already has disk template %s" %
13536 instance.disk_template, errors.ECODE_INVAL)
13538 if (instance.disk_template,
13539 self.op.disk_template) not in self._DISK_CONVERSIONS:
13540 raise errors.OpPrereqError("Unsupported disk template conversion from"
13541 " %s to %s" % (instance.disk_template,
13542 self.op.disk_template),
13543 errors.ECODE_INVAL)
13544 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13545 msg="cannot change disk template")
13546 if self.op.disk_template in constants.DTS_INT_MIRROR:
13547 if self.op.remote_node == pnode:
13548 raise errors.OpPrereqError("Given new secondary node %s is the same"
13549 " as the primary node of the instance" %
13550 self.op.remote_node, errors.ECODE_STATE)
13551 _CheckNodeOnline(self, self.op.remote_node)
13552 _CheckNodeNotDrained(self, self.op.remote_node)
13553 # FIXME: here we assume that the old instance type is DT_PLAIN
13554 assert instance.disk_template == constants.DT_PLAIN
13555 disks = [{constants.IDISK_SIZE: d.size,
13556 constants.IDISK_VG: d.logical_id[0]}
13557 for d in instance.disks]
13558 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13559 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13561 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13562 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13563       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13564                                                               snode_group)
13565 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13566 ignore=self.op.ignore_ipolicy)
13567 if pnode_info.group != snode_info.group:
13568 self.LogWarning("The primary and secondary nodes are in two"
13569 " different node groups; the disk parameters"
13570 " from the first disk's node group will be"
13573 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
13574 # Make sure none of the nodes require exclusive storage
13575 nodes = [pnode_info]
13576 if self.op.disk_template in constants.DTS_INT_MIRROR:
13578 nodes.append(snode_info)
13579 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13580 if compat.any(map(has_es, nodes)):
13581 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13582 " storage is enabled" % (instance.disk_template,
13583 self.op.disk_template))
13584 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13586 def CheckPrereq(self):
13587 """Check prerequisites.
13589 This only checks the instance list against the existing names.
13592 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13593 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13595 cluster = self.cluster = self.cfg.GetClusterInfo()
13596 assert self.instance is not None, \
13597 "Cannot retrieve locked instance %s" % self.op.instance_name
13599 pnode = instance.primary_node
13600 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13601 nodelist = list(instance.all_nodes)
13602 pnode_info = self.cfg.GetNodeInfo(pnode)
13603 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13605 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13606 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13607 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13609     # dictionary with instance information after the modification
13610     ispec = {}
13612 # Check disk modifications. This is done here and not in CheckArguments
13613 # (as with NICs), because we need to know the instance's disk template
13614 if instance.disk_template == constants.DT_EXT:
13615 self._CheckMods("disk", self.op.disks, {},
13616 self._VerifyDiskModification)
13617     else:
13618       self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13619 self._VerifyDiskModification)
13621 # Prepare disk/NIC modifications
13622 self.diskmod = PrepareContainerMods(self.op.disks, None)
13623 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13625 # Check the validity of the `provider' parameter
13626 if instance.disk_template in constants.DT_EXT:
13627 for mod in self.diskmod:
13628 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13629 if mod[0] == constants.DDM_ADD:
13630 if ext_provider is None:
13631 raise errors.OpPrereqError("Instance template is '%s' and parameter"
13632 " '%s' missing, during disk add" %
13634 constants.IDISK_PROVIDER),
13635 errors.ECODE_NOENT)
13636         elif mod[0] == constants.DDM_MODIFY:
13637           if ext_provider:
13638             raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13639                                        " modification" %
13640                                        constants.IDISK_PROVIDER,
13641                                        errors.ECODE_INVAL)
13642     else:
13643       for mod in self.diskmod:
13644 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13645 if ext_provider is not None:
13646 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13647 " instances of type '%s'" %
13648 (constants.IDISK_PROVIDER,
13650 errors.ECODE_INVAL)
13653 if self.op.os_name and not self.op.force:
13654 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13655 self.op.force_variant)
13656 instance_os = self.op.os_name
13657     else:
13658       instance_os = instance.os
13660 assert not (self.op.disk_template and self.op.disks), \
13661 "Can't modify disk template and apply disk changes at the same time"
13663 if self.op.disk_template:
13664 self._PreCheckDiskTemplate(pnode_info)
13666 # hvparams processing
13667 if self.op.hvparams:
13668 hv_type = instance.hypervisor
13669 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13670 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13671 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13674 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13675 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13676 self.hv_proposed = self.hv_new = hv_new # the new actual values
13677 self.hv_inst = i_hvdict # the new dict (without defaults)
13678     else:
13679       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13680                                               instance.hvparams)
13681 self.hv_new = self.hv_inst = {}
13683 # beparams processing
13684 if self.op.beparams:
13685       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13686                                    use_none=True)
13687 objects.UpgradeBeParams(i_bedict)
13688 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13689 be_new = cluster.SimpleFillBE(i_bedict)
13690 self.be_proposed = self.be_new = be_new # the new actual values
13691 self.be_inst = i_bedict # the new dict (without defaults)
13692     else:
13693       self.be_new = self.be_inst = {}
13694 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13695 be_old = cluster.FillBE(instance)
13697 # CPU param validation -- checking every time a parameter is
13698 # changed to cover all cases where either CPU mask or vcpus have
13700 if (constants.BE_VCPUS in self.be_proposed and
13701 constants.HV_CPU_MASK in self.hv_proposed):
13702       cpu_list = \
13703         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13704 # Verify mask is consistent with number of vCPUs. Can skip this
13705 # test if only 1 entry in the CPU mask, which means same mask
13706 # is applied to all vCPUs.
13707 if (len(cpu_list) > 1 and
13708 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13709 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13711 (self.be_proposed[constants.BE_VCPUS],
13712 self.hv_proposed[constants.HV_CPU_MASK]),
13713 errors.ECODE_INVAL)
13715 # Only perform this test if a new CPU mask is given
13716 if constants.HV_CPU_MASK in self.hv_new:
13717 # Calculate the largest CPU number requested
13718 max_requested_cpu = max(map(max, cpu_list))
13719 # Check that all of the instance's nodes have enough physical CPUs to
13720 # satisfy the requested CPU mask
13721 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13722 max_requested_cpu + 1, instance.hypervisor)
13724 # osparams processing
13725 if self.op.osparams:
13726 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13727 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13728       self.os_inst = i_osdict # the new dict (without defaults)
13729     else:
13730       self.os_inst = {}
13732     self.warn = []
13734 #TODO(dynmem): do the appropriate check involving MINMEM
13735 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13736 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13737 mem_check_list = [pnode]
13738 if be_new[constants.BE_AUTO_BALANCE]:
13739 # either we changed auto_balance to yes or it was from before
13740 mem_check_list.extend(instance.secondary_nodes)
13741 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13742 instance.hypervisor)
13743 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13744 [instance.hypervisor], False)
13745 pninfo = nodeinfo[pnode]
13746 msg = pninfo.fail_msg
13747       if msg:
13748         # Assume the primary node is unreachable and go ahead
13749         self.warn.append("Can't get info from primary node %s: %s" %
13750                          (pnode, msg))
13751       else:
13752         (_, _, (pnhvinfo, )) = pninfo.payload
13753 if not isinstance(pnhvinfo.get("memory_free", None), int):
13754 self.warn.append("Node data from primary node %s doesn't contain"
13755 " free memory information" % pnode)
13756 elif instance_info.fail_msg:
13757 self.warn.append("Can't get instance runtime information: %s" %
13758 instance_info.fail_msg)
13759         else:
13760           if instance_info.payload:
13761             current_mem = int(instance_info.payload["memory"])
13762           else:
13763             # Assume instance not running
13764             # (there is a slight race condition here, but it's not very
13765             # probable, and we have no other way to check)
13766             # TODO: Describe race condition
13767             current_mem = 0
13768           #TODO(dynmem): do the appropriate check involving MINMEM
13769           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13770                       pnhvinfo["memory_free"])
13771           if miss_mem > 0:
13772             raise errors.OpPrereqError("This change will prevent the instance"
13773                                        " from starting, due to %d MB of memory"
13774                                        " missing on its primary node" %
13775                                        miss_mem, errors.ECODE_NORES)
13777 if be_new[constants.BE_AUTO_BALANCE]:
13778 for node, nres in nodeinfo.items():
13779           if node not in instance.secondary_nodes:
13780             continue
13781 nres.Raise("Can't get info from secondary node %s" % node,
13782 prereq=True, ecode=errors.ECODE_STATE)
13783 (_, _, (nhvinfo, )) = nres.payload
13784 if not isinstance(nhvinfo.get("memory_free", None), int):
13785 raise errors.OpPrereqError("Secondary node %s didn't return free"
13786 " memory information" % node,
13787 errors.ECODE_STATE)
13788 #TODO(dynmem): do the appropriate check involving MINMEM
13789 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13790 raise errors.OpPrereqError("This change will prevent the instance"
13791 " from failover to its secondary node"
13792 " %s, due to not enough memory" % node,
13793 errors.ECODE_STATE)
13795 if self.op.runtime_mem:
13796       remote_info = self.rpc.call_instance_info(instance.primary_node,
13797                                                 instance.name,
13798 instance.hypervisor)
13799 remote_info.Raise("Error checking node %s" % instance.primary_node)
13800 if not remote_info.payload: # not running already
13801 raise errors.OpPrereqError("Instance %s is not running" %
13802 instance.name, errors.ECODE_STATE)
13804 current_memory = remote_info.payload["memory"]
13805 if (not self.op.force and
13806 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13807 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13808 raise errors.OpPrereqError("Instance %s must have memory between %d"
13809 " and %d MB of memory unless --force is"
13812 self.be_proposed[constants.BE_MINMEM],
13813 self.be_proposed[constants.BE_MAXMEM]),
13814 errors.ECODE_INVAL)
13816 delta = self.op.runtime_mem - current_memory
13817       if delta > 0:
13818         _CheckNodeFreeMemory(self, instance.primary_node,
13819                              "ballooning memory for instance %s" %
13820                              instance.name, delta, instance.hypervisor)
13822 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13823 raise errors.OpPrereqError("Disk operations not supported for"
13824 " diskless instances", errors.ECODE_INVAL)
13826 def _PrepareNicCreate(_, params, private):
13827 self._PrepareNicModification(params, private, None, None,
13828 {}, cluster, pnode)
13829 return (None, None)
13831 def _PrepareNicMod(_, nic, params, private):
13832 self._PrepareNicModification(params, private, nic.ip, nic.network,
13833 nic.nicparams, cluster, pnode)
13836 def _PrepareNicRemove(_, params, __):
13837       ip = params.ip
13838       net = params.network
13839 if net is not None and ip is not None:
13840 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13842 # Verify NIC changes (operating on copy)
13843 nics = instance.nics[:]
13844 ApplyContainerMods("NIC", nics, None, self.nicmod,
13845 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13846 if len(nics) > constants.MAX_NICS:
13847 raise errors.OpPrereqError("Instance has too many network interfaces"
13848 " (%d), cannot add more" % constants.MAX_NICS,
13849 errors.ECODE_STATE)
13851 # Verify disk changes (operating on a copy)
13852 disks = instance.disks[:]
13853 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13854 if len(disks) > constants.MAX_DISKS:
13855 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13856 " more" % constants.MAX_DISKS,
13857 errors.ECODE_STATE)
13858 disk_sizes = [disk.size for disk in instance.disks]
13859 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13860 self.diskmod if op == constants.DDM_ADD)
13861 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13862 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13864 if self.op.offline is not None and self.op.offline:
13865 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13866 msg="can't change to offline")
13868 # Pre-compute NIC changes (necessary to use result in hooks)
13869 self._nic_chgdesc = []
13870     if self.nicmod:
13871       # Operate on copies as this is still in prereq
13872 nics = [nic.Copy() for nic in instance.nics]
13873 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13874 self._CreateNewNic, self._ApplyNicMods, None)
13875 self._new_nics = nics
13876 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13877     else:
13878       self._new_nics = None
13879 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13881 if not self.op.ignore_ipolicy:
13882       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13883                                                               group_info)
13885 # Fill ispec with backend parameters
13886 ispec[constants.ISPEC_SPINDLE_USE] = \
13887 self.be_new.get(constants.BE_SPINDLE_USE, None)
13888       ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13889                                                          None)
13891 # Copy ispec to verify parameters with min/max values separately
13892 ispec_max = ispec.copy()
13893 ispec_max[constants.ISPEC_MEM_SIZE] = \
13894 self.be_new.get(constants.BE_MAXMEM, None)
13895 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13896 ispec_min = ispec.copy()
13897 ispec_min[constants.ISPEC_MEM_SIZE] = \
13898 self.be_new.get(constants.BE_MINMEM, None)
13899 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13901 if (res_max or res_min):
13902 # FIXME: Improve error message by including information about whether
13903 # the upper or lower limit of the parameter fails the ipolicy.
13904 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13905 (group_info, group_info.name,
13906 utils.CommaJoin(set(res_max + res_min))))
13907 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
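  # Editor's note (illustrative): at this point "ispec" describes the instance
  # as it would look after the modification, roughly
  #   {constants.ISPEC_CPU_COUNT: 2, constants.ISPEC_DISK_COUNT: 2,
  #    constants.ISPEC_DISK_SIZE: [1024, 2048], constants.ISPEC_NIC_COUNT: 1,
  #    constants.ISPEC_SPINDLE_USE: None}
  # and it is checked twice against the group ipolicy, once with BE_MINMEM and
  # once with BE_MAXMEM substituted for the memory size; the values shown are
  # hypothetical.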
13909 def _ConvertPlainToDrbd(self, feedback_fn):
13910 """Converts an instance from plain to drbd.
13913 feedback_fn("Converting template to drbd")
13914 instance = self.instance
13915 pnode = instance.primary_node
13916 snode = self.op.remote_node
13918 assert instance.disk_template == constants.DT_PLAIN
13920 # create a fake disk info for _GenerateDiskTemplate
13921 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13922 constants.IDISK_VG: d.logical_id[0]}
13923 for d in instance.disks]
13924 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13925 instance.name, pnode, [snode],
13926                                       disk_info, None, None, 0, feedback_fn,
13927                                       self.diskparams)
13928     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13929                                         self.diskparams)
13930 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13931 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13932 info = _GetInstanceInfoText(instance)
13933 feedback_fn("Creating additional volumes...")
13934 # first, create the missing data and meta devices
13935 for disk in anno_disks:
13936 # unfortunately this is... not too nice
13937 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13938 info, True, p_excl_stor)
13939 for child in disk.children:
13940 _CreateSingleBlockDev(self, snode, instance, child, info, True,
13942 # at this stage, all new LVs have been created, we can rename the
13944 feedback_fn("Renaming original volumes...")
13945 rename_list = [(o, n.children[0].logical_id)
13946 for (o, n) in zip(instance.disks, new_disks)]
13947 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13948 result.Raise("Failed to rename original LVs")
13950 feedback_fn("Initializing DRBD devices...")
13951 # all child devices are in place, we can now create the DRBD devices
13952 for disk in anno_disks:
13953 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13954 f_create = node == pnode
13955         _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
13956                               excl_stor)
13958 # at this point, the instance has been modified
13959 instance.disk_template = constants.DT_DRBD8
13960 instance.disks = new_disks
13961 self.cfg.Update(instance, feedback_fn)
13963 # Release node locks while waiting for sync
13964 _ReleaseLocks(self, locking.LEVEL_NODE)
13966 # disks are created, waiting for sync
13967 disk_abort = not _WaitForSync(self, instance,
13968 oneshot=not self.op.wait_for_sync)
13969     if disk_abort:
13970       raise errors.OpExecError("There are some degraded disks for"
13971 " this instance, please cleanup manually")
13973 # Node resource locks will be released by caller
13975 def _ConvertDrbdToPlain(self, feedback_fn):
13976 """Converts an instance from drbd to plain.
13979 instance = self.instance
13981 assert len(instance.secondary_nodes) == 1
13982 assert instance.disk_template == constants.DT_DRBD8
13984 pnode = instance.primary_node
13985 snode = instance.secondary_nodes[0]
13986 feedback_fn("Converting template to plain")
13988 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13989 new_disks = [d.children[0] for d in instance.disks]
13991 # copy over size and mode
13992 for parent, child in zip(old_disks, new_disks):
13993 child.size = parent.size
13994 child.mode = parent.mode
13996 # this is a DRBD disk, return its port to the pool
13997 # NOTE: this must be done right before the call to cfg.Update!
13998 for disk in old_disks:
13999 tcp_port = disk.logical_id[2]
14000 self.cfg.AddTcpUdpPort(tcp_port)
14002 # update instance structure
14003 instance.disks = new_disks
14004 instance.disk_template = constants.DT_PLAIN
14005 self.cfg.Update(instance, feedback_fn)
14007 # Release locks in case removing disks takes a while
14008 _ReleaseLocks(self, locking.LEVEL_NODE)
14010 feedback_fn("Removing volumes on the secondary node...")
14011 for disk in old_disks:
14012 self.cfg.SetDiskID(disk, snode)
14013 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
14014       if msg:
14015         self.LogWarning("Could not remove block device %s on node %s,"
14016 " continuing anyway: %s", disk.iv_name, snode, msg)
14018 feedback_fn("Removing unneeded volumes on the primary node...")
14019 for idx, disk in enumerate(old_disks):
14020 meta = disk.children[1]
14021 self.cfg.SetDiskID(meta, pnode)
14022 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
14023       if msg:
14024         self.LogWarning("Could not remove metadata for disk %d on node %s,"
14025 " continuing anyway: %s", idx, pnode, msg)
14027 def _CreateNewDisk(self, idx, params, _):
14028 """Creates a new disk.
14031 instance = self.instance
14034 if instance.disk_template in constants.DTS_FILEBASED:
14035 (file_driver, file_path) = instance.disks[0].logical_id
14036 file_path = os.path.dirname(file_path)
14037     else:
14038       file_driver = file_path = None
14040     disk = \
14041       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
14042 instance.primary_node, instance.secondary_nodes,
14043 [params], file_path, file_driver, idx,
14044 self.Log, self.diskparams)[0]
14046 info = _GetInstanceInfoText(instance)
14048 logging.info("Creating volume %s for instance %s",
14049 disk.iv_name, instance.name)
14050 # Note: this needs to be kept in sync with _CreateDisks
14052 for node in instance.all_nodes:
14053 f_create = (node == instance.primary_node)
14054       try:
14055         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
14056 except errors.OpExecError, err:
14057 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
14058 disk.iv_name, disk, node, err)
14060     return (disk, [
14061       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
14062       ])
14064   @staticmethod
14065   def _ModifyDisk(idx, disk, params, _):
14066     """Modifies a disk.
14068     """
14069     disk.mode = params[constants.IDISK_MODE]
14071     return [
14072       ("disk.mode/%d" % idx, disk.mode),
14073       ]
14075 def _RemoveDisk(self, idx, root, _):
14079 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14080 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14081 self.cfg.SetDiskID(disk, node)
14082 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
14084 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14085 " continuing anyway", idx, node, msg)
14087 # if this is a DRBD disk, return its port to the pool
14088 if root.dev_type in constants.LDS_DRBD:
14089 self.cfg.AddTcpUdpPort(root.logical_id[2])
14091 def _CreateNewNic(self, idx, params, private):
14092 """Creates data structure for a new network interface.
14095 mac = params[constants.INIC_MAC]
14096 ip = params.get(constants.INIC_IP, None)
14097 net = params.get(constants.INIC_NETWORK, None)
14098 net_uuid = self.cfg.LookupNetwork(net)
14099 #TODO: not private.filled?? can a nic have no nicparams??
14100 nicparams = private.filled
14101 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, nicparams=nicparams)
14103     return (nobj, [
14104       ("nic.%d" % idx,
14105        "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14106        (mac, ip, private.filled[constants.NIC_MODE],
14107         private.filled[constants.NIC_LINK],
14108         net)),
14109       ])
14111 def _ApplyNicMods(self, idx, nic, params, private):
14112     """Modifies a network interface.
14114     """
14115     changes = []
14117 for key in [constants.INIC_MAC, constants.INIC_IP]:
14118       if key in params:
14119         changes.append(("nic.%s/%d" % (key, idx), params[key]))
14120         setattr(nic, key, params[key])
14122 new_net = params.get(constants.INIC_NETWORK, nic.network)
14123 new_net_uuid = self.cfg.LookupNetwork(new_net)
14124 if new_net_uuid != nic.network:
14125 changes.append(("nic.network/%d" % idx, new_net))
14126 nic.network = new_net_uuid
14128     if private.filled:
14129       nic.nicparams = private.filled
14131 for (key, val) in nic.nicparams.items():
14132         changes.append(("nic.%s/%d" % (key, idx), val))
14134     return changes
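  # Editor's note (illustrative): the change descriptions accumulated above
  # are (field, value) pairs such as
  #   [("nic.mac/0", "aa:bb:cc:dd:ee:ff"), ("nic.network/0", "net1"),
  #    ("nic.mode/0", "bridged")]
  # and reach the opcode result through self._nic_chgdesc in Exec below; the
  # concrete values are hypothetical.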
14136 def Exec(self, feedback_fn):
14137 """Modifies an instance.
14139 All parameters take effect only at the next restart of the instance.
14142 # Process here the warnings from CheckPrereq, as we don't have a
14143 # feedback_fn there.
14144 # TODO: Replace with self.LogWarning
14145 for warn in self.warn:
14146 feedback_fn("WARNING: %s" % warn)
14148 assert ((self.op.disk_template is None) ^
14149 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14150 "Not owning any node resource locks"
14152     result = []
14153     instance = self.instance
14156 if self.op.runtime_mem:
14157 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
14159 self.op.runtime_mem)
14160 rpcres.Raise("Cannot modify instance runtime memory")
14161 result.append(("runtime_memory", self.op.runtime_mem))
14163 # Apply disk changes
14164 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14165 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14166 _UpdateIvNames(0, instance.disks)
14168 if self.op.disk_template:
14170 check_nodes = set(instance.all_nodes)
14171 if self.op.remote_node:
14172 check_nodes.add(self.op.remote_node)
14173 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14174 owned = self.owned_locks(level)
14175 assert not (check_nodes - owned), \
14176 ("Not owning the correct locks, owning %r, expected at least %r" %
14177 (owned, check_nodes))
14179 r_shut = _ShutdownInstanceDisks(self, instance)
14180       if not r_shut:
14181         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
14182 " proceed with disk template conversion")
14183       mode = (instance.disk_template, self.op.disk_template)
14184       try:
14185         self._DISK_CONVERSIONS[mode](self, feedback_fn)
14186       except:
14187         self.cfg.ReleaseDRBDMinors(instance.name)
14188         raise
14189       result.append(("disk_template", self.op.disk_template))
14191 assert instance.disk_template == self.op.disk_template, \
14192 ("Expected disk template '%s', found '%s'" %
14193 (self.op.disk_template, instance.disk_template))
14195 # Release node and resource locks if there are any (they might already have
14196 # been released during disk conversion)
14197 _ReleaseLocks(self, locking.LEVEL_NODE)
14198 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14200 # Apply NIC changes
14201 if self._new_nics is not None:
14202 instance.nics = self._new_nics
14203 result.extend(self._nic_chgdesc)
14206 if self.op.hvparams:
14207 instance.hvparams = self.hv_inst
14208 for key, val in self.op.hvparams.iteritems():
14209 result.append(("hv/%s" % key, val))
14212 if self.op.beparams:
14213 instance.beparams = self.be_inst
14214 for key, val in self.op.beparams.iteritems():
14215 result.append(("be/%s" % key, val))
14218 if self.op.os_name:
14219 instance.os = self.op.os_name
14222 if self.op.osparams:
14223 instance.osparams = self.os_inst
14224 for key, val in self.op.osparams.iteritems():
14225 result.append(("os/%s" % key, val))
14227     if self.op.offline is None:
14228       # Ignore
14229       pass
14230 elif self.op.offline:
14231 # Mark instance as offline
14232 self.cfg.MarkInstanceOffline(instance.name)
14233 result.append(("admin_state", constants.ADMINST_OFFLINE))
14234     else:
14235       # Mark instance as online, but stopped
14236 self.cfg.MarkInstanceDown(instance.name)
14237 result.append(("admin_state", constants.ADMINST_DOWN))
14239 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14241 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14242 self.owned_locks(locking.LEVEL_NODE)), \
14243       "All node locks should have been released by now"
14245     return result
14247 _DISK_CONVERSIONS = {
14248 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
14249     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
14250     }
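  # Editor's note (illustrative): _DISK_CONVERSIONS only covers the
  # plain<->drbd pair, which is why _PreCheckDiskTemplate rejects any other
  # template combination; likewise, an opcode that both converts the template
  # and changes disks is rejected in CheckArguments, so such operations have
  # to be submitted as two separate OpInstanceSetParams opcodes.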
14253 class LUInstanceChangeGroup(LogicalUnit):
14254 HPATH = "instance-change-group"
14255 HTYPE = constants.HTYPE_INSTANCE
14258 def ExpandNames(self):
14259 self.share_locks = _ShareAll()
14261 self.needed_locks = {
14262 locking.LEVEL_NODEGROUP: [],
14263 locking.LEVEL_NODE: [],
14264       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14265       }
14267 self._ExpandAndLockInstance()
14269 if self.op.target_groups:
14270 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14271 self.op.target_groups)
14272     else:
14273       self.req_target_uuids = None
14275 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14277 def DeclareLocks(self, level):
14278 if level == locking.LEVEL_NODEGROUP:
14279 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14281 if self.req_target_uuids:
14282 lock_groups = set(self.req_target_uuids)
14284 # Lock all groups used by instance optimistically; this requires going
14285 # via the node before it's locked, requiring verification later on
14286 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14287 lock_groups.update(instance_groups)
14288       else:
14289         # No target groups, need to lock all of them
14290 lock_groups = locking.ALL_SET
14292 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14294 elif level == locking.LEVEL_NODE:
14295 if self.req_target_uuids:
14296 # Lock all nodes used by instances
14297 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14298 self._LockInstancesNodes()
14300 # Lock all nodes in all potential target groups
14301 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14302 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14303 member_nodes = [node_name
14304 for group in lock_groups
14305 for node_name in self.cfg.GetNodeGroup(group).members]
14306 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14307       else:
14308         # Lock all nodes as all groups are potential targets
14309 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14311 def CheckPrereq(self):
14312 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14313 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14314 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14316 assert (self.req_target_uuids is None or
14317 owned_groups.issuperset(self.req_target_uuids))
14318 assert owned_instances == set([self.op.instance_name])
14320 # Get instance information
14321 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14323 # Check if node groups for locked instance are still correct
14324 assert owned_nodes.issuperset(self.instance.all_nodes), \
14325 ("Instance %s's nodes changed while we kept the lock" %
14326 self.op.instance_name)
14328     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14329                                            owned_groups)
14331 if self.req_target_uuids:
14332 # User requested specific target groups
14333 self.target_uuids = frozenset(self.req_target_uuids)
14334     else:
14335       # All groups except those used by the instance are potential targets
14336 self.target_uuids = owned_groups - inst_groups
14338 conflicting_groups = self.target_uuids & inst_groups
14339 if conflicting_groups:
14340 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14341 " used by the instance '%s'" %
14342 (utils.CommaJoin(conflicting_groups),
14343 self.op.instance_name),
14344 errors.ECODE_INVAL)
14346 if not self.target_uuids:
14347 raise errors.OpPrereqError("There are no possible target groups",
14348 errors.ECODE_INVAL)
14350 def BuildHooksEnv(self):
14351     """Build hooks env.
14353     """
14354     assert self.target_uuids
14356     env = {
14357       "TARGET_GROUPS": " ".join(self.target_uuids),
14358       }
14360     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14362     return env
14364 def BuildHooksNodes(self):
14365 """Build hooks nodes.
14368 mn = self.cfg.GetMasterNode()
14369 return ([mn], [mn])
14371 def Exec(self, feedback_fn):
14372 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14374 assert instances == [self.op.instance_name], "Instance not locked"
14376 req = iallocator.IAReqGroupChange(instances=instances,
14377 target_groups=list(self.target_uuids))
14378 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14380 ial.Run(self.op.iallocator)
14382 if not ial.success:
14383 raise errors.OpPrereqError("Can't compute solution for changing group of"
14384 " instance '%s' using iallocator '%s': %s" %
14385 (self.op.instance_name, self.op.iallocator,
14386 ial.info), errors.ECODE_NORES)
14388 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14390 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14391 " instance '%s'", len(jobs), self.op.instance_name)
14393 return ResultWithJobs(jobs)
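# Illustrative sketch (hypothetical instance and group values) of the pattern
# used by Exec() above:
#   req = iallocator.IAReqGroupChange(instances=["inst1.example.com"],
#                                     target_groups=["<target-group-uuid>"])
#   ial = iallocator.IAllocator(self.cfg, self.rpc, req)
#   ial.Run(self.op.iallocator)
#   jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
# The resulting job definitions are handed back via ResultWithJobs.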
14396 class LUBackupQuery(NoHooksLU):
14397 """Query the exports list
14402 def CheckArguments(self):
14403 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14404 ["node", "export"], self.op.use_locking)
14406 def ExpandNames(self):
14407 self.expq.ExpandNames(self)
14409 def DeclareLocks(self, level):
14410 self.expq.DeclareLocks(self, level)
14412 def Exec(self, feedback_fn):
14415 for (node, expname) in self.expq.OldStyleQuery(self):
14416 if expname is None:
14417 result[node] = False
14419 result.setdefault(node, []).append(expname)
14424 class _ExportQuery(_QueryBase):
14425 FIELDS = query.EXPORT_FIELDS
14427 #: The node name is not a unique key for this query
14428 SORT_FIELD = "node"
14430 def ExpandNames(self, lu):
14431 lu.needed_locks = {}
14433 # The following variables interact with _QueryBase._GetNames
14435 self.wanted = _GetWantedNodes(lu, self.names)
14437 self.wanted = locking.ALL_SET
14439 self.do_locking = self.use_locking
14441 if self.do_locking:
14442 lu.share_locks = _ShareAll()
14443 lu.needed_locks = {
14444 locking.LEVEL_NODE: self.wanted,
14448 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14450 def DeclareLocks(self, lu, level):
14453 def _GetQueryData(self, lu):
14454 """Computes the list of nodes and their attributes.
14457 # Locking is not used
14459 assert not (compat.any(lu.glm.is_owned(level)
14460 for level in locking.LEVELS
14461 if level != locking.LEVEL_CLUSTER) or
14462 self.do_locking or self.use_locking)
14464 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14468 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14470 result.append((node, None))
14472 result.extend((node, expname) for expname in nres.payload)
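# Sketch of the query output assembled above (node and export names are
# hypothetical): each node contributes (node, export_name) tuples, and a node
# whose export list could not be retrieved is represented as (node, None):
#   [("node1.example.com", "inst1.example.com"), ("node2.example.com", None)]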
14477 class LUBackupPrepare(NoHooksLU):
14478 """Prepares an instance for an export and returns useful information.
14483 def ExpandNames(self):
14484 self._ExpandAndLockInstance()
14486 def CheckPrereq(self):
14487 """Check prerequisites.
14490 instance_name = self.op.instance_name
14492 self.instance = self.cfg.GetInstanceInfo(instance_name)
14493 assert self.instance is not None, \
14494 "Cannot retrieve locked instance %s" % self.op.instance_name
14495 _CheckNodeOnline(self, self.instance.primary_node)
14497 self._cds = _GetClusterDomainSecret()
14499 def Exec(self, feedback_fn):
14500 """Prepares an instance for an export.
14503 instance = self.instance
14505 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14506 salt = utils.GenerateSecret(8)
14508 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14509 result = self.rpc.call_x509_cert_create(instance.primary_node,
14510 constants.RIE_CERT_VALIDITY)
14511 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14513 (name, cert_pem) = result.payload
14515 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14519 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14520 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14522 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14528 class LUBackupExport(LogicalUnit):
14529 """Export an instance to an image in the cluster.
14532 HPATH = "instance-export"
14533 HTYPE = constants.HTYPE_INSTANCE
14536 def CheckArguments(self):
14537 """Check the arguments.
14540 self.x509_key_name = self.op.x509_key_name
14541 self.dest_x509_ca_pem = self.op.destination_x509_ca
14543 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14544 if not self.x509_key_name:
14545 raise errors.OpPrereqError("Missing X509 key name for encryption",
14546 errors.ECODE_INVAL)
14548 if not self.dest_x509_ca_pem:
14549 raise errors.OpPrereqError("Missing destination X509 CA",
14550 errors.ECODE_INVAL)
14552 def ExpandNames(self):
14553 self._ExpandAndLockInstance()
14555 # Lock all nodes for local exports
14556 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14557 # FIXME: lock only instance primary and destination node
14559 # Sad but true, for now we have to lock all nodes, as we don't know where
14560 # the previous export might be, and in this LU we search for it and
14561 # remove it from its current node. In the future we could fix this by:
14562 # - making a tasklet to search (share-lock all), then create the
14563 # new one, then one to remove, after
14564 # - removing the removal operation altogether
14565 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14567 # Allocations should be stopped while this LU runs with node locks, but
14568 # it doesn't have to be exclusive
14569 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14570 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14572 def DeclareLocks(self, level):
14573 """Last minute lock declaration."""
14574 # All nodes are locked anyway, so nothing to do here.
14576 def BuildHooksEnv(self):
14577 """Build hooks env.
14579 This will run on the master, primary node and target node.
14583 "EXPORT_MODE": self.op.mode,
14584 "EXPORT_NODE": self.op.target_node,
14585 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14586 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14587 # TODO: Generic function for boolean env variables
14588 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14591 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14595 def BuildHooksNodes(self):
14596 """Build hooks nodes.
14599 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14601 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14602 nl.append(self.op.target_node)
14606 def CheckPrereq(self):
14607 """Check prerequisites.
14609 This checks that the instance and node names are valid.
14612 instance_name = self.op.instance_name
14614 self.instance = self.cfg.GetInstanceInfo(instance_name)
14615 assert self.instance is not None, \
14616 "Cannot retrieve locked instance %s" % self.op.instance_name
14617 _CheckNodeOnline(self, self.instance.primary_node)
14619 if (self.op.remove_instance and
14620 self.instance.admin_state == constants.ADMINST_UP and
14621 not self.op.shutdown):
14622 raise errors.OpPrereqError("Cannot remove instance without shutting it"
14623 " down first", errors.ECODE_STATE)
14625 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14626 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14627 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14628 assert self.dst_node is not None
14630 _CheckNodeOnline(self, self.dst_node.name)
14631 _CheckNodeNotDrained(self, self.dst_node.name)
14634 self.dest_disk_info = None
14635 self.dest_x509_ca = None
14637 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14638 self.dst_node = None
14640 if len(self.op.target_node) != len(self.instance.disks):
14641 raise errors.OpPrereqError(("Received destination information for %s"
14642 " disks, but instance %s has %s disks") %
14643 (len(self.op.target_node), instance_name,
14644 len(self.instance.disks)),
14645 errors.ECODE_INVAL)
14647 cds = _GetClusterDomainSecret()
14649 # Check X509 key name
14651 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14652 except (TypeError, ValueError), err:
14653 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14654 errors.ECODE_INVAL)
14656 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14657 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14658 errors.ECODE_INVAL)
14660 # Load and verify CA
14662 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14663 except OpenSSL.crypto.Error, err:
14664 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14665 (err, ), errors.ECODE_INVAL)
14667 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14668 if errcode is not None:
14669 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14670 (msg, ), errors.ECODE_INVAL)
14672 self.dest_x509_ca = cert
14674 # Verify target information
14676 for idx, disk_data in enumerate(self.op.target_node):
14678 (host, port, magic) = \
14679 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14680 except errors.GenericError, err:
14681 raise errors.OpPrereqError("Target info for disk %s: %s" %
14682 (idx, err), errors.ECODE_INVAL)
14684 disk_info.append((host, port, magic))
14686 assert len(disk_info) == len(self.op.target_node)
14687 self.dest_disk_info = disk_info
14690 raise errors.ProgrammerError("Unhandled export mode %r" %
14693 # instance disk type verification
14694 # TODO: Implement export support for file-based disks
14695 for disk in self.instance.disks:
14696 if disk.dev_type == constants.LD_FILE:
14697 raise errors.OpPrereqError("Export not supported for instances with"
14698 " file-based disks", errors.ECODE_INVAL)
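# Note on the remote-mode checks above (descriptive sketch): for
# EXPORT_MODE_REMOTE, self.op.target_node is not a node name but a list with
# one entry per instance disk; each entry is validated against the cluster
# domain secret and unpacked into a (host, port, magic) tuple collected in
# self.dest_disk_info, e.g. roughly [("203.0.113.10", 11000, "<magic>"), ...].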
14700 def _CleanupExports(self, feedback_fn):
14701 """Removes exports of current instance from all other nodes.
14703 If an instance in a cluster with nodes A..D was exported to node C, its
14704 exports will be removed from the nodes A, B and D.
14707 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14709 nodelist = self.cfg.GetNodeList()
14710 nodelist.remove(self.dst_node.name)
14712 # on one-node clusters nodelist will be empty after the removal
14713 # if we proceed the backup would be removed because OpBackupQuery
14714 # substitutes an empty list with the full cluster node list.
14715 iname = self.instance.name
14717 feedback_fn("Removing old exports for instance %s" % iname)
14718 exportlist = self.rpc.call_export_list(nodelist)
14719 for node in exportlist:
14720 if exportlist[node].fail_msg:
14722 if iname in exportlist[node].payload:
14723 msg = self.rpc.call_export_remove(node, iname).fail_msg
14725 self.LogWarning("Could not remove older export for instance %s"
14726 " on node %s: %s", iname, node, msg)
14728 def Exec(self, feedback_fn):
14729 """Export an instance to an image in the cluster.
14732 assert self.op.mode in constants.EXPORT_MODES
14734 instance = self.instance
14735 src_node = instance.primary_node
14737 if self.op.shutdown:
14738 # shutdown the instance, but not the disks
14739 feedback_fn("Shutting down instance %s" % instance.name)
14740 result = self.rpc.call_instance_shutdown(src_node, instance,
14741 self.op.shutdown_timeout)
14742 # TODO: Maybe ignore failures if ignore_remove_failures is set
14743 result.Raise("Could not shutdown instance %s on"
14744 " node %s" % (instance.name, src_node))
14746 # set the disk IDs correctly since call_instance_start needs the
14747 # correct drbd minor to create the symlinks
14748 for disk in instance.disks:
14749 self.cfg.SetDiskID(disk, src_node)
14751 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14754 # Activate the instance disks if we're exporting a stopped instance
14755 feedback_fn("Activating disks for %s" % instance.name)
14756 _StartInstanceDisks(self, instance, None)
14759 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14762 helper.CreateSnapshots()
14764 if (self.op.shutdown and
14765 instance.admin_state == constants.ADMINST_UP and
14766 not self.op.remove_instance):
14767 assert not activate_disks
14768 feedback_fn("Starting instance %s" % instance.name)
14769 result = self.rpc.call_instance_start(src_node,
14770 (instance, None, None), False)
14771 msg = result.fail_msg
14773 feedback_fn("Failed to start instance: %s" % msg)
14774 _ShutdownInstanceDisks(self, instance)
14775 raise errors.OpExecError("Could not start instance: %s" % msg)
14777 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14778 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14779 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14780 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14781 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14783 (key_name, _, _) = self.x509_key_name
14786 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14789 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14790 key_name, dest_ca_pem,
14795 # Check for backwards compatibility
14796 assert len(dresults) == len(instance.disks)
14797 assert compat.all(isinstance(i, bool) for i in dresults), \
14798 "Not all results are boolean: %r" % dresults
14802 feedback_fn("Deactivating disks for %s" % instance.name)
14803 _ShutdownInstanceDisks(self, instance)
14805 if not (compat.all(dresults) and fin_resu):
14808 failures.append("export finalization")
14809 if not compat.all(dresults):
14810 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14812 failures.append("disk export: disk(s) %s" % fdsk)
14814 raise errors.OpExecError("Export failed, errors in %s" %
14815 utils.CommaJoin(failures))
14817 # At this point, the export was successful, we can cleanup/finish
14819 # Remove instance if requested
14820 if self.op.remove_instance:
14821 feedback_fn("Removing instance %s" % instance.name)
14822 _RemoveInstance(self, feedback_fn, instance,
14823 self.op.ignore_remove_failures)
14825 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14826 self._CleanupExports(feedback_fn)
14828 return fin_resu, dresults
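# Shape of the value returned by Exec() (sketch): a (fin_resu, dresults) pair
# where fin_resu reports export finalization and dresults holds one boolean
# per instance disk, e.g. (True, [True, True]) for a clean two-disk export.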
14831 class LUBackupRemove(NoHooksLU):
14832 """Remove exports related to the named instance.
14837 def ExpandNames(self):
14838 self.needed_locks = {
14839 # We need all nodes to be locked in order for RemoveExport to work, but
14840 # we don't need to lock the instance itself, as nothing will happen to it
14841 # (and we can remove exports also for a removed instance)
14842 locking.LEVEL_NODE: locking.ALL_SET,
14844 # Removing backups is quick, so blocking allocations is justified
14845 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14848 # Allocations should be stopped while this LU runs with node locks, but it
14849 # doesn't have to be exclusive
14850 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14852 def Exec(self, feedback_fn):
14853 """Remove any export.
14856 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14857 # If the instance was not found we'll try with the name that was passed in.
14858 # This will only work if it was an FQDN, though.
14860 if not instance_name:
14862 instance_name = self.op.instance_name
14864 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14865 exportlist = self.rpc.call_export_list(locked_nodes)
14867 for node in exportlist:
14868 msg = exportlist[node].fail_msg
14870 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14872 if instance_name in exportlist[node].payload:
14874 result = self.rpc.call_export_remove(node, instance_name)
14875 msg = result.fail_msg
14877 logging.error("Could not remove export for instance %s"
14878 " on node %s: %s", instance_name, node, msg)
14880 if fqdn_warn and not found:
14881 feedback_fn("Export not found. If trying to remove an export belonging"
14882 " to a deleted instance please use its Fully Qualified Domain Name.")
14886 class LUGroupAdd(LogicalUnit):
14887 """Logical unit for creating node groups.
14890 HPATH = "group-add"
14891 HTYPE = constants.HTYPE_GROUP
14894 def ExpandNames(self):
14895 # We need the new group's UUID here so that we can create and acquire the
14896 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14897 # that it should not check whether the UUID exists in the configuration.
14898 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14899 self.needed_locks = {}
14900 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14902 def CheckPrereq(self):
14903 """Check prerequisites.
14905 This checks that the given group name is not an existing node group
14910 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14911 except errors.OpPrereqError:
14914 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14915 " node group (UUID: %s)" %
14916 (self.op.group_name, existing_uuid),
14917 errors.ECODE_EXISTS)
14919 if self.op.ndparams:
14920 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14922 if self.op.hv_state:
14923 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14925 self.new_hv_state = None
14927 if self.op.disk_state:
14928 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14930 self.new_disk_state = None
14932 if self.op.diskparams:
14933 for templ in constants.DISK_TEMPLATES:
14934 if templ in self.op.diskparams:
14935 utils.ForceDictType(self.op.diskparams[templ],
14936 constants.DISK_DT_TYPES)
14937 self.new_diskparams = self.op.diskparams
14939 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14940 except errors.OpPrereqError, err:
14941 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14942 errors.ECODE_INVAL)
14944 self.new_diskparams = {}
14946 if self.op.ipolicy:
14947 cluster = self.cfg.GetClusterInfo()
14948 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14950 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14951 except errors.ConfigurationError, err:
14952 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14953 errors.ECODE_INVAL)
14955 def BuildHooksEnv(self):
14956 """Build hooks env.
14960 "GROUP_NAME": self.op.group_name,
14963 def BuildHooksNodes(self):
14964 """Build hooks nodes.
14967 mn = self.cfg.GetMasterNode()
14968 return ([mn], [mn])
14970 def Exec(self, feedback_fn):
14971 """Add the node group to the cluster.
14974 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14975 uuid=self.group_uuid,
14976 alloc_policy=self.op.alloc_policy,
14977 ndparams=self.op.ndparams,
14978 diskparams=self.new_diskparams,
14979 ipolicy=self.op.ipolicy,
14980 hv_state_static=self.new_hv_state,
14981 disk_state_static=self.new_disk_state)
14983 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14984 del self.remove_locks[locking.LEVEL_NODEGROUP]
14987 class LUGroupAssignNodes(NoHooksLU):
14988 """Logical unit for assigning nodes to groups.
14993 def ExpandNames(self):
14994 # These raise errors.OpPrereqError on their own:
14995 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14996 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14998 # We want to lock all the affected nodes and groups. We have readily
14999 # available the list of nodes, and the *destination* group. To gather the
15000 # list of "source" groups, we need to fetch node information later on.
15001 self.needed_locks = {
15002 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
15003 locking.LEVEL_NODE: self.op.nodes,
15006 def DeclareLocks(self, level):
15007 if level == locking.LEVEL_NODEGROUP:
15008 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15010 # Try to get all affected nodes' groups without having the group or node
15011 # lock yet. Needs verification later in the code flow.
15012 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15014 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15016 def CheckPrereq(self):
15017 """Check prerequisites.
15020 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15021 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15022 frozenset(self.op.nodes))
15024 expected_locks = (set([self.group_uuid]) |
15025 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15026 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15027 if actual_locks != expected_locks:
15028 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15029 " current groups are '%s', used to be '%s'" %
15030 (utils.CommaJoin(expected_locks),
15031 utils.CommaJoin(actual_locks)))
15033 self.node_data = self.cfg.GetAllNodesInfo()
15034 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15035 instance_data = self.cfg.GetAllInstancesInfo()
15037 if self.group is None:
15038 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15039 (self.op.group_name, self.group_uuid))
15041 (new_splits, previous_splits) = \
15042 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15043 for node in self.op.nodes],
15044 self.node_data, instance_data)
15047 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15049 if not self.op.force:
15050 raise errors.OpExecError("The following instances get split by this"
15051 " change and --force was not given: %s" %
15054 self.LogWarning("This operation will split the following instances: %s",
15057 if previous_splits:
15058 self.LogWarning("In addition, these already-split instances continue"
15059 " to be split across groups: %s",
15060 utils.CommaJoin(utils.NiceSort(previous_splits)))
15062 def Exec(self, feedback_fn):
15063 """Assign nodes to a new group.
15066 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15068 self.cfg.AssignGroupNodes(mods)
15071 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15072 """Check for split instances after a node assignment.
15074 This method considers a series of node assignments as an atomic operation,
15075 and returns information about split instances after applying the set of
15078 In particular, it returns information about newly split instances, and
15079 instances that were already split, and remain so after the change.
15081 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
15084 @type changes: list of (node_name, new_group_uuid) pairs.
15085 @param changes: list of node assignments to consider.
15086 @param node_data: a dict with data for all nodes
15087 @param instance_data: a dict with all instances to consider
15088 @rtype: a two-tuple
15089 @return: a list of instances that were previously okay and become split as a
15090 consequence of this change, and a list of instances that were previously
15091 split and remain split after this change.
15094 changed_nodes = dict((node, group) for node, group in changes
15095 if node_data[node].group != group)
15097 all_split_instances = set()
15098 previously_split_instances = set()
15100 def InstanceNodes(instance):
15101 return [instance.primary_node] + list(instance.secondary_nodes)
15103 for inst in instance_data.values():
15104 if inst.disk_template not in constants.DTS_INT_MIRROR:
15107 instance_nodes = InstanceNodes(inst)
15109 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15110 previously_split_instances.add(inst.name)
15112 if len(set(changed_nodes.get(node, node_data[node].group)
15113 for node in instance_nodes)) > 1:
15114 all_split_instances.add(inst.name)
15116 return (list(all_split_instances - previously_split_instances),
15117 list(previously_split_instances & all_split_instances))
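# Worked example for the split computation above (all names hypothetical):
# if node_data places node1 and node2 in group A and the proposed change is
#   changes = [("node2", "uuid-of-group-B")]
# then a DRBD instance on [node1, node2] that previously sat in one group is
# returned in the first list (newly split), while an instance that already
# spanned two groups and still does afterwards ends up in the second list.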
15120 class _GroupQuery(_QueryBase):
15121 FIELDS = query.GROUP_FIELDS
15123 def ExpandNames(self, lu):
15124 lu.needed_locks = {}
15126 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15127 self._cluster = lu.cfg.GetClusterInfo()
15128 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15131 self.wanted = [name_to_uuid[name]
15132 for name in utils.NiceSort(name_to_uuid.keys())]
15134 # Accept names to be either names or UUIDs.
15137 all_uuid = frozenset(self._all_groups.keys())
15139 for name in self.names:
15140 if name in all_uuid:
15141 self.wanted.append(name)
15142 elif name in name_to_uuid:
15143 self.wanted.append(name_to_uuid[name])
15145 missing.append(name)
15148 raise errors.OpPrereqError("Some groups do not exist: %s" %
15149 utils.CommaJoin(missing),
15150 errors.ECODE_NOENT)
15152 def DeclareLocks(self, lu, level):
15155 def _GetQueryData(self, lu):
15156 """Computes the list of node groups and their attributes.
15159 do_nodes = query.GQ_NODE in self.requested_data
15160 do_instances = query.GQ_INST in self.requested_data
15162 group_to_nodes = None
15163 group_to_instances = None
15165 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15166 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15167 # latter GetAllInstancesInfo() is not enough, for we have to go through
15168 # instance->node. Hence, we will need to process nodes even if we only need
15169 # instance information.
15170 if do_nodes or do_instances:
15171 all_nodes = lu.cfg.GetAllNodesInfo()
15172 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15175 for node in all_nodes.values():
15176 if node.group in group_to_nodes:
15177 group_to_nodes[node.group].append(node.name)
15178 node_to_group[node.name] = node.group
15181 all_instances = lu.cfg.GetAllInstancesInfo()
15182 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15184 for instance in all_instances.values():
15185 node = instance.primary_node
15186 if node in node_to_group:
15187 group_to_instances[node_to_group[node]].append(instance.name)
15190 # Do not pass on node information if it was not requested.
15191 group_to_nodes = None
15193 return query.GroupQueryData(self._cluster,
15194 [self._all_groups[uuid]
15195 for uuid in self.wanted],
15196 group_to_nodes, group_to_instances,
15197 query.GQ_DISKPARAMS in self.requested_data)
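# Sketch of the intermediate mappings built above (hypothetical values):
#   group_to_nodes     = {"<group-uuid>": ["node1", "node2"]}
#   group_to_instances = {"<group-uuid>": ["inst1.example.com"]}
# They are only computed when GQ_NODE/GQ_INST data was requested; otherwise
# they remain None and GroupQueryData receives None for them.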
15200 class LUGroupQuery(NoHooksLU):
15201 """Logical unit for querying node groups.
15206 def CheckArguments(self):
15207 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15208 self.op.output_fields, False)
15210 def ExpandNames(self):
15211 self.gq.ExpandNames(self)
15213 def DeclareLocks(self, level):
15214 self.gq.DeclareLocks(self, level)
15216 def Exec(self, feedback_fn):
15217 return self.gq.OldStyleQuery(self)
15220 class LUGroupSetParams(LogicalUnit):
15221 """Modifies the parameters of a node group.
15224 HPATH = "group-modify"
15225 HTYPE = constants.HTYPE_GROUP
15228 def CheckArguments(self):
15231 self.op.diskparams,
15232 self.op.alloc_policy,
15234 self.op.disk_state,
15238 if all_changes.count(None) == len(all_changes):
15239 raise errors.OpPrereqError("Please pass at least one modification",
15240 errors.ECODE_INVAL)
15242 def ExpandNames(self):
15243 # This raises errors.OpPrereqError on its own:
15244 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15246 self.needed_locks = {
15247 locking.LEVEL_INSTANCE: [],
15248 locking.LEVEL_NODEGROUP: [self.group_uuid],
15251 self.share_locks[locking.LEVEL_INSTANCE] = 1
15253 def DeclareLocks(self, level):
15254 if level == locking.LEVEL_INSTANCE:
15255 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15257 # Lock instances optimistically, needs verification once group lock has
15259 self.needed_locks[locking.LEVEL_INSTANCE] = \
15260 self.cfg.GetNodeGroupInstances(self.group_uuid)
15263 def _UpdateAndVerifyDiskParams(old, new):
15264 """Updates and verifies disk parameters.
15267 new_params = _GetUpdatedParams(old, new)
15268 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15269 return new_params
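# Illustrative use of the helper above (parameter values are hypothetical):
#   new = _UpdateAndVerifyDiskParams({"resync-rate": 1000},
#                                    {"resync-rate": 2000})
# merges the override into the existing per-template dict and type-checks the
# result against constants.DISK_DT_TYPES before it is used in CheckPrereq().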
15271 def CheckPrereq(self):
15272 """Check prerequisites.
15275 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15277 # Check if locked instances are still correct
15278 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15280 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15281 cluster = self.cfg.GetClusterInfo()
15283 if self.group is None:
15284 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15285 (self.op.group_name, self.group_uuid))
15287 if self.op.ndparams:
15288 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15289 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15290 self.new_ndparams = new_ndparams
15292 if self.op.diskparams:
15293 diskparams = self.group.diskparams
15294 uavdp = self._UpdateAndVerifyDiskParams
15295 # For each disktemplate subdict update and verify the values
15296 new_diskparams = dict((dt,
15297 uavdp(diskparams.get(dt, {}),
15298 self.op.diskparams[dt]))
15299 for dt in constants.DISK_TEMPLATES
15300 if dt in self.op.diskparams)
15301 # Now that we have all subdicts of diskparams ready, let's merge the
15302 # actual dict with all updated subdicts
15303 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15305 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15306 except errors.OpPrereqError, err:
15307 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15308 errors.ECODE_INVAL)
15310 if self.op.hv_state:
15311 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15312 self.group.hv_state_static)
15314 if self.op.disk_state:
15315 self.new_disk_state = \
15316 _MergeAndVerifyDiskState(self.op.disk_state,
15317 self.group.disk_state_static)
15319 if self.op.ipolicy:
15320 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15324 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15325 inst_filter = lambda inst: inst.name in owned_instances
15326 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15327 gmi = ganeti.masterd.instance
15329 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15331 new_ipolicy, instances)
15334 self.LogWarning("After the ipolicy change the following instances"
15335 " violate them: %s",
15336 utils.CommaJoin(violations))
15338 def BuildHooksEnv(self):
15339 """Build hooks env.
15343 "GROUP_NAME": self.op.group_name,
15344 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15347 def BuildHooksNodes(self):
15348 """Build hooks nodes.
15351 mn = self.cfg.GetMasterNode()
15352 return ([mn], [mn])
15354 def Exec(self, feedback_fn):
15355 """Modifies the node group.
15360 if self.op.ndparams:
15361 self.group.ndparams = self.new_ndparams
15362 result.append(("ndparams", str(self.group.ndparams)))
15364 if self.op.diskparams:
15365 self.group.diskparams = self.new_diskparams
15366 result.append(("diskparams", str(self.group.diskparams)))
15368 if self.op.alloc_policy:
15369 self.group.alloc_policy = self.op.alloc_policy
15371 if self.op.hv_state:
15372 self.group.hv_state_static = self.new_hv_state
15374 if self.op.disk_state:
15375 self.group.disk_state_static = self.new_disk_state
15377 if self.op.ipolicy:
15378 self.group.ipolicy = self.new_ipolicy
15380 self.cfg.Update(self.group, feedback_fn)
15384 class LUGroupRemove(LogicalUnit):
15385 HPATH = "group-remove"
15386 HTYPE = constants.HTYPE_GROUP
15389 def ExpandNames(self):
15390 # This raises errors.OpPrereqError on its own:
15391 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15392 self.needed_locks = {
15393 locking.LEVEL_NODEGROUP: [self.group_uuid],
15396 def CheckPrereq(self):
15397 """Check prerequisites.
15399 This checks that the given group name exists as a node group, that it is
15400 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
15404 # Verify that the group is empty.
15405 group_nodes = [node.name
15406 for node in self.cfg.GetAllNodesInfo().values()
15407 if node.group == self.group_uuid]
15410 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15412 (self.op.group_name,
15413 utils.CommaJoin(utils.NiceSort(group_nodes))),
15414 errors.ECODE_STATE)
15416 # Verify the cluster would not be left group-less.
15417 if len(self.cfg.GetNodeGroupList()) == 1:
15418 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15419 " removed" % self.op.group_name,
15420 errors.ECODE_STATE)
15422 def BuildHooksEnv(self):
15423 """Build hooks env.
15427 "GROUP_NAME": self.op.group_name,
15430 def BuildHooksNodes(self):
15431 """Build hooks nodes.
15434 mn = self.cfg.GetMasterNode()
15435 return ([mn], [mn])
15437 def Exec(self, feedback_fn):
15438 """Remove the node group.
15442 self.cfg.RemoveNodeGroup(self.group_uuid)
15443 except errors.ConfigurationError:
15444 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15445 (self.op.group_name, self.group_uuid))
15447 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15450 class LUGroupRename(LogicalUnit):
15451 HPATH = "group-rename"
15452 HTYPE = constants.HTYPE_GROUP
15455 def ExpandNames(self):
15456 # This raises errors.OpPrereqError on its own:
15457 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15459 self.needed_locks = {
15460 locking.LEVEL_NODEGROUP: [self.group_uuid],
15463 def CheckPrereq(self):
15464 """Check prerequisites.
15466 Ensures requested new name is not yet used.
15470 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15471 except errors.OpPrereqError:
15474 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15475 " node group (UUID: %s)" %
15476 (self.op.new_name, new_name_uuid),
15477 errors.ECODE_EXISTS)
15479 def BuildHooksEnv(self):
15480 """Build hooks env.
15484 "OLD_NAME": self.op.group_name,
15485 "NEW_NAME": self.op.new_name,
15488 def BuildHooksNodes(self):
15489 """Build hooks nodes.
15492 mn = self.cfg.GetMasterNode()
15494 all_nodes = self.cfg.GetAllNodesInfo()
15495 all_nodes.pop(mn, None)
15498 run_nodes.extend(node.name for node in all_nodes.values()
15499 if node.group == self.group_uuid)
15501 return (run_nodes, run_nodes)
15503 def Exec(self, feedback_fn):
15504 """Rename the node group.
15507 group = self.cfg.GetNodeGroup(self.group_uuid)
15510 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15511 (self.op.group_name, self.group_uuid))
15513 group.name = self.op.new_name
15514 self.cfg.Update(group, feedback_fn)
15516 return self.op.new_name
15519 class LUGroupEvacuate(LogicalUnit):
15520 HPATH = "group-evacuate"
15521 HTYPE = constants.HTYPE_GROUP
15524 def ExpandNames(self):
15525 # This raises errors.OpPrereqError on its own:
15526 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15528 if self.op.target_groups:
15529 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15530 self.op.target_groups)
15532 self.req_target_uuids = []
15534 if self.group_uuid in self.req_target_uuids:
15535 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15536 " as a target group (targets are %s)" %
15538 utils.CommaJoin(self.req_target_uuids)),
15539 errors.ECODE_INVAL)
15541 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15543 self.share_locks = _ShareAll()
15544 self.needed_locks = {
15545 locking.LEVEL_INSTANCE: [],
15546 locking.LEVEL_NODEGROUP: [],
15547 locking.LEVEL_NODE: [],
15550 def DeclareLocks(self, level):
15551 if level == locking.LEVEL_INSTANCE:
15552 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15554 # Lock instances optimistically, needs verification once node and group
15555 # locks have been acquired
15556 self.needed_locks[locking.LEVEL_INSTANCE] = \
15557 self.cfg.GetNodeGroupInstances(self.group_uuid)
15559 elif level == locking.LEVEL_NODEGROUP:
15560 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15562 if self.req_target_uuids:
15563 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15565 # Lock all groups used by instances optimistically; this requires going
15566 # via the node before it's locked, requiring verification later on
15567 lock_groups.update(group_uuid
15568 for instance_name in
15569 self.owned_locks(locking.LEVEL_INSTANCE)
15571 self.cfg.GetInstanceNodeGroups(instance_name))
15573 # No target groups, need to lock all of them
15574 lock_groups = locking.ALL_SET
15576 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15578 elif level == locking.LEVEL_NODE:
15579 # This will only lock the nodes in the group to be evacuated which
15580 # contain actual instances
15581 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15582 self._LockInstancesNodes()
15584 # Lock all nodes in group to be evacuated and target groups
15585 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15586 assert self.group_uuid in owned_groups
15587 member_nodes = [node_name
15588 for group in owned_groups
15589 for node_name in self.cfg.GetNodeGroup(group).members]
15590 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15592 def CheckPrereq(self):
15593 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15594 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15595 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15597 assert owned_groups.issuperset(self.req_target_uuids)
15598 assert self.group_uuid in owned_groups
15600 # Check if locked instances are still correct
15601 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15603 # Get instance information
15604 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15606 # Check if node groups for locked instances are still correct
15607 _CheckInstancesNodeGroups(self.cfg, self.instances,
15608 owned_groups, owned_nodes, self.group_uuid)
15610 if self.req_target_uuids:
15611 # User requested specific target groups
15612 self.target_uuids = self.req_target_uuids
15614 # All groups except the one to be evacuated are potential targets
15615 self.target_uuids = [group_uuid for group_uuid in owned_groups
15616 if group_uuid != self.group_uuid]
15618 if not self.target_uuids:
15619 raise errors.OpPrereqError("There are no possible target groups",
15620 errors.ECODE_INVAL)
15622 def BuildHooksEnv(self):
15623 """Build hooks env.
15627 "GROUP_NAME": self.op.group_name,
15628 "TARGET_GROUPS": " ".join(self.target_uuids),
15631 def BuildHooksNodes(self):
15632 """Build hooks nodes.
15635 mn = self.cfg.GetMasterNode()
15637 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15639 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15641 return (run_nodes, run_nodes)
15643 def Exec(self, feedback_fn):
15644 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15646 assert self.group_uuid not in self.target_uuids
15648 req = iallocator.IAReqGroupChange(instances=instances,
15649 target_groups=self.target_uuids)
15650 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15652 ial.Run(self.op.iallocator)
15654 if not ial.success:
15655 raise errors.OpPrereqError("Can't compute group evacuation using"
15656 " iallocator '%s': %s" %
15657 (self.op.iallocator, ial.info),
15658 errors.ECODE_NORES)
15660 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15662 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15663 len(jobs), self.op.group_name)
15665 return ResultWithJobs(jobs)
15668 class TagsLU(NoHooksLU): # pylint: disable=W0223
15669 """Generic tags LU.
15671 This is an abstract class which is the parent of all the other tags LUs.
15674 def ExpandNames(self):
15675 self.group_uuid = None
15676 self.needed_locks = {}
15678 if self.op.kind == constants.TAG_NODE:
15679 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15680 lock_level = locking.LEVEL_NODE
15681 lock_name = self.op.name
15682 elif self.op.kind == constants.TAG_INSTANCE:
15683 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15684 lock_level = locking.LEVEL_INSTANCE
15685 lock_name = self.op.name
15686 elif self.op.kind == constants.TAG_NODEGROUP:
15687 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15688 lock_level = locking.LEVEL_NODEGROUP
15689 lock_name = self.group_uuid
15690 elif self.op.kind == constants.TAG_NETWORK:
15691 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15692 lock_level = locking.LEVEL_NETWORK
15693 lock_name = self.network_uuid
15698 if lock_level and getattr(self.op, "use_locking", True):
15699 self.needed_locks[lock_level] = lock_name
15701 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15702 # not possible to acquire the BGL based on opcode parameters)
15704 def CheckPrereq(self):
15705 """Check prerequisites.
15708 if self.op.kind == constants.TAG_CLUSTER:
15709 self.target = self.cfg.GetClusterInfo()
15710 elif self.op.kind == constants.TAG_NODE:
15711 self.target = self.cfg.GetNodeInfo(self.op.name)
15712 elif self.op.kind == constants.TAG_INSTANCE:
15713 self.target = self.cfg.GetInstanceInfo(self.op.name)
15714 elif self.op.kind == constants.TAG_NODEGROUP:
15715 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15716 elif self.op.kind == constants.TAG_NETWORK:
15717 self.target = self.cfg.GetNetwork(self.network_uuid)
15719 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15720 str(self.op.kind), errors.ECODE_INVAL)
15723 class LUTagsGet(TagsLU):
15724 """Returns the tags of a given object.
15729 def ExpandNames(self):
15730 TagsLU.ExpandNames(self)
15732 # Share locks as this is only a read operation
15733 self.share_locks = _ShareAll()
15735 def Exec(self, feedback_fn):
15736 """Returns the tag list.
15739 return list(self.target.GetTags())
15742 class LUTagsSearch(NoHooksLU):
15743 """Searches the tags for a given pattern.
15748 def ExpandNames(self):
15749 self.needed_locks = {}
15751 def CheckPrereq(self):
15752 """Check prerequisites.
15754 This checks the pattern passed for validity by compiling it.
15758 self.re = re.compile(self.op.pattern)
15759 except re.error, err:
15760 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15761 (self.op.pattern, err), errors.ECODE_INVAL)
15763 def Exec(self, feedback_fn):
15764 """Returns the tag list.
15768 tgts = [("/cluster", cfg.GetClusterInfo())]
15769 ilist = cfg.GetAllInstancesInfo().values()
15770 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15771 nlist = cfg.GetAllNodesInfo().values()
15772 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15773 tgts.extend(("/nodegroup/%s" % n.name, n)
15774 for n in cfg.GetAllNodeGroupsInfo().values())
15776 for path, target in tgts:
15777 for tag in target.GetTags():
15778 if self.re.search(tag):
15779 results.append((path, tag))
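# Example of the search output built above (object names and tags are
# hypothetical): each match is a (path, tag) pair whose path encodes the
# object type, e.g.
#   [("/cluster", "backup"), ("/instances/inst1.example.com", "backup")]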
15783 class LUTagsSet(TagsLU):
15784 """Sets a tag on a given object.
15789 def CheckPrereq(self):
15790 """Check prerequisites.
15792 This checks the type and length of the tag name and value.
15795 TagsLU.CheckPrereq(self)
15796 for tag in self.op.tags:
15797 objects.TaggableObject.ValidateTag(tag)
15799 def Exec(self, feedback_fn):
15804 for tag in self.op.tags:
15805 self.target.AddTag(tag)
15806 except errors.TagError, err:
15807 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15808 self.cfg.Update(self.target, feedback_fn)
15811 class LUTagsDel(TagsLU):
15812 """Delete a list of tags from a given object.
15817 def CheckPrereq(self):
15818 """Check prerequisites.
15820 This checks that we have the given tag.
15823 TagsLU.CheckPrereq(self)
15824 for tag in self.op.tags:
15825 objects.TaggableObject.ValidateTag(tag)
15826 del_tags = frozenset(self.op.tags)
15827 cur_tags = self.target.GetTags()
15829 diff_tags = del_tags - cur_tags
15831 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15832 raise errors.OpPrereqError("Tag(s) %s not found" %
15833 (utils.CommaJoin(diff_names), ),
15834 errors.ECODE_NOENT)
15836 def Exec(self, feedback_fn):
15837 """Remove the tag from the object.
15840 for tag in self.op.tags:
15841 self.target.RemoveTag(tag)
15842 self.cfg.Update(self.target, feedback_fn)
15845 class LUTestDelay(NoHooksLU):
15846 """Sleep for a specified amount of time.
15848 This LU sleeps on the master and/or nodes for a specified amount of time.
15854 def ExpandNames(self):
15855 """Expand names and set required locks.
15857 This expands the node list, if any.
15860 self.needed_locks = {}
15861 if self.op.on_nodes:
15862 # _GetWantedNodes can be used here, but is not always appropriate to use
15863 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15864 # more information.
15865 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15866 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15868 def _TestDelay(self):
15869 """Do the actual sleep.
15872 if self.op.on_master:
15873 if not utils.TestDelay(self.op.duration):
15874 raise errors.OpExecError("Error during master delay test")
15875 if self.op.on_nodes:
15876 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15877 for node, node_result in result.items():
15878 node_result.Raise("Failure during rpc call to node %s" % node)
15880 def Exec(self, feedback_fn):
15881 """Execute the test delay opcode, with the wanted repetitions.
15884 if self.op.repeat == 0:
15887 top_value = self.op.repeat - 1
15888 for i in range(self.op.repeat):
15889 self.LogInfo("Test delay iteration %d/%d", i, top_value)
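# Descriptive note on the loop above: with a non-zero repeat count the delay
# is executed self.op.repeat times and every iteration is reported against
# top_value = self.op.repeat - 1 (i.e. "0/2", "1/2", "2/2" for three runs).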
15893 class LURestrictedCommand(NoHooksLU):
15894 """Logical unit for executing restricted commands.
15899 def ExpandNames(self):
15901 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15903 self.needed_locks = {
15904 locking.LEVEL_NODE: self.op.nodes,
15906 self.share_locks = {
15907 locking.LEVEL_NODE: not self.op.use_locking,
15910 def CheckPrereq(self):
15911 """Check prerequisites.
15915 def Exec(self, feedback_fn):
15916 """Execute restricted command and return output.
15919 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15921 # Check if correct locks are held
15922 assert set(self.op.nodes).issubset(owned_nodes)
15924 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15928 for node_name in self.op.nodes:
15929 nres = rpcres[node_name]
15931 msg = ("Command '%s' on node '%s' failed: %s" %
15932 (self.op.command, node_name, nres.fail_msg))
15933 result.append((False, msg))
15935 result.append((True, nres.payload))
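# Shape of the result assembled above (sketch, hypothetical command and node
# names): one (success, payload_or_message) tuple per requested node, e.g.
#   [(True, "command output"),
#    (False, "Command 'uptime' on node 'node2' failed: ...")]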
15940 class LUTestJqueue(NoHooksLU):
15941 """Utility LU to test some aspects of the job queue.
15946 # Must be lower than default timeout for WaitForJobChange to see whether it
15947 # notices changed jobs
15948 _CLIENT_CONNECT_TIMEOUT = 20.0
15949 _CLIENT_CONFIRM_TIMEOUT = 60.0
15952 def _NotifyUsingSocket(cls, cb, errcls):
15953 """Opens a Unix socket and waits for another program to connect.
15956 @param cb: Callback to send socket name to client
15957 @type errcls: class
15958 @param errcls: Exception class to use for errors
15961 # Using a temporary directory as there's no easy way to create temporary
15962 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
15964 tmpdir = tempfile.mkdtemp()
15966 tmpsock = utils.PathJoin(tmpdir, "sock")
15968 logging.debug("Creating temporary socket at %s", tmpsock)
15969 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15974 # Send details to client
15977 # Wait for client to connect before continuing
15978 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15980 (conn, _) = sock.accept()
15981 except socket.error, err:
15982 raise errcls("Client didn't connect in time (%s)" % err)
15986 # Remove as soon as client is connected
15987 shutil.rmtree(tmpdir)
15989 # Wait for client to close
15992 # pylint: disable=E1101
15993 # Instance of '_socketobject' has no ... member
15994 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15996 except socket.error, err:
15997 raise errcls("Client failed to confirm notification (%s)" % err)
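# Handshake summary for the helper above (descriptive only): a socket path in
# a fresh temporary directory is passed to the client through cb(), the LU
# waits for the client to connect (up to _CLIENT_CONNECT_TIMEOUT), removes
# the directory once connected, and then waits for the client to close the
# connection (up to _CLIENT_CONFIRM_TIMEOUT), raising errcls on timeouts.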
16001 def _SendNotification(self, test, arg, sockname):
16002 """Sends a notification to the client.
16005 @param test: Test name
16006 @param arg: Test argument (depends on test)
16007 @type sockname: string
16008 @param sockname: Socket path
16011 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16013 def _Notify(self, prereq, test, arg):
16014 """Notifies the client of a test.
16017 @param prereq: Whether this is a prereq-phase test
16019 @param test: Test name
16020 @param arg: Test argument (depends on test)
16024 errcls = errors.OpPrereqError
16026 errcls = errors.OpExecError
16028 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16032 def CheckArguments(self):
16033 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16034 self.expandnames_calls = 0
16036 def ExpandNames(self):
16037 checkargs_calls = getattr(self, "checkargs_calls", 0)
16038 if checkargs_calls < 1:
16039 raise errors.ProgrammerError("CheckArguments was not called")
16041 self.expandnames_calls += 1
16043 if self.op.notify_waitlock:
16044 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16046 self.LogInfo("Expanding names")
16048 # Get lock on master node (just to get a lock, not for a particular reason)
16049 self.needed_locks = {
16050 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16053 def Exec(self, feedback_fn):
16054 if self.expandnames_calls < 1:
16055 raise errors.ProgrammerError("ExpandNames was not called")
16057 if self.op.notify_exec:
16058 self._Notify(False, constants.JQT_EXEC, None)
16060 self.LogInfo("Executing")
16062 if self.op.log_messages:
16063 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16064 for idx, msg in enumerate(self.op.log_messages):
16065 self.LogInfo("Sending log message %s", idx + 1)
16066 feedback_fn(constants.JQT_MSGPREFIX + msg)
16067 # Report how many test messages have been sent
16068 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16071 raise errors.OpExecError("Opcode failure was requested")
16076 class LUTestAllocator(NoHooksLU):
16077 """Run allocator tests.
16079 This LU runs the allocator tests
16082 def CheckPrereq(self):
16083 """Check prerequisites.
16085 This checks the opcode parameters depending on the director and mode test.
16088 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16089 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16090 for attr in ["memory", "disks", "disk_template",
16091 "os", "tags", "nics", "vcpus"]:
16092 if not hasattr(self.op, attr):
16093 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16094 attr, errors.ECODE_INVAL)
16095 iname = self.cfg.ExpandInstanceName(self.op.name)
16096 if iname is not None:
16097 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16098 iname, errors.ECODE_EXISTS)
16099 if not isinstance(self.op.nics, list):
16100 raise errors.OpPrereqError("Invalid parameter 'nics'",
16101 errors.ECODE_INVAL)
16102 if not isinstance(self.op.disks, list):
16103 raise errors.OpPrereqError("Invalid parameter 'disks'",
16104 errors.ECODE_INVAL)
16105 for row in self.op.disks:
16106 if (not isinstance(row, dict) or
16107 constants.IDISK_SIZE not in row or
16108 not isinstance(row[constants.IDISK_SIZE], int) or
16109 constants.IDISK_MODE not in row or
16110 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16111 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16112 " parameter", errors.ECODE_INVAL)
16113 if self.op.hypervisor is None:
16114 self.op.hypervisor = self.cfg.GetHypervisorType()
16115 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16116 fname = _ExpandInstanceName(self.cfg, self.op.name)
16117 self.op.name = fname
16118 self.relocate_from = \
16119 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16120 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16121 constants.IALLOCATOR_MODE_NODE_EVAC):
16122 if not self.op.instances:
16123 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16124 self.op.instances = _GetWantedInstances(self, self.op.instances)
16126 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16127 self.op.mode, errors.ECODE_INVAL)
16129 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16130 if self.op.iallocator is None:
16131 raise errors.OpPrereqError("Missing allocator name",
16132 errors.ECODE_INVAL)
16133 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16134 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16135 self.op.direction, errors.ECODE_INVAL)
16137 def Exec(self, feedback_fn):
16138 """Run the allocator test.
16141 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16142 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16143 memory=self.op.memory,
16144 disks=self.op.disks,
16145 disk_template=self.op.disk_template,
16149 vcpus=self.op.vcpus,
16150 spindle_use=self.op.spindle_use,
16151 hypervisor=self.op.hypervisor,
16152 node_whitelist=None)
16153 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16154 req = iallocator.IAReqRelocate(name=self.op.name,
16155 relocate_from=list(self.relocate_from))
16156 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16157 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16158 target_groups=self.op.target_groups)
16159 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16160 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16161 evac_mode=self.op.evac_mode)
16162 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16163 disk_template = self.op.disk_template
16164 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16165 memory=self.op.memory,
16166 disks=self.op.disks,
16167 disk_template=disk_template,
16171 vcpus=self.op.vcpus,
16172 spindle_use=self.op.spindle_use,
16173 hypervisor=self.op.hypervisor)
16174 for idx in range(self.op.count)]
16175 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16177 raise errors.ProgrammerError("Unhandled mode %s in"
16178 " LUTestAllocator.Exec", self.op.mode)
16180 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16181 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16182 result = ial.in_text
16184 ial.Run(self.op.iallocator, validate=False)
16185 result = ial.out_text
16189 class LUNetworkAdd(LogicalUnit):
16190 """Logical unit for creating networks.
16193 HPATH = "network-add"
16194 HTYPE = constants.HTYPE_NETWORK
16197 def BuildHooksNodes(self):
16198 """Build hooks nodes.
16201 mn = self.cfg.GetMasterNode()
16202 return ([mn], [mn])
16204 def CheckArguments(self):
16205 if self.op.mac_prefix:
16206 self.op.mac_prefix = \
16207 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16209 def ExpandNames(self):
16210 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16212 if self.op.conflicts_check:
16213 self.share_locks[locking.LEVEL_NODE] = 1
16214 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16215 self.needed_locks = {
16216 locking.LEVEL_NODE: locking.ALL_SET,
16217 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16220 self.needed_locks = {}
16222 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16224 def CheckPrereq(self):
16225 if self.op.network is None:
16226 raise errors.OpPrereqError("Network must be given",
16227 errors.ECODE_INVAL)
16230 existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
16231 except errors.OpPrereqError:
16234 raise errors.OpPrereqError("Desired network name '%s' already exists as a"
16235 " network (UUID: %s)" %
16236 (self.op.network_name, existing_uuid),
16237 errors.ECODE_EXISTS)
16239 # Check tag validity
16240 for tag in self.op.tags:
16241 objects.TaggableObject.ValidateTag(tag)
16243 def BuildHooksEnv(self):
16244 """Build hooks env.
16248 "name": self.op.network_name,
16249 "subnet": self.op.network,
16250 "gateway": self.op.gateway,
16251 "network6": self.op.network6,
16252 "gateway6": self.op.gateway6,
16253 "mac_prefix": self.op.mac_prefix,
16254 "tags": self.op.tags,
16256 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16258 def Exec(self, feedback_fn):
16259 """Add the ip pool to the cluster.
16262 nobj = objects.Network(name=self.op.network_name,
16263 network=self.op.network,
16264 gateway=self.op.gateway,
16265 network6=self.op.network6,
16266 gateway6=self.op.gateway6,
16267 mac_prefix=self.op.mac_prefix,
16268 uuid=self.network_uuid)
16269 # Initialize the associated address pool
16271 pool = network.AddressPool.InitializeNetwork(nobj)
16272 except errors.AddressPoolError, err:
16273 raise errors.OpExecError("Cannot create IP address pool for network"
16274 " '%s': %s" % (self.op.network_name, err))
16276 # Check if we need to reserve the nodes and the cluster master IP
16277 # These may not be allocated to any instances in routed mode, as
16278 # they wouldn't function anyway.
16279 if self.op.conflicts_check:
16280 for node in self.cfg.GetAllNodesInfo().values():
16281 for ip in [node.primary_ip, node.secondary_ip]:
16283 if pool.Contains(ip):
16285 self.LogInfo("Reserved IP address of node '%s' (%s)",
16287 except errors.AddressPoolError, err:
16288 self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
16289 ip, node.name, err)
16291 master_ip = self.cfg.GetClusterInfo().master_ip
16293 if pool.Contains(master_ip):
16294 pool.Reserve(master_ip)
16295 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16296 except errors.AddressPoolError, err:
16297 self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
16300 if self.op.add_reserved_ips:
16301 for ip in self.op.add_reserved_ips:
16303 pool.Reserve(ip, external=True)
16304 except errors.AddressPoolError, err:
16305 raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
16309 for tag in self.op.tags:
16312 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16313 del self.remove_locks[locking.LEVEL_NETWORK]
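# Reservation behaviour of Exec() above (summary): with conflicts_check set,
# node primary/secondary IPs inside the new pool are reserved (failures are
# only logged as warnings), the cluster master IP is reserved if contained,
# and add_reserved_ips entries are reserved as external addresses, where a
# failure aborts the operation with OpExecError.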
16316 class LUNetworkRemove(LogicalUnit):
16317 HPATH = "network-remove"
16318 HTYPE = constants.HTYPE_NETWORK
16321 def ExpandNames(self):
16322 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16324 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16325 self.needed_locks = {
16326 locking.LEVEL_NETWORK: [self.network_uuid],
16327 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16330 def CheckPrereq(self):
16331 """Check prerequisites.
16333 This checks that the given network name exists as a network, that is
16334 empty (i.e., contains no nodes), and that is not the last group of the
16338 # Verify that the network is not conncted.
16339 node_groups = [group.name
16340 for group in self.cfg.GetAllNodeGroupsInfo().values()
16341 if self.network_uuid in group.networks]
16343 if node_groups:
16344 self.LogWarning("Network '%s' is connected to the following"
16345 " node groups: %s" %
16346 (self.op.network_name,
16347 utils.CommaJoin(utils.NiceSort(node_groups))))
16348 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16350 def BuildHooksEnv(self):
16351 """Build hooks env.
16355 "NETWORK_NAME": self.op.network_name,
16358 def BuildHooksNodes(self):
16359 """Build hooks nodes.
16362 mn = self.cfg.GetMasterNode()
16363 return ([mn], [mn])
16365 def Exec(self, feedback_fn):
16366 """Remove the network.
16370 self.cfg.RemoveNetwork(self.network_uuid)
16371 except errors.ConfigurationError:
16372 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16373 (self.op.network_name, self.network_uuid))
16376 class LUNetworkSetParams(LogicalUnit):
16377 """Modifies the parameters of a network.
16380 HPATH = "network-modify"
16381 HTYPE = constants.HTYPE_NETWORK
16384 def CheckArguments(self):
16385 if (self.op.gateway and
16386 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
16387 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
16388 " at once", errors.ECODE_INVAL)
16390 def ExpandNames(self):
16391 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16393 self.needed_locks = {
16394 locking.LEVEL_NETWORK: [self.network_uuid],
16395 }
16397 def CheckPrereq(self):
16398 """Check prerequisites.
16401 self.network = self.cfg.GetNetwork(self.network_uuid)
16402 self.gateway = self.network.gateway
16403 self.mac_prefix = self.network.mac_prefix
16404 self.network6 = self.network.network6
16405 self.gateway6 = self.network.gateway6
16406 self.tags = self.network.tags
16408 self.pool = network.AddressPool(self.network)
16410 if self.op.gateway:
16411 if self.op.gateway == constants.VALUE_NONE:
16412 self.gateway = None
16413 else:
16414 self.gateway = self.op.gateway
16415 if self.pool.IsReserved(self.gateway):
16416 raise errors.OpPrereqError("Gateway IP address '%s' is already"
16417 " reserved" % self.gateway,
16418 errors.ECODE_STATE)
16420 if self.op.mac_prefix:
16421 if self.op.mac_prefix == constants.VALUE_NONE:
16422 self.mac_prefix = None
16423 else:
16424 self.mac_prefix = \
16425 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16427 if self.op.gateway6:
16428 if self.op.gateway6 == constants.VALUE_NONE:
16429 self.gateway6 = None
16430 else:
16431 self.gateway6 = self.op.gateway6
16433 if self.op.network6:
16434 if self.op.network6 == constants.VALUE_NONE:
16435 self.network6 = None
16436 else:
16437 self.network6 = self.op.network6
16439 def BuildHooksEnv(self):
16440 """Build hooks env.
16444 "name": self.op.network_name,
16445 "subnet": self.network.network,
16446 "gateway": self.gateway,
16447 "network6": self.network6,
16448 "gateway6": self.gateway6,
16449 "mac_prefix": self.mac_prefix,
16452 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16454 def BuildHooksNodes(self):
16455 """Build hooks nodes.
16458 mn = self.cfg.GetMasterNode()
16459 return ([mn], [mn])
16461 def Exec(self, feedback_fn):
16462 """Modifies the network.
16465 #TODO: reserve/release via temporary reservation manager
16466 # extend cfg.ReserveIp/ReleaseIp with the external flag
16467 if self.op.gateway:
16468 if self.gateway == self.network.gateway:
16469 self.LogWarning("Gateway is already %s", self.gateway)
16470 else:
16471 if self.gateway:
16472 self.pool.Reserve(self.gateway, external=True)
16473 if self.network.gateway:
16474 self.pool.Release(self.network.gateway, external=True)
16475 self.network.gateway = self.gateway
16477 if self.op.add_reserved_ips:
16478 for ip in self.op.add_reserved_ips:
16479 try:
16480 if self.pool.IsReserved(ip):
16481 self.LogWarning("IP address %s is already reserved", ip)
16482 else:
16483 self.pool.Reserve(ip, external=True)
16484 except errors.AddressPoolError, err:
16485 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16487 if self.op.remove_reserved_ips:
16488 for ip in self.op.remove_reserved_ips:
16489 if ip == self.network.gateway:
16490 self.LogWarning("Cannot unreserve the gateway's IP address")
16491 continue
16492 try:
16493 if not self.pool.IsReserved(ip):
16494 self.LogWarning("IP address %s is already unreserved", ip)
16495 else:
16496 self.pool.Release(ip, external=True)
16497 except errors.AddressPoolError, err:
16498 self.LogWarning("Cannot release IP address %s: %s", ip, err)
16500 if self.op.mac_prefix:
16501 self.network.mac_prefix = self.mac_prefix
16503 if self.op.network6:
16504 self.network.network6 = self.network6
16506 if self.op.gateway6:
16507 self.network.gateway6 = self.gateway6
16509 self.pool.Validate()
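# pool.Validate() above is assumed to sanity-check the reservations against
# the (possibly changed) network parameters before the modified network
# object is written back to the configuration below.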
16511 self.cfg.Update(self.network, feedback_fn)
16514 class _NetworkQuery(_QueryBase):
16515 FIELDS = query.NETWORK_FIELDS
16517 def ExpandNames(self, lu):
16518 lu.needed_locks = {}
16519 lu.share_locks = _ShareAll()
16521 self.do_locking = self.use_locking
16523 all_networks = lu.cfg.GetAllNetworksInfo()
16524 name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
16526 if self.names:
16527 missing = []
16528 self.wanted = []
16530 for name in self.names:
16531 if name in name_to_uuid:
16532 self.wanted.append(name_to_uuid[name])
16533 else:
16534 missing.append(name)
16536 if missing:
16537 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16538 errors.ECODE_NOENT)
16539 else:
16540 self.wanted = locking.ALL_SET
16542 if self.do_locking:
16543 lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
16544 if query.NETQ_INST in self.requested_data:
16545 lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
16546 if query.NETQ_GROUP in self.requested_data:
16547 lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET
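# Instance and node group locks are only needed when the query requests the
# fields mapping networks to instances (NETQ_INST) or to node groups
# (NETQ_GROUP); plain network queries only take the network locks.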
16549 def DeclareLocks(self, lu, level):
16550 pass
16552 def _GetQueryData(self, lu):
16553 """Computes the list of networks and their attributes.
16556 all_networks = lu.cfg.GetAllNetworksInfo()
16558 network_uuids = self._GetNames(lu, all_networks.keys(),
16559 locking.LEVEL_NETWORK)
16561 do_instances = query.NETQ_INST in self.requested_data
16562 do_groups = query.NETQ_GROUP in self.requested_data
16564 network_to_instances = None
16565 network_to_groups = None
16567 # For NETQ_GROUP, we need to map network->[groups]
16568 if do_groups:
16569 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16570 network_to_groups = dict((uuid, []) for uuid in network_uuids)
16571 for _, group in all_groups.iteritems():
16572 for net_uuid in network_uuids:
16573 netparams = group.networks.get(net_uuid, None)
16574 if netparams:
16575 info = (group.name, netparams[constants.NIC_MODE],
16576 netparams[constants.NIC_LINK])
16578 network_to_groups[net_uuid].append(info)
16580 if do_instances:
16581 all_instances = lu.cfg.GetAllInstancesInfo()
16582 network_to_instances = dict((uuid, []) for uuid in network_uuids)
16583 for instance in all_instances.values():
16584 for nic in instance.nics:
16585 if nic.network in network_uuids:
16586 network_to_instances[nic.network].append(instance.name)
16589 if query.NETQ_STATS in self.requested_data:
16590 stats = \
16591 dict((uuid,
16592 self._GetStats(network.AddressPool(all_networks[uuid])))
16593 for uuid in network_uuids)
16594 else:
16595 stats = None
16597 return query.NetworkQueryData([all_networks[uuid]
16598 for uuid in network_uuids],
16599 network_to_groups,
16600 network_to_instances,
16601 stats)
16603 @staticmethod
16604 def _GetStats(pool):
16605 """Returns statistics for a network address pool.
16609 "free_count": pool.GetFreeCount(),
16610 "reserved_count": pool.GetReservedCount(),
16611 "map": pool.GetMap(),
16612 "external_reservations":
16613 utils.CommaJoin(pool.GetExternalReservations()),
16614 }
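# Illustrative example of the resulting mapping (values depend on the pool's
# state and on the exact format returned by AddressPool.GetMap):
#   {"free_count": 122, "reserved_count": 6, "map": "XXX.....X.....",
#    "external_reservations": "192.0.2.1, 192.0.2.9"}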
16617 class LUNetworkQuery(NoHooksLU):
16618 """Logical unit for querying networks.
16623 def CheckArguments(self):
16624 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16625 self.op.output_fields, self.op.use_locking)
16627 def ExpandNames(self):
16628 self.nq.ExpandNames(self)
16630 def Exec(self, feedback_fn):
16631 return self.nq.OldStyleQuery(self)
16634 class LUNetworkConnect(LogicalUnit):
16635 """Connect a network to a nodegroup
16638 HPATH = "network-connect"
16639 HTYPE = constants.HTYPE_NETWORK
16642 def ExpandNames(self):
16643 self.network_name = self.op.network_name
16644 self.group_name = self.op.group_name
16645 self.network_mode = self.op.network_mode
16646 self.network_link = self.op.network_link
16648 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16649 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16651 self.needed_locks = {
16652 locking.LEVEL_INSTANCE: [],
16653 locking.LEVEL_NODEGROUP: [self.group_uuid],
16654 }
16655 self.share_locks[locking.LEVEL_INSTANCE] = 1
16657 if self.op.conflicts_check:
16658 self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16659 self.share_locks[locking.LEVEL_NETWORK] = 1
16661 def DeclareLocks(self, level):
16662 if level == locking.LEVEL_INSTANCE:
16663 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16665 # Lock instances optimistically, needs verification once group lock has
16666 # been acquired
16667 if self.op.conflicts_check:
16668 self.needed_locks[locking.LEVEL_INSTANCE] = \
16669 self.cfg.GetNodeGroupInstances(self.group_uuid)
16671 def BuildHooksEnv(self):
16672 ret = {
16673 "GROUP_NAME": self.group_name,
16674 "GROUP_NETWORK_MODE": self.network_mode,
16675 "GROUP_NETWORK_LINK": self.network_link,
16679 def BuildHooksNodes(self):
16680 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16681 return (nodes, nodes)
16683 def CheckPrereq(self):
16684 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16686 assert self.group_uuid in owned_groups
16688 # Check if locked instances are still correct
16689 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16690 if self.op.conflicts_check:
16691 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16693 self.netparams = {
16694 constants.NIC_MODE: self.network_mode,
16695 constants.NIC_LINK: self.network_link,
16696 }
16697 objects.NIC.CheckParameterSyntax(self.netparams)
16699 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16700 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16701 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16702 self.connected = False
16703 if self.network_uuid in self.group.networks:
16704 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16705 (self.network_name, self.group.name))
16706 self.connected = True
16708 # check only if not already connected
16709 elif self.op.conflicts_check:
16710 pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))
16712 _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
16713 "connect to", owned_instances)
16715 def Exec(self, feedback_fn):
16716 # Connect the network and update the group only if not already connected
16717 if not self.connected:
16718 self.group.networks[self.network_uuid] = self.netparams
16719 self.cfg.Update(self.group, feedback_fn)
16722 def _NetworkConflictCheck(lu, check_fn, action, instances):
16723 """Checks for network interface conflicts with a network.
16725 @type lu: L{LogicalUnit}
16726 @type check_fn: callable receiving one parameter (L{objects.NIC}) and
16727 returning boolean
16728 @param check_fn: Function checking for conflict
16729 @type action: string
16730 @param action: Part of error message (see code)
16731 @raise errors.OpPrereqError: If conflicting IP addresses are found.
16733 """
16734 conflicts = []
16736 for (_, instance) in lu.cfg.GetMultiInstanceInfo(instances):
16737 instconflicts = [(idx, nic.ip)
16738 for (idx, nic) in enumerate(instance.nics)
16739 if check_fn(nic)]
16741 if instconflicts:
16742 conflicts.append((instance.name, instconflicts))
16744 if conflicts:
16745 lu.LogWarning("IP addresses from network '%s', which is about to %s"
16746 " node group '%s', are in use: %s" %
16747 (lu.network_name, action, lu.group.name,
16748 utils.CommaJoin(("%s: %s" %
16749 (name, _FmtNetworkConflict(details)))
16750 for (name, details) in conflicts)))
16752 raise errors.OpPrereqError("Conflicting IP addresses found;"
16753 " remove/modify the corresponding network"
16754 " interfaces", errors.ECODE_STATE)
16757 def _FmtNetworkConflict(details):
16758 """Utility for L{_NetworkConflictCheck}.
16761 return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
16762 for (idx, ipaddr) in details)
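# Example: details=[(0, "198.51.100.5"), (2, "198.51.100.7")] is rendered as
# "nic0/198.51.100.5, nic2/198.51.100.7".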
16765 class LUNetworkDisconnect(LogicalUnit):
16766 """Disconnect a network to a nodegroup
16769 HPATH = "network-disconnect"
16770 HTYPE = constants.HTYPE_NETWORK
16773 def ExpandNames(self):
16774 self.network_name = self.op.network_name
16775 self.group_name = self.op.group_name
16777 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16778 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16780 self.needed_locks = {
16781 locking.LEVEL_INSTANCE: [],
16782 locking.LEVEL_NODEGROUP: [self.group_uuid],
16783 }
16784 self.share_locks[locking.LEVEL_INSTANCE] = 1
16786 def DeclareLocks(self, level):
16787 if level == locking.LEVEL_INSTANCE:
16788 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16790 # Lock instances optimistically, needs verification once group lock has
16791 # been acquired
16792 self.needed_locks[locking.LEVEL_INSTANCE] = \
16793 self.cfg.GetNodeGroupInstances(self.group_uuid)
16795 def BuildHooksEnv(self):
16796 ret = {
16797 "GROUP_NAME": self.group_name,
16798 }
16799 return ret
16801 def BuildHooksNodes(self):
16802 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16803 return (nodes, nodes)
16805 def CheckPrereq(self):
16806 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16808 assert self.group_uuid in owned_groups
16810 # Check if locked instances are still correct
16811 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16812 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16814 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16815 self.connected = True
16816 if self.network_uuid not in self.group.networks:
16817 self.LogWarning("Network '%s' is not mapped to group '%s'",
16818 self.network_name, self.group.name)
16819 self.connected = False
16821 # The conflict check is only needed if the network is actually connected
16822 else:
16823 _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
16824 "disconnect from", owned_instances)
16826 def Exec(self, feedback_fn):
16827 # Disconnect the network and update the group only if network is connected
16828 if self.connected:
16829 del self.group.networks[self.network_uuid]
16830 self.cfg.Update(self.group, feedback_fn)
16833 #: Query type implementations
16834 _QUERY_IMPL = {
16835 constants.QR_CLUSTER: _ClusterQuery,
16836 constants.QR_INSTANCE: _InstanceQuery,
16837 constants.QR_NODE: _NodeQuery,
16838 constants.QR_GROUP: _GroupQuery,
16839 constants.QR_NETWORK: _NetworkQuery,
16840 constants.QR_OS: _OsQuery,
16841 constants.QR_EXTSTORAGE: _ExtStorageQuery,
16842 constants.QR_EXPORT: _ExportQuery,
16843 }
16845 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
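# This assert guards against adding a new opcode-visible query resource to
# constants.QR_VIA_OP without providing a matching _QueryBase implementation
# in the mapping above.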
16848 def _GetQueryImplementation(name):
16849 """Returns the implemtnation for a query type.
16851 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16855 return _QUERY_IMPL[name]
16857 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16858 errors.ECODE_INVAL)
16861 def _CheckForConflictingIp(lu, ip, node):
16862 """In case of conflicting IP address raise error.
16865 @param ip: IP address
16867 @param node: node name
16870 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16871 if conf_net is not None:
16872 raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
16873 (ip, conf_net)),
16874 errors.ECODE_STATE)
16876 return (None, None)