# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar

# C0302: since we have waaaay too many lines in this module

import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611


INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
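

# Illustrative sketch, not part of the original module: an LU's Exec can hand
# follow-up work to the job queue by returning a ResultWithJobs; the opcode
# and instance name below are hypothetical examples.
def _ExampleExecReturningJobs():
  jobs = [[opcodes.OpInstanceStartup(instance_name="instance1.example.com")]]
  # mcpu._ProcessResult submits these jobs and adds their IDs to the result
  return ResultWithJobs(jobs)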


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner

    # Dictionaries used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)

    self.add_locks = {}
    self.remove_locks = {}

    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we really have been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
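

# Illustrative sketch, not part of the original module: how an LU typically
# combines _ExpandAndLockInstance with _LockInstancesNodes. The class name is
# hypothetical; real LUs also define HPATH/HTYPE and the remaining methods.
class _ExampleInstanceLU(LogicalUnit): # pylint: disable=W0223
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Node locks can only be computed once the instance lock is held, so
    # declare an empty list here and fill it in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()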


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
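

# Illustrative sketch, not part of the original module: a minimal tasklet.
# The class name and the check performed are hypothetical examples.
class _ExampleTasklet(Tasklet):
  def __init__(self, lu, instance_name):
    Tasklet.__init__(self, lu)
    self.instance_name = instance_name

  def CheckPrereq(self):
    if self.cfg.GetInstanceInfo(self.instance_name) is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name, errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    feedback_fn("Nothing to do for %s" % self.instance_name)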
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
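

# Illustrative sketch, not part of the original module: an LU that only reads
# data can declare every lock level shared. The class name is hypothetical.
class _ExampleReadOnlyLU(NoHooksLU):
  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }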


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @returns: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _IsExclusiveStorageEnabledNode(cfg, node):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @rtype: bool
  @return: The effective value of exclusive_storage

  """
  return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]


def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type nodename: string
  @param nodename: The node
  @rtype: bool
  @return: The effective value of exclusive_storage
  @raise errors.OpPrereqError: if no node exists with the given name

  """
  ni = cfg.GetNodeInfo(nodename)
  if ni is None:
    raise errors.OpPrereqError("Invalid node name %s" % nodename,
                               errors.ECODE_NOENT)
  return _IsExclusiveStorageEnabledNode(cfg, ni)


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
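

# Illustrative sketch, not part of the original module: merging parameter
# overrides with _GetUpdatedParams. The parameter names are arbitrary.
def _ExampleGetUpdatedParamsUsage():
  old = {"kernel_path": "/boot/vmlinuz", "acpi": True}
  update = {"acpi": False, "kernel_path": constants.VALUE_DEFAULT}
  # "acpi" is overridden; "kernel_path" is dropped from the dict so the
  # cluster-level default applies again
  return _GetUpdatedParams(old, update) # -> {"acpi": False}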


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          # in a nicer way
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
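

# Illustrative sketch, not part of the original module: once the wanted nodes
# are known, an LU can drop all other node locks it acquired optimistically.
def _ExampleReleaseLocksUsage(lu, wanted_nodes):
  # Keep only the locks for wanted_nodes, release every other node lock
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=wanted_nodes)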


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
    value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
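

# Illustrative sketch, not part of the original module: the returned dict maps
# (node name, volume name) pairs to instance names; all names below are
# hypothetical.
#
#   {("node1.example.com", "xenvg/disk0"): "instance1.example.com",
#    ("node2.example.com", "xenvg/disk0_meta"): "instance1.example.com"}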


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",
                  node_name, err)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _CheckNodePVs(nresult, exclusive_storage):
  """Check node PVs.

  """
  pvlist_dict = nresult.get(constants.NV_PVLIST, None)
  if pvlist_dict is None:
    return (["Can't get PV list from node"], None)
  pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
  errlist = []
  # check that ':' is not present in PV names, since it's a
  # special character for lvcreate (denotes the range of PEs to
  # use on the PV)
  for pv in pvlist:
    if ":" in pv.name:
      errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
                     (pv.name, pv.vg_name))
  es_pvinfo = None
  if exclusive_storage:
    (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
    errlist.extend(errmsgs)
    shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
    if shared_pvs:
      for (pvname, lvlist) in shared_pvs:
        # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
        errlist.append("PV %s is shared among unrelated LVs (%s)" %
                       (pvname, utils.CommaJoin(lvlist)))
  return (errlist, es_pvinfo)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
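

# Illustrative sketch, not part of the original module: checking a single
# memory value against a policy. With bounds min=128 and max=32768 this
# returns "memory-size value 65536 is not in range [128, 32768]"; an
# in-range value returns None.
def _ExampleComputeMinMaxSpecUsage(ipolicy):
  return _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 65536)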


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not violate the old one

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         network_type, mac_prefix, tags):
  """Builds network related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type network_type: string
  @param network_type: the type of the network
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @type tags: list
  @param tags: the tags of the network

  """
  env = {}
  if name:
    env["NETWORK_NAME"] = name
  if subnet:
    env["NETWORK_SUBNET"] = subnet
  if gateway:
    env["NETWORK_GATEWAY"] = gateway
  if network6:
    env["NETWORK_SUBNET6"] = network6
  if gateway6:
    env["NETWORK_GATEWAY6"] = gateway6
  if mac_prefix:
    env["NETWORK_MAC_PREFIX"] = mac_prefix
  if network_type:
    env["NETWORK_TYPE"] = network_type
  if tags:
    env["NETWORK_TAGS"] = " ".join(tags)

  return env


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link, network) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if net:
        env["INSTANCE_NIC%d_NETWORK" % idx] = net
        if netinfo:
          nobj = objects.Network.FromDict(netinfo)
          if nobj.network:
            env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
          if nobj.gateway:
            env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
          if nobj.network6:
            env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
          if nobj.gateway6:
            env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
          if nobj.mac_prefix:
            env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
          if nobj.network_type:
            env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
          if nobj.tags:
            env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  ip = nic.ip
  mac = nic.mac
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  net = nic.network
  netinfo = None
  if net:
    net_uuid = lu.cfg.LookupNetwork(net)
    if net_uuid:
      nobj = lu.cfg.GetNetwork(net_uuid)
      netinfo = objects.Network.ToDict(nobj)
  return (ip, mac, mode, link, net, netinfo)


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(_NICToTuple(lu, nic))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes the set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: object.Instance
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
1942 def _VerifyCertificate(filename):
1943 """Verifies a certificate for L{LUClusterVerifyConfig}.
1945 @type filename: string
1946 @param filename: Path to PEM file
1950 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1951 utils.ReadFile(filename))
1952 except Exception, err: # pylint: disable=W0703
1953 return (LUClusterVerifyConfig.ETYPE_ERROR,
1954 "Failed to load X509 certificate %s: %s" % (filename, err))
1957 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1958 constants.SSL_CERT_EXPIRATION_ERROR)
1961 fnamemsg = "While verifying %s: %s" % (filename, msg)
1966 return (None, fnamemsg)
1967 elif errcode == utils.CERT_WARNING:
1968 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1969 elif errcode == utils.CERT_ERROR:
1970 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1972 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
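
# The WARN/ERROR split above comes from comparing the certificate's
# remaining lifetime against two thresholds. A hedged sketch of that
# classification (the day counts are illustrative, not the module
# constants; the time module is already imported by this module):
def _example_classify_cert_expiry(not_after, now=None,
                                  warn_days=30, error_days=7):
  """Classify a certificate by the seconds left until 'not_after'."""
  if now is None:
    now = time.time()
  remaining = not_after - now
  if remaining < error_days * 86400:
    return "ERROR"
  elif remaining < warn_days * 86400:
    return "WARNING"
  return None  # certificate is fine
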
1975 def _GetAllHypervisorParameters(cluster, instances):
1976 """Compute the set of all hypervisor parameters.
1978 @type cluster: L{objects.Cluster}
1979 @param cluster: the cluster object
1980 @type instances: list of L{objects.Instance}
1981 @param instances: additional instances from which to obtain parameters
1982 @rtype: list of (origin, hypervisor, parameters)
1983 @return: a list with all parameters found, indicating the hypervisor they
1984 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1989 for hv_name in cluster.enabled_hypervisors:
1990 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1992 for os_name, os_hvp in cluster.os_hvp.items():
1993 for hv_name, hv_params in os_hvp.items():
1995 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1996 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1998 # TODO: collapse identical parameter values in a single one
1999 for instance in instances:
2000 if instance.hvparams:
2001 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2002 cluster.FillHV(instance)))
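
# The triples built above can be reproduced without config objects. A
# hedged sketch where plain dicts stand in for the cluster defaults,
# the per-OS overrides and the per-instance overrides:
def _example_collect_hvp(cluster_hvp, os_hvp, instance_hvp):
  """Flatten hypervisor parameters into (origin, hypervisor, params)."""
  data = []
  for hv, params in cluster_hvp.items():
    data.append(("cluster", hv, params))
  for os_name, per_hv in os_hvp.items():
    for hv, params in per_hv.items():
      data.append(("os %s" % os_name, hv, params))
  for inst_name, (hv, params) in instance_hvp.items():
    data.append(("instance %s" % inst_name, hv, params))
  return data
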
2007 class _VerifyErrors(object):
2008 """Mix-in for cluster/group verify LUs.
2010 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2011 self.op and self._feedback_fn to be available.)
2015 ETYPE_FIELD = "code"
2016 ETYPE_ERROR = "ERROR"
2017 ETYPE_WARNING = "WARNING"
2019 def _Error(self, ecode, item, msg, *args, **kwargs):
2020 """Format an error message.
2022 Based on the opcode's error_codes parameter, either format a
2023 parseable error code, or a simpler error string.
2025 This must be called only from Exec and functions called from Exec.
2028 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2029 itype, etxt, _ = ecode
2030 # If the error code is in the list of ignored errors, demote the error to a
2032 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2033 ltype = self.ETYPE_WARNING
2034 # first complete the msg
2037 # then format the whole message
2038 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2039 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2045 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2046 # and finally report it via the feedback_fn
2047 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2048 # only mark the operation as failed for ERROR-level messages, not WARN
2049 if ltype == self.ETYPE_ERROR:
2052 def _ErrorIf(self, cond, *args, **kwargs):
2053 """Log an error message if the passed condition is True.
2057 or self.op.debug_simulate_errors): # pylint: disable=E1101
2058 self._Error(*args, **kwargs)
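
# _Error above emits one of two formats. A hedged, standalone rendition
# of both (field order taken from the code; the helper name is made up):
def _example_format_error(ltype, etxt, itype, item, msg, parseable):
  """Format a verify error either machine-parseable or human-readable."""
  if parseable:
    # colon-separated, stable field order for scripts
    return "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
  item = " " + str(item) if item else ""
  return "%s: %s%s: %s" % (ltype, itype, item, msg)
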
2061 class LUClusterVerify(NoHooksLU):
2062 """Submits all jobs necessary to verify the cluster.
2067 def ExpandNames(self):
2068 self.needed_locks = {}
2070 def Exec(self, feedback_fn):
2073 if self.op.group_name:
2074 groups = [self.op.group_name]
2075 depends_fn = lambda: None
2077 groups = self.cfg.GetNodeGroupList()
2079 # Verify global configuration
2081 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2084 # Always depend on global verification
2085 depends_fn = lambda: [(-len(jobs), [])]
2088 [opcodes.OpClusterVerifyGroup(group_name=group,
2089 ignore_errors=self.op.ignore_errors,
2090 depends=depends_fn())]
2091 for group in groups)
2093 # Fix up all parameters
2094 for op in itertools.chain(*jobs): # pylint: disable=W0142
2095 op.debug_simulate_errors = self.op.debug_simulate_errors
2096 op.verbose = self.op.verbose
2097 op.error_codes = self.op.error_codes
2099 op.skip_checks = self.op.skip_checks
2100 except AttributeError:
2101 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2103 return ResultWithJobs(jobs)
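
# A note on the job layout built above: in the full-cluster case,
# depends_fn yields a *relative* dependency, and because len(jobs) grows
# while extend() consumes the generator, every group job points back at
# job number 0 (the config verification). A hedged sketch with plain
# dicts standing in for opcodes:
def _example_verify_jobs(groups):
  jobs = [[{"op": "cluster-verify-config"}]]
  jobs.extend([{"op": "group-verify", "group": g,
                # "-len(jobs) jobs before me" == the config job
                "depends": [(-len(jobs), [])]}]
              for g in groups)
  return jobs
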
2106 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2107 """Verifies the cluster config.
2112 def _VerifyHVP(self, hvp_data):
2113 """Verifies locally the syntax of the hypervisor parameters.
2116 for item, hv_name, hv_params in hvp_data:
2117 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2120 hv_class = hypervisor.GetHypervisorClass(hv_name)
2121 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2122 hv_class.CheckParameterSyntax(hv_params)
2123 except errors.GenericError, err:
2124 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2126 def ExpandNames(self):
2127 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2128 self.share_locks = _ShareAll()
2130 def CheckPrereq(self):
2131 """Check prerequisites.
2134 # Retrieve all information
2135 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2136 self.all_node_info = self.cfg.GetAllNodesInfo()
2137 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2139 def Exec(self, feedback_fn):
2140 """Verify integrity of cluster, performing various test on nodes.
2144 self._feedback_fn = feedback_fn
2146 feedback_fn("* Verifying cluster config")
2148 for msg in self.cfg.VerifyConfig():
2149 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2151 feedback_fn("* Verifying cluster certificate files")
2153 for cert_filename in pathutils.ALL_CERT_FILES:
2154 (errcode, msg) = _VerifyCertificate(cert_filename)
2155 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2157 feedback_fn("* Verifying hypervisor parameters")
2159 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2160 self.all_inst_info.values()))
2162 feedback_fn("* Verifying all nodes belong to an existing group")
2164 # We do this verification here because, should this bogus circumstance
2165 # occur, it would never be caught by VerifyGroup, which only acts on
2166 # nodes/instances reachable from existing node groups.
2168 dangling_nodes = set(node.name for node in self.all_node_info.values()
2169 if node.group not in self.all_group_info)
2171 dangling_instances = {}
2172 no_node_instances = []
2174 for inst in self.all_inst_info.values():
2175 if inst.primary_node in dangling_nodes:
2176 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2177 elif inst.primary_node not in self.all_node_info:
2178 no_node_instances.append(inst.name)
2183 utils.CommaJoin(dangling_instances.get(node.name,
2185 for node in dangling_nodes]
2187 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2189 "the following nodes (and their instances) belong to a non"
2190 " existing group: %s", utils.CommaJoin(pretty_dangling))
2192 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2194 "the following instances have a non-existing primary-node:"
2195 " %s", utils.CommaJoin(no_node_instances))
2200 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2201 """Verifies the status of a node group.
2204 HPATH = "cluster-verify"
2205 HTYPE = constants.HTYPE_CLUSTER
2208 _HOOKS_INDENT_RE = re.compile("^", re.M)
2210 class NodeImage(object):
2211 """A class representing the logical and physical status of a node.
2214 @ivar name: the node name to which this object refers
2215 @ivar volumes: a structure as returned from
2216 L{ganeti.backend.GetVolumeList} (runtime)
2217 @ivar instances: a list of running instances (runtime)
2218 @ivar pinst: list of configured primary instances (config)
2219 @ivar sinst: list of configured secondary instances (config)
2220 @ivar sbp: dictionary of {primary-node: list of instances} for all
2221 instances for which this node is secondary (config)
2222 @ivar mfree: free memory, as reported by hypervisor (runtime)
2223 @ivar dfree: free disk, as reported by the node (runtime)
2224 @ivar offline: the offline status (config)
2225 @type rpc_fail: boolean
2226 @ivar rpc_fail: whether the RPC verify call failed (overall,
2227 not whether the individual keys were correct) (runtime)
2228 @type lvm_fail: boolean
2229 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2230 @type hyp_fail: boolean
2231 @ivar hyp_fail: whether the RPC call didn't return the instance list
2232 @type ghost: boolean
2233 @ivar ghost: whether this is an unknown (ghost) node (config)
2234 @type os_fail: boolean
2235 @ivar os_fail: whether the RPC call didn't return valid OS data
2237 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2238 @type vm_capable: boolean
2239 @ivar vm_capable: whether the node can host instances
2241 @ivar pv_min: size in MiB of the smallest PVs
2243 @ivar pv_max: size in MiB of the biggest PVs
2246 def __init__(self, offline=False, name=None, vm_capable=True):
2255 self.offline = offline
2256 self.vm_capable = vm_capable
2257 self.rpc_fail = False
2258 self.lvm_fail = False
2259 self.hyp_fail = False
2261 self.os_fail = False
2266 def ExpandNames(self):
2267 # This raises errors.OpPrereqError on its own:
2268 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2270 # Get instances in node group; this is unsafe and needs verification later
2272 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2274 self.needed_locks = {
2275 locking.LEVEL_INSTANCE: inst_names,
2276 locking.LEVEL_NODEGROUP: [self.group_uuid],
2277 locking.LEVEL_NODE: [],
2279 # This opcode is run by watcher every five minutes and acquires all nodes
2280 # for a group. It doesn't run for a long time, so it's better to acquire
2281 # the node allocation lock as well.
2282 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2285 self.share_locks = _ShareAll()
2287 def DeclareLocks(self, level):
2288 if level == locking.LEVEL_NODE:
2289 # Get members of node group; this is unsafe and needs verification later
2290 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2292 all_inst_info = self.cfg.GetAllInstancesInfo()
2294 # In Exec(), we warn about mirrored instances that have primary and
2295 # secondary living in separate node groups. To fully verify that
2296 # volumes for these instances are healthy, we will need to do an
2297 # extra call to their secondaries. We ensure here those nodes will
2299 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2300 # Important: access only the instances whose lock is owned
2301 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2302 nodes.update(all_inst_info[inst].secondary_nodes)
2304 self.needed_locks[locking.LEVEL_NODE] = nodes
2306 def CheckPrereq(self):
2307 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2308 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2310 group_nodes = set(self.group_info.members)
2312 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2315 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2317 unlocked_instances = \
2318 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2321 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2322 utils.CommaJoin(unlocked_nodes),
2325 if unlocked_instances:
2326 raise errors.OpPrereqError("Missing lock for instances: %s" %
2327 utils.CommaJoin(unlocked_instances),
2330 self.all_node_info = self.cfg.GetAllNodesInfo()
2331 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2333 self.my_node_names = utils.NiceSort(group_nodes)
2334 self.my_inst_names = utils.NiceSort(group_instances)
2336 self.my_node_info = dict((name, self.all_node_info[name])
2337 for name in self.my_node_names)
2339 self.my_inst_info = dict((name, self.all_inst_info[name])
2340 for name in self.my_inst_names)
2342 # We detect here the nodes that will need the extra RPC calls for verifying
2343 # split LV volumes; they should be locked.
2344 extra_lv_nodes = set()
2346 for inst in self.my_inst_info.values():
2347 if inst.disk_template in constants.DTS_INT_MIRROR:
2348 for nname in inst.all_nodes:
2349 if self.all_node_info[nname].group != self.group_uuid:
2350 extra_lv_nodes.add(nname)
2352 unlocked_lv_nodes = \
2353 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2355 if unlocked_lv_nodes:
2356 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2357 utils.CommaJoin(unlocked_lv_nodes),
2359 self.extra_lv_nodes = list(extra_lv_nodes)
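
  # CheckPrereq above relies on "lock optimistically, verify later": the
  # config may have changed between ExpandNames and CheckPrereq, so
  # anything in the config that we do not own a lock for is an error. A
  # hedged sketch of that pattern (illustrative, not used by the LU):
  @staticmethod
  def _ExampleCheckOwnedLocks(needed, owned):
    missing = set(needed) - set(owned)
    if missing:
      raise errors.OpPrereqError("Missing lock for: %s" %
                                 utils.CommaJoin(sorted(missing)),
                                 errors.ECODE_STATE)
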
2361 def _VerifyNode(self, ninfo, nresult):
2362 """Perform some basic validation on data returned from a node.
2364 - check the result data structure is well formed and has all the
2366 - check ganeti version
2368 @type ninfo: L{objects.Node}
2369 @param ninfo: the node to check
2370 @param nresult: the results from the node
2372 @return: whether overall this call was successful (and we can expect
2373 reasonable values in the response)
2377 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2379 # main result, nresult should be a non-empty dict
2380 test = not nresult or not isinstance(nresult, dict)
2381 _ErrorIf(test, constants.CV_ENODERPC, node,
2382 "unable to verify node: no data returned")
2386 # compares ganeti version
2387 local_version = constants.PROTOCOL_VERSION
2388 remote_version = nresult.get("version", None)
2389 test = not (remote_version and
2390 isinstance(remote_version, (list, tuple)) and
2391 len(remote_version) == 2)
2392 _ErrorIf(test, constants.CV_ENODERPC, node,
2393 "connection to node returned invalid data")
2397 test = local_version != remote_version[0]
2398 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2399 "incompatible protocol versions: master %s,"
2400 " node %s", local_version, remote_version[0])
2404 # node seems compatible, we can actually try to look into its results
2406 # full package version
2407 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2408 constants.CV_ENODEVERSION, node,
2409 "software version mismatch: master %s, node %s",
2410 constants.RELEASE_VERSION, remote_version[1],
2411 code=self.ETYPE_WARNING)
2413 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2414 if ninfo.vm_capable and isinstance(hyp_result, dict):
2415 for hv_name, hv_result in hyp_result.iteritems():
2416 test = hv_result is not None
2417 _ErrorIf(test, constants.CV_ENODEHV, node,
2418 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2420 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2421 if ninfo.vm_capable and isinstance(hvp_result, list):
2422 for item, hv_name, hv_result in hvp_result:
2423 _ErrorIf(True, constants.CV_ENODEHV, node,
2424 "hypervisor %s parameter verify failure (source %s): %s",
2425 hv_name, item, hv_result)
2427 test = nresult.get(constants.NV_NODESETUP,
2428 ["Missing NODESETUP results"])
2429 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2434 def _VerifyNodeTime(self, ninfo, nresult,
2435 nvinfo_starttime, nvinfo_endtime):
2436 """Check the node time.
2438 @type ninfo: L{objects.Node}
2439 @param ninfo: the node to check
2440 @param nresult: the remote results for the node
2441 @param nvinfo_starttime: the start time of the RPC call
2442 @param nvinfo_endtime: the end time of the RPC call
2446 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2448 ntime = nresult.get(constants.NV_TIME, None)
2450 ntime_merged = utils.MergeTime(ntime)
2451 except (ValueError, TypeError):
2452 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2455 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2456 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2457 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2458 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2462 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2463 "Node time diverges by at least %s from master node time",
2466 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2467 """Check the node LVM results and update info for cross-node checks.
2469 @type ninfo: L{objects.Node}
2470 @param ninfo: the node to check
2471 @param nresult: the remote results for the node
2472 @param vg_name: the configured VG name
2473 @type nimg: L{NodeImage}
2474 @param nimg: node image
2481 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2483 # checks vg existence and size > 20G
2484 vglist = nresult.get(constants.NV_VGLIST, None)
2486 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2488 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2489 constants.MIN_VG_SIZE)
2490 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2493 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2495 self._Error(constants.CV_ENODELVM, node, em)
2496 if pvminmax is not None:
2497 (nimg.pv_min, nimg.pv_max) = pvminmax
2499 def _VerifyGroupLVM(self, node_image, vg_name):
2500 """Check cross-node consistency in LVM.
2502 @type node_image: dict
2503 @param node_image: info about nodes, a mapping from node names to
2504 L{NodeImage} objects
2505 @param vg_name: the configured VG name
2511 # Only exclusive storage needs this kind of check
2512 if not self._exclusive_storage:
2515 # exclusive_storage wants all PVs to have the same size (approximately);
2516 # if the smallest and the biggest ones are okay, everything is fine.
2517 # pv_min is None iff pv_max is None
2518 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2521 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2522 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2523 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2524 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2525 "PV sizes differ too much in the group; smallest (%s MB) is"
2526 " on %s, biggest (%s MB) is on %s",
2527 pvmin, minnode, pvmax, maxnode)
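
  # The group-wide rule above: with exclusive storage, the spread between
  # the smallest and the biggest PV in the group must stay within a
  # tolerance (the real test is utils.LvmExclusiveTestBadPvSizes). A
  # hedged sketch with an illustrative relative tolerance:
  @staticmethod
  def _ExamplePvSpreadIsBad(per_node, tolerance=0.1):
    # per_node: node name -> (pv_min, pv_max) in MiB, Nones filtered out
    if not per_node:
      return False
    pvmin = min(lo for (lo, _) in per_node.values())
    pvmax = max(hi for (_, hi) in per_node.values())
    return (pvmax - pvmin) > tolerance * pvmin
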
2529 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2530 """Check the node bridges.
2532 @type ninfo: L{objects.Node}
2533 @param ninfo: the node to check
2534 @param nresult: the remote results for the node
2535 @param bridges: the expected list of bridges
2542 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2544 missing = nresult.get(constants.NV_BRIDGES, None)
2545 test = not isinstance(missing, list)
2546 _ErrorIf(test, constants.CV_ENODENET, node,
2547 "did not return valid bridge information")
2549 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2550 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2552 def _VerifyNodeUserScripts(self, ninfo, nresult):
2553 """Check the results of user scripts presence and executability on the node
2555 @type ninfo: L{objects.Node}
2556 @param ninfo: the node to check
2557 @param nresult: the remote results for the node
2562 test = not constants.NV_USERSCRIPTS in nresult
2563 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2564 "did not return user scripts information")
2566 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2568 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2569 "user scripts not present or not executable: %s" %
2570 utils.CommaJoin(sorted(broken_scripts)))
2572 def _VerifyNodeNetwork(self, ninfo, nresult):
2573 """Check the node network connectivity results.
2575 @type ninfo: L{objects.Node}
2576 @param ninfo: the node to check
2577 @param nresult: the remote results for the node
2581 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2583 test = constants.NV_NODELIST not in nresult
2584 _ErrorIf(test, constants.CV_ENODESSH, node,
2585 "node hasn't returned node ssh connectivity data")
2587 if nresult[constants.NV_NODELIST]:
2588 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2589 _ErrorIf(True, constants.CV_ENODESSH, node,
2590 "ssh communication with node '%s': %s", a_node, a_msg)
2592 test = constants.NV_NODENETTEST not in nresult
2593 _ErrorIf(test, constants.CV_ENODENET, node,
2594 "node hasn't returned node tcp connectivity data")
2596 if nresult[constants.NV_NODENETTEST]:
2597 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2599 _ErrorIf(True, constants.CV_ENODENET, node,
2600 "tcp communication with node '%s': %s",
2601 anode, nresult[constants.NV_NODENETTEST][anode])
2603 test = constants.NV_MASTERIP not in nresult
2604 _ErrorIf(test, constants.CV_ENODENET, node,
2605 "node hasn't returned node master IP reachability data")
2607 if not nresult[constants.NV_MASTERIP]:
2608 if node == self.master_node:
2609 msg = "the master node cannot reach the master IP (not configured?)"
2611 msg = "cannot reach the master IP"
2612 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2614 def _VerifyInstance(self, instance, inst_config, node_image,
2616 """Verify an instance.
2618 This function checks whether the required block devices are
2619 available on the instance's node, and that the nodes are in the correct
2623 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2624 pnode = inst_config.primary_node
2625 pnode_img = node_image[pnode]
2626 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2628 node_vol_should = {}
2629 inst_config.MapLVsByNode(node_vol_should)
2631 cluster = self.cfg.GetClusterInfo()
2632 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2634 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2635 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2636 code=self.ETYPE_WARNING)
2638 for node in node_vol_should:
2639 n_img = node_image[node]
2640 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2641 # ignore missing volumes on offline or broken nodes
2643 for volume in node_vol_should[node]:
2644 test = volume not in n_img.volumes
2645 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2646 "volume %s missing on node %s", volume, node)
2648 if inst_config.admin_state == constants.ADMINST_UP:
2649 test = instance not in pnode_img.instances and not pnode_img.offline
2650 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2651 "instance not running on its primary node %s",
2653 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2654 "instance is marked as running and lives on offline node %s",
2657 diskdata = [(nname, success, status, idx)
2658 for (nname, disks) in diskstatus.items()
2659 for idx, (success, status) in enumerate(disks)]
2661 for nname, success, bdev_status, idx in diskdata:
2662 # the 'ghost node' construction in Exec() ensures that we have a
2664 snode = node_image[nname]
2665 bad_snode = snode.ghost or snode.offline
2666 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2667 not success and not bad_snode,
2668 constants.CV_EINSTANCEFAULTYDISK, instance,
2669 "couldn't retrieve status for disk/%s on %s: %s",
2670 idx, nname, bdev_status)
2671 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2672 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2673 constants.CV_EINSTANCEFAULTYDISK, instance,
2674 "disk/%s on %s is faulty", idx, nname)
2676 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2677 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2678 " primary node failed", instance)
2680 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2681 constants.CV_EINSTANCELAYOUT,
2682 instance, "instance has multiple secondary nodes: %s",
2683 utils.CommaJoin(inst_config.secondary_nodes),
2684 code=self.ETYPE_WARNING)
2686 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2687 # Disk template not compatible with exclusive_storage: no instance
2688 # node should have the flag set
2689 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2690 inst_config.all_nodes)
2691 es_nodes = [n for (n, es) in es_flags.items()
2693 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2694 "instance has template %s, which is not supported on nodes"
2695 " that have exclusive storage set: %s",
2696 inst_config.disk_template, utils.CommaJoin(es_nodes))
2698 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2699 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2700 instance_groups = {}
2702 for node in instance_nodes:
2703 instance_groups.setdefault(self.all_node_info[node].group,
2707 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2708 # Sort so that we always list the primary node first.
2709 for group, nodes in sorted(instance_groups.items(),
2710 key=lambda (_, nodes): pnode in nodes,
2713 self._ErrorIf(len(instance_groups) > 1,
2714 constants.CV_EINSTANCESPLITGROUPS,
2715 instance, "instance has primary and secondary nodes in"
2716 " different groups: %s", utils.CommaJoin(pretty_list),
2717 code=self.ETYPE_WARNING)
2719 inst_nodes_offline = []
2720 for snode in inst_config.secondary_nodes:
2721 s_img = node_image[snode]
2722 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2723 snode, "instance %s, connection to secondary node failed",
2727 inst_nodes_offline.append(snode)
2729 # warn that the instance lives on offline nodes
2730 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2731 "instance has offline secondary node(s) %s",
2732 utils.CommaJoin(inst_nodes_offline))
2733 # ... or ghost/non-vm_capable nodes
2734 for node in inst_config.all_nodes:
2735 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2736 instance, "instance lives on ghost node %s", node)
2737 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2738 instance, "instance lives on non-vm_capable node %s", node)
2740 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2741 """Verify if there are any unknown volumes in the cluster.
2743 The .os, .swap and backup volumes are ignored. All other volumes are
2744 reported as unknown.
2746 @type reserved: L{ganeti.utils.FieldSet}
2747 @param reserved: a FieldSet of reserved volume names
2750 for node, n_img in node_image.items():
2751 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2752 self.all_node_info[node].group != self.group_uuid):
2753 # skip non-healthy nodes
2755 for volume in n_img.volumes:
2756 test = ((node not in node_vol_should or
2757 volume not in node_vol_should[node]) and
2758 not reserved.Matches(volume))
2759 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2760 "volume %s is unknown", volume)
2762 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2763 """Verify N+1 Memory Resilience.
2765 Check that if one single node dies we can still start all the
2766 instances it was primary for.
2769 cluster_info = self.cfg.GetClusterInfo()
2770 for node, n_img in node_image.items():
2771 # This code checks that every node which is now listed as
2772 # secondary has enough memory to host all instances it is
2773 # supposed to, should a single other node in the cluster fail.
2774 # FIXME: not ready for failover to an arbitrary node
2775 # FIXME: does not support file-backed instances
2776 # WARNING: we currently take into account down instances as well
2777 # as up ones, considering that even if they're down someone
2778 # might want to start them even in the event of a node failure.
2779 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2780 # we're skipping nodes marked offline and nodes in other groups from
2781 # the N+1 warning, since most likely we don't have good memory
2782 # information from them; we already list instances living on such
2783 # nodes, and that's enough warning
2785 #TODO(dynmem): also consider ballooning out other instances
2786 for prinode, instances in n_img.sbp.items():
2788 for instance in instances:
2789 bep = cluster_info.FillBE(instance_cfg[instance])
2790 if bep[constants.BE_AUTO_BALANCE]:
2791 needed_mem += bep[constants.BE_MINMEM]
2792 test = n_img.mfree < needed_mem
2793 self._ErrorIf(test, constants.CV_ENODEN1, node,
2794 "not enough memory to accomodate instance failovers"
2795 " should node %s fail (%dMiB needed, %dMiB available)",
2796 prinode, needed_mem, n_img.mfree)
2799 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2800 (files_all, files_opt, files_mc, files_vm)):
2801 """Verifies file checksums collected from all nodes.
2803 @param errorif: Callback for reporting errors
2804 @param nodeinfo: List of L{objects.Node} objects
2805 @param master_node: Name of master node
2806 @param all_nvinfo: RPC results
2809 # Define functions determining which nodes to consider for a file
2812 (files_mc, lambda node: (node.master_candidate or
2813 node.name == master_node)),
2814 (files_vm, lambda node: node.vm_capable),
2817 # Build mapping from filename to list of nodes which should have the file
2819 for (files, fn) in files2nodefn:
2821 filenodes = nodeinfo
2823 filenodes = filter(fn, nodeinfo)
2824 nodefiles.update((filename,
2825 frozenset(map(operator.attrgetter("name"), filenodes)))
2826 for filename in files)
2828 assert set(nodefiles) == (files_all | files_mc | files_vm)
2830 fileinfo = dict((filename, {}) for filename in nodefiles)
2831 ignore_nodes = set()
2833 for node in nodeinfo:
2835 ignore_nodes.add(node.name)
2838 nresult = all_nvinfo[node.name]
2840 if nresult.fail_msg or not nresult.payload:
2843 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2844 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2845 for (key, value) in fingerprints.items())
2848 test = not (node_files and isinstance(node_files, dict))
2849 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2850 "Node did not return file checksum data")
2852 ignore_nodes.add(node.name)
2855 # Build per-checksum mapping from filename to nodes having it
2856 for (filename, checksum) in node_files.items():
2857 assert filename in nodefiles
2858 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2860 for (filename, checksums) in fileinfo.items():
2861 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2863 # Nodes having the file
2864 with_file = frozenset(node_name
2865 for nodes in fileinfo[filename].values()
2866 for node_name in nodes) - ignore_nodes
2868 expected_nodes = nodefiles[filename] - ignore_nodes
2870 # Nodes missing file
2871 missing_file = expected_nodes - with_file
2873 if filename in files_opt:
2875 errorif(missing_file and missing_file != expected_nodes,
2876 constants.CV_ECLUSTERFILECHECK, None,
2877 "File %s is optional, but it must exist on all or no"
2878 " nodes (not found on %s)",
2879 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2881 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2882 "File %s is missing from node(s) %s", filename,
2883 utils.CommaJoin(utils.NiceSort(missing_file)))
2885 # Warn if a node has a file it shouldn't
2886 unexpected = with_file - expected_nodes
2888 constants.CV_ECLUSTERFILECHECK, None,
2889 "File %s should not exist on node(s) %s",
2890 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2892 # See if there are multiple versions of the file
2893 test = len(checksums) > 1
2895 variants = ["variant %s on %s" %
2896 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2897 for (idx, (checksum, nodes)) in
2898 enumerate(sorted(checksums.items()))]
2902 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2903 "File %s found with %s different checksums (%s)",
2904 filename, len(checksums), "; ".join(variants))
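
  # Per file, the rule enforced above is: every expected node reports the
  # file (unless it is optional, where all-or-none applies) and all
  # reported checksums agree. A hedged plain-dict sketch of the core test:
  @staticmethod
  def _ExampleFileConsistency(expected_nodes, reported):
    # reported: node name -> checksum of one file on that node
    missing = set(expected_nodes) - set(reported)
    multiple_versions = len(set(reported.values())) > 1
    return (sorted(missing), multiple_versions)
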
2906 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2908 """Verifies and the node DRBD status.
2910 @type ninfo: L{objects.Node}
2911 @param ninfo: the node to check
2912 @param nresult: the remote results for the node
2913 @param instanceinfo: the dict of instances
2914 @param drbd_helper: the configured DRBD usermode helper
2915 @param drbd_map: the DRBD map as returned by
2916 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2920 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2923 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2924 test = (helper_result is None)
2925 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2926 "no drbd usermode helper returned")
2928 status, payload = helper_result
2930 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2931 "drbd usermode helper check unsuccessful: %s", payload)
2932 test = status and (payload != drbd_helper)
2933 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2934 "wrong drbd usermode helper: %s", payload)
2936 # compute the DRBD minors
2938 for minor, instance in drbd_map[node].items():
2939 test = instance not in instanceinfo
2940 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2941 "ghost instance '%s' in temporary DRBD map", instance)
2942 # ghost instance should not be running, but otherwise we
2943 # don't give double warnings (both ghost instance and
2944 # unallocated minor in use)
2946 node_drbd[minor] = (instance, False)
2948 instance = instanceinfo[instance]
2949 node_drbd[minor] = (instance.name,
2950 instance.admin_state == constants.ADMINST_UP)
2952 # and now check them
2953 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2954 test = not isinstance(used_minors, (tuple, list))
2955 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2956 "cannot parse drbd status file: %s", str(used_minors))
2958 # we cannot check drbd status
2961 for minor, (iname, must_exist) in node_drbd.items():
2962 test = minor not in used_minors and must_exist
2963 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2964 "drbd minor %d of instance %s is not active", minor, iname)
2965 for minor in used_minors:
2966 test = minor not in node_drbd
2967 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2968 "unallocated drbd minor %d is in use", minor)
2970 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2971 """Builds the node OS structures.
2973 @type ninfo: L{objects.Node}
2974 @param ninfo: the node to check
2975 @param nresult: the remote results for the node
2976 @param nimg: the node image object
2980 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2982 remote_os = nresult.get(constants.NV_OSLIST, None)
2983 test = (not isinstance(remote_os, list) or
2984 not compat.all(isinstance(v, list) and len(v) == 7
2985 for v in remote_os))
2987 _ErrorIf(test, constants.CV_ENODEOS, node,
2988 "node hasn't returned valid OS data")
2997 for (name, os_path, status, diagnose,
2998 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
3000 if name not in os_dict:
3003 # parameters is a list of lists instead of list of tuples due to
3004 # JSON lacking a real tuple type; fix it:
3005 parameters = [tuple(v) for v in parameters]
3006 os_dict[name].append((os_path, status, diagnose,
3007 set(variants), set(parameters), set(api_ver)))
3009 nimg.oslist = os_dict
3011 def _VerifyNodeOS(self, ninfo, nimg, base):
3012 """Verifies the node OS list.
3014 @type ninfo: L{objects.Node}
3015 @param ninfo: the node to check
3016 @param nimg: the node image object
3017 @param base: the 'template' node we match against (e.g. from the master)
3021 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3023 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3025 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3026 for os_name, os_data in nimg.oslist.items():
3027 assert os_data, "Empty OS status for OS %s?!" % os_name
3028 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3029 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3030 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3031 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3032 "OS '%s' has multiple entries (first one shadows the rest): %s",
3033 os_name, utils.CommaJoin([v[0] for v in os_data]))
3034 # comparisons with the 'base' image
3035 test = os_name not in base.oslist
3036 _ErrorIf(test, constants.CV_ENODEOS, node,
3037 "Extra OS %s not present on reference node (%s)",
3041 assert base.oslist[os_name], "Base node has empty OS status?"
3042 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3044 # base OS is invalid, skipping
3046 for kind, a, b in [("API version", f_api, b_api),
3047 ("variants list", f_var, b_var),
3048 ("parameters", beautify_params(f_param),
3049 beautify_params(b_param))]:
3050 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3051 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3052 kind, os_name, base.name,
3053 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3055 # check any missing OSes
3056 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3057 _ErrorIf(missing, constants.CV_ENODEOS, node,
3058 "OSes present on reference node %s but missing on this node: %s",
3059 base.name, utils.CommaJoin(missing))
3061 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3062 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3064 @type ninfo: L{objects.Node}
3065 @param ninfo: the node to check
3066 @param nresult: the remote results for the node
3067 @type is_master: bool
3068 @param is_master: Whether node is the master node
3074 (constants.ENABLE_FILE_STORAGE or
3075 constants.ENABLE_SHARED_FILE_STORAGE)):
3077 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3079 # This should never happen
3080 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3081 "Node did not return forbidden file storage paths")
3083 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3084 "Found forbidden file storage paths: %s",
3085 utils.CommaJoin(fspaths))
3087 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3088 constants.CV_ENODEFILESTORAGEPATHS, node,
3089 "Node should not have returned forbidden file storage"
3092 def _VerifyOob(self, ninfo, nresult):
3093 """Verifies out of band functionality of a node.
3095 @type ninfo: L{objects.Node}
3096 @param ninfo: the node to check
3097 @param nresult: the remote results for the node
3101 # We just have to verify the paths on master and/or master candidates
3102 # as the oob helper is invoked on the master
3103 if ((ninfo.master_candidate or ninfo.master_capable) and
3104 constants.NV_OOB_PATHS in nresult):
3105 for path_result in nresult[constants.NV_OOB_PATHS]:
3106 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3108 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3109 """Verifies and updates the node volume data.
3111 This function will update a L{NodeImage}'s internal structures
3112 with data from the remote call.
3114 @type ninfo: L{objects.Node}
3115 @param ninfo: the node to check
3116 @param nresult: the remote results for the node
3117 @param nimg: the node image object
3118 @param vg_name: the configured VG name
3122 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3124 nimg.lvm_fail = True
3125 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3128 elif isinstance(lvdata, basestring):
3129 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3130 utils.SafeEncode(lvdata))
3131 elif not isinstance(lvdata, dict):
3132 _ErrorIf(True, constants.CV_ENODELVM, node,
3133 "rpc call to node failed (lvlist)")
3135 nimg.volumes = lvdata
3136 nimg.lvm_fail = False
3138 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3139 """Verifies and updates the node instance list.
3141 If the listing was successful, then updates this node's instance
3142 list. Otherwise, it marks the RPC call as failed for the instance
3145 @type ninfo: L{objects.Node}
3146 @param ninfo: the node to check
3147 @param nresult: the remote results for the node
3148 @param nimg: the node image object
3151 idata = nresult.get(constants.NV_INSTANCELIST, None)
3152 test = not isinstance(idata, list)
3153 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3154 "rpc call to node failed (instancelist): %s",
3155 utils.SafeEncode(str(idata)))
3157 nimg.hyp_fail = True
3159 nimg.instances = idata
3161 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3162 """Verifies and computes a node information map
3164 @type ninfo: L{objects.Node}
3165 @param ninfo: the node to check
3166 @param nresult: the remote results for the node
3167 @param nimg: the node image object
3168 @param vg_name: the configured VG name
3172 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3174 # try to read free memory (from the hypervisor)
3175 hv_info = nresult.get(constants.NV_HVINFO, None)
3176 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3177 _ErrorIf(test, constants.CV_ENODEHV, node,
3178 "rpc call to node failed (hvinfo)")
3181 nimg.mfree = int(hv_info["memory_free"])
3182 except (ValueError, TypeError):
3183 _ErrorIf(True, constants.CV_ENODERPC, node,
3184 "node returned invalid nodeinfo, check hypervisor")
3186 # FIXME: devise a free space model for file based instances as well
3187 if vg_name is not None:
3188 test = (constants.NV_VGLIST not in nresult or
3189 vg_name not in nresult[constants.NV_VGLIST])
3190 _ErrorIf(test, constants.CV_ENODELVM, node,
3191 "node didn't return data for the volume group '%s'"
3192 " - it is either missing or broken", vg_name)
3195 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3196 except (ValueError, TypeError):
3197 _ErrorIf(True, constants.CV_ENODERPC, node,
3198 "node returned invalid LVM info, check LVM status")
3200 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3201 """Gets per-disk status information for all instances.
3203 @type nodelist: list of strings
3204 @param nodelist: Node names
3205 @type node_image: dict of (name, L{NodeImage})
3206 @param node_image: Node objects
3207 @type instanceinfo: dict of (name, L{objects.Instance})
3208 @param instanceinfo: Instance objects
3209 @rtype: {instance: {node: [(success, payload)]}}
3210 @return: a dictionary of per-instance dictionaries with nodes as
3211 keys and disk information as values; the disk information is a
3212 list of tuples (success, payload)
3215 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3218 node_disks_devonly = {}
3219 diskless_instances = set()
3220 diskless = constants.DT_DISKLESS
3222 for nname in nodelist:
3223 node_instances = list(itertools.chain(node_image[nname].pinst,
3224 node_image[nname].sinst))
3225 diskless_instances.update(inst for inst in node_instances
3226 if instanceinfo[inst].disk_template == diskless)
3227 disks = [(inst, disk)
3228 for inst in node_instances
3229 for disk in instanceinfo[inst].disks]
3232 # No need to collect data
3235 node_disks[nname] = disks
3237 # _AnnotateDiskParams already makes copies of the disks
3239 for (inst, dev) in disks:
3240 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3241 self.cfg.SetDiskID(anno_disk, nname)
3242 devonly.append(anno_disk)
3244 node_disks_devonly[nname] = devonly
3246 assert len(node_disks) == len(node_disks_devonly)
3248 # Collect data from all nodes with disks
3249 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3252 assert len(result) == len(node_disks)
3256 for (nname, nres) in result.items():
3257 disks = node_disks[nname]
3260 # No data from this node
3261 data = len(disks) * [(False, "node offline")]
3264 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3265 "while getting disk information: %s", msg)
3267 # No data from this node
3268 data = len(disks) * [(False, msg)]
3271 for idx, i in enumerate(nres.payload):
3272 if isinstance(i, (tuple, list)) and len(i) == 2:
3275 logging.warning("Invalid result from node %s, entry %d: %s",
3277 data.append((False, "Invalid result from the remote node"))
3279 for ((inst, _), status) in zip(disks, data):
3280 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3282 # Add empty entries for diskless instances.
3283 for inst in diskless_instances:
3284 assert inst not in instdisk
3287 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3288 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3289 compat.all(isinstance(s, (tuple, list)) and
3290 len(s) == 2 for s in statuses)
3291 for inst, nnames in instdisk.items()
3292 for nname, statuses in nnames.items())
3294 instdisk_keys = set(instdisk)
3295 instanceinfo_keys = set(instanceinfo)
3296 assert instdisk_keys == instanceinfo_keys, \
3297 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3298 (instdisk_keys, instanceinfo_keys))
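
  # The shape documented in _CollectDiskInfo's @rtype is easy to build
  # from flat records. A hedged sketch (names illustrative):
  @staticmethod
  def _ExampleBuildInstdisk(records):
    # records: iterable of (node name, instance name, (success, payload))
    instdisk = {}
    for (nname, inst, status) in records:
      instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
    return instdisk
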
3303 def _SshNodeSelector(group_uuid, all_nodes):
3304 """Create endless iterators for all potential SSH check hosts.
3307 nodes = [node for node in all_nodes
3308 if (node.group != group_uuid and
3310 keyfunc = operator.attrgetter("group")
3312 return map(itertools.cycle,
3313 [sorted(map(operator.attrgetter("name"), names))
3314 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3318 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3319 """Choose which nodes should talk to which other nodes.
3321 We will make nodes contact all nodes in their group, and one node from
3324 @warning: This algorithm has a known issue if one node group is much
3325 smaller than others (e.g. just one node). In such a case all other
3326 nodes will talk to the single node.
3329 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3330 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3332 return (online_nodes,
3333 dict((name, sorted([i.next() for i in sel]))
3334 for name in online_nodes))
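
  # The two selector helpers above spread SSH checks: each online node
  # gets the next name from a cycling iterator per foreign group, so the
  # load rotates instead of hammering a single host. A hedged sketch
  # (itertools is imported at module level):
  @staticmethod
  def _ExampleSshTargets(online_nodes, foreign_groups):
    # foreign_groups: list of name lists, one per other node group
    cyclers = [itertools.cycle(sorted(names)) for names in foreign_groups]
    return dict((name, [c.next() for c in cyclers])
                for name in online_nodes)
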
3336 def BuildHooksEnv(self):
3339 Cluster-Verify hooks run only in the post phase; if they fail, their
3340 output is logged in the verify output and the verification fails.
3344 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3347 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3348 for node in self.my_node_info.values())
3352 def BuildHooksNodes(self):
3353 """Build hooks nodes.
3356 return ([], self.my_node_names)
3358 def Exec(self, feedback_fn):
3359 """Verify integrity of the node group, performing various test on nodes.
3362 # This method has too many local variables. pylint: disable=R0914
3363 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3365 if not self.my_node_names:
3367 feedback_fn("* Empty node group, skipping verification")
3371 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3372 verbose = self.op.verbose
3373 self._feedback_fn = feedback_fn
3375 vg_name = self.cfg.GetVGName()
3376 drbd_helper = self.cfg.GetDRBDHelper()
3377 cluster = self.cfg.GetClusterInfo()
3378 hypervisors = cluster.enabled_hypervisors
3379 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3381 i_non_redundant = [] # Non redundant instances
3382 i_non_a_balanced = [] # Non auto-balanced instances
3383 i_offline = 0 # Count of offline instances
3384 n_offline = 0 # Count of offline nodes
3385 n_drained = 0 # Count of nodes being drained
3386 node_vol_should = {}
3388 # FIXME: verify OS list
3391 filemap = _ComputeAncillaryFiles(cluster, False)
3393 # do local checksums
3394 master_node = self.master_node = self.cfg.GetMasterNode()
3395 master_ip = self.cfg.GetMasterIP()
3397 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3400 if self.cfg.GetUseExternalMipScript():
3401 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3403 node_verify_param = {
3404 constants.NV_FILELIST:
3405 map(vcluster.MakeVirtualPath,
3406 utils.UniqueSequence(filename
3407 for files in filemap
3408 for filename in files)),
3409 constants.NV_NODELIST:
3410 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3411 self.all_node_info.values()),
3412 constants.NV_HYPERVISOR: hypervisors,
3413 constants.NV_HVPARAMS:
3414 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3415 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3416 for node in node_data_list
3417 if not node.offline],
3418 constants.NV_INSTANCELIST: hypervisors,
3419 constants.NV_VERSION: None,
3420 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3421 constants.NV_NODESETUP: None,
3422 constants.NV_TIME: None,
3423 constants.NV_MASTERIP: (master_node, master_ip),
3424 constants.NV_OSLIST: None,
3425 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3426 constants.NV_USERSCRIPTS: user_scripts,
3429 if vg_name is not None:
3430 node_verify_param[constants.NV_VGLIST] = None
3431 node_verify_param[constants.NV_LVLIST] = vg_name
3432 node_verify_param[constants.NV_PVLIST] = [vg_name]
3435 node_verify_param[constants.NV_DRBDLIST] = None
3436 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3438 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3439 # Load file storage paths only from master node
3440 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3443 # FIXME: this needs to be changed per node-group, not cluster-wide
3445 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3446 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3447 bridges.add(default_nicpp[constants.NIC_LINK])
3448 for instance in self.my_inst_info.values():
3449 for nic in instance.nics:
3450 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3451 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3452 bridges.add(full_nic[constants.NIC_LINK])
3455 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3457 # Build our expected cluster state
3458 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3460 vm_capable=node.vm_capable))
3461 for node in node_data_list)
3465 for node in self.all_node_info.values():
3466 path = _SupportsOob(self.cfg, node)
3467 if path and path not in oob_paths:
3468 oob_paths.append(path)
3471 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3473 for instance in self.my_inst_names:
3474 inst_config = self.my_inst_info[instance]
3475 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3478 for nname in inst_config.all_nodes:
3479 if nname not in node_image:
3480 gnode = self.NodeImage(name=nname)
3481 gnode.ghost = (nname not in self.all_node_info)
3482 node_image[nname] = gnode
3484 inst_config.MapLVsByNode(node_vol_should)
3486 pnode = inst_config.primary_node
3487 node_image[pnode].pinst.append(instance)
3489 for snode in inst_config.secondary_nodes:
3490 nimg = node_image[snode]
3491 nimg.sinst.append(instance)
3492 if pnode not in nimg.sbp:
3493 nimg.sbp[pnode] = []
3494 nimg.sbp[pnode].append(instance)
3496 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3498 # The value of exclusive_storage should be the same across the group, so if
3500 # it's True for at least one node, we act as if it were set for all the nodes
3500 self._exclusive_storage = compat.any(es_flags.values())
3501 if self._exclusive_storage:
3502 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3503 es_unset_nodes = [n for (n, es) in es_flags.items()
3507 self._Error(constants.CV_EGROUPMIXEDESFLAG, self.group_info.name,
3508 "The exclusive_storage flag should be uniform in a group,"
3509 " but these nodes have it unset: %s",
3510 utils.CommaJoin(utils.NiceSort(es_unset_nodes)))
3511 self.LogWarning("Some checks required by exclusive storage will be"
3512 " performed also on nodes with the flag unset")
3514 # At this point, we have the in-memory data structures complete,
3515 # except for the runtime information, which we'll gather next
3517 # Due to the way our RPC system works, exact response times cannot be
3518 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3519 # time before and after executing the request, we can at least have a time
3521 nvinfo_starttime = time.time()
3522 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3524 self.cfg.GetClusterName())
3525 nvinfo_endtime = time.time()
3527 if self.extra_lv_nodes and vg_name is not None:
3529 self.rpc.call_node_verify(self.extra_lv_nodes,
3530 {constants.NV_LVLIST: vg_name},
3531 self.cfg.GetClusterName())
3533 extra_lv_nvinfo = {}
3535 all_drbd_map = self.cfg.ComputeDRBDMap()
3537 feedback_fn("* Gathering disk information (%s nodes)" %
3538 len(self.my_node_names))
3539 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3542 feedback_fn("* Verifying configuration file consistency")
3544 # If not all nodes are being checked, we need to make sure the master node
3545 # and a non-checked vm_capable node are in the list.
3546 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3548 vf_nvinfo = all_nvinfo.copy()
3549 vf_node_info = list(self.my_node_info.values())
3550 additional_nodes = []
3551 if master_node not in self.my_node_info:
3552 additional_nodes.append(master_node)
3553 vf_node_info.append(self.all_node_info[master_node])
3554 # Add the first vm_capable node we find which is not included,
3555 # excluding the master node (which we already have)
3556 for node in absent_nodes:
3557 nodeinfo = self.all_node_info[node]
3558 if (nodeinfo.vm_capable and not nodeinfo.offline and
3559 node != master_node):
3560 additional_nodes.append(node)
3561 vf_node_info.append(self.all_node_info[node])
3563 key = constants.NV_FILELIST
3564 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3565 {key: node_verify_param[key]},
3566 self.cfg.GetClusterName()))
3568 vf_nvinfo = all_nvinfo
3569 vf_node_info = self.my_node_info.values()
3571 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3573 feedback_fn("* Verifying node status")
3577 for node_i in node_data_list:
3579 nimg = node_image[node]
3583 feedback_fn("* Skipping offline node %s" % (node,))
3587 if node == master_node:
3589 elif node_i.master_candidate:
3590 ntype = "master candidate"
3591 elif node_i.drained:
3597 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3599 msg = all_nvinfo[node].fail_msg
3600 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3603 nimg.rpc_fail = True
3606 nresult = all_nvinfo[node].payload
3608 nimg.call_ok = self._VerifyNode(node_i, nresult)
3609 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3610 self._VerifyNodeNetwork(node_i, nresult)
3611 self._VerifyNodeUserScripts(node_i, nresult)
3612 self._VerifyOob(node_i, nresult)
3613 self._VerifyFileStoragePaths(node_i, nresult,
3614 node == master_node)
3617 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3618 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3621 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3622 self._UpdateNodeInstances(node_i, nresult, nimg)
3623 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3624 self._UpdateNodeOS(node_i, nresult, nimg)
3626 if not nimg.os_fail:
3627 if refos_img is None:
3629 self._VerifyNodeOS(node_i, nimg, refos_img)
3630 self._VerifyNodeBridges(node_i, nresult, bridges)
3632 # Check whether all running instances are primary for the node. (This
3633 # can no longer be done from _VerifyInstance below, since some of the
3634 # wrong instances could be from other node groups.)
3635 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3637 for inst in non_primary_inst:
3638 test = inst in self.all_inst_info
3639 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3640 "instance should not run on node %s", node_i.name)
3641 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3642 "node is running unknown instance %s", inst)
3644 self._VerifyGroupLVM(node_image, vg_name)
3646 for node, result in extra_lv_nvinfo.items():
3647 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3648 node_image[node], vg_name)
3650 feedback_fn("* Verifying instance status")
3651 for instance in self.my_inst_names:
3653 feedback_fn("* Verifying instance %s" % instance)
3654 inst_config = self.my_inst_info[instance]
3655 self._VerifyInstance(instance, inst_config, node_image,
3658 # If the instance is non-redundant we cannot survive losing its primary
3659 # node, so we are not N+1 compliant.
3660 if inst_config.disk_template not in constants.DTS_MIRRORED:
3661 i_non_redundant.append(instance)
3663 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3664 i_non_a_balanced.append(instance)
3666 feedback_fn("* Verifying orphan volumes")
3667 reserved = utils.FieldSet(*cluster.reserved_lvs)
3669 # We will get spurious "unknown volume" warnings if any node of this group
3670 # is secondary for an instance whose primary is in another group. To avoid
3671 # them, we find these instances and add their volumes to node_vol_should.
3672 for inst in self.all_inst_info.values():
3673 for secondary in inst.secondary_nodes:
3674 if (secondary in self.my_node_info
3675 and inst.name not in self.my_inst_info):
3676 inst.MapLVsByNode(node_vol_should)
3679 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3681 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3682 feedback_fn("* Verifying N+1 Memory redundancy")
3683 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3685 feedback_fn("* Other Notes")
3687 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3688 % len(i_non_redundant))
3690 if i_non_a_balanced:
3691 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3692 % len(i_non_a_balanced))
    if i_offline:
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
3705 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3706 """Analyze the post-hooks' result
3708 This method analyses the hook result, handles it, and sends some
3709 nicely-formatted feedback back to the user.
3711 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3712 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3713 @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
3715 @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result

    """
3720 # We only really run POST phase hooks, only for non-empty groups,
3721 # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
3726 # Used to change hooks' output to proper indentation
3727 feedback_fn("* Hooks Results")
3728 assert hooks_results, "invalid result from hooks"
3730 for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
3734 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3735 "Communication failure in hooks execution: %s", msg)
3736 if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
3740 for script, hkr, output in res.payload:
3741 test = hkr == constants.HKR_FAIL
3742 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3743 "Script %s failed, output:", script)
            if test:
              output = self._HOOKS_INDENT_RE.sub(" ", output)
              feedback_fn("%s" % output)
              lu_result = False

    return lu_result
3752 class LUClusterVerifyDisks(NoHooksLU):
3753 """Verifies the cluster disks status.
3758 def ExpandNames(self):
3759 self.share_locks = _ShareAll()
3760 self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
3764 def Exec(self, feedback_fn):
3765 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3767 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3768 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3769 for group in group_names])
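

# Illustrative sketch (not part of the original module): the fan-out pattern
# used by LUClusterVerifyDisks above. One single-opcode job is built per node
# group and wrapped in ResultWithJobs; mcpu._ProcessResult then submits the
# jobs. The helper name below is hypothetical.
def _ExampleVerifyDisksFanOut(group_names):
  """Builds the per-group job list, one OpGroupVerifyDisks job per group."""
  return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                         for group in group_names])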
3772 class LUGroupVerifyDisks(NoHooksLU):
3773 """Verifies the status of all disks in a node group.
3778 def ExpandNames(self):
3779 # Raises errors.OpPrereqError on its own if group can't be found
3780 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3782 self.share_locks = _ShareAll()
3783 self.needed_locks = {
3784 locking.LEVEL_INSTANCE: [],
3785 locking.LEVEL_NODEGROUP: [],
3786 locking.LEVEL_NODE: [],
      # This opcode acquires all node locks in a group. LUClusterVerifyDisks
      # starts one instance of this opcode for every group, which means all
      # nodes will be locked for a short amount of time, so it's better to
      # acquire the node allocation lock as well.
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }
3795 def DeclareLocks(self, level):
3796 if level == locking.LEVEL_INSTANCE:
3797 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3799 # Lock instances optimistically, needs verification once node and group
3800 # locks have been acquired
3801 self.needed_locks[locking.LEVEL_INSTANCE] = \
3802 self.cfg.GetNodeGroupInstances(self.group_uuid)
3804 elif level == locking.LEVEL_NODEGROUP:
3805 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3807 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3808 set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3816 elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
3819 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3820 self._LockInstancesNodes()
3822 # Lock all nodes in group to be verified
3823 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3824 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3825 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3827 def CheckPrereq(self):
3828 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3829 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3830 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3832 assert self.group_uuid in owned_groups
3834 # Check if locked instances are still correct
3835 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3837 # Get instance information
3838 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3840 # Check if node groups for locked instances are still correct
3841 _CheckInstancesNodeGroups(self.cfg, self.instances,
3842 owned_groups, owned_nodes, self.group_uuid)
3844 def Exec(self, feedback_fn):
3845 """Verify integrity of cluster disks.
3847 @rtype: tuple of three items
3848 @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}
3857 nv_dict = _MapInstanceDisksToNodes(
3858 [inst for inst in self.instances.values()
3859 if inst.admin_state == constants.ADMINST_UP])
3862 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3863 set(self.cfg.GetVmCapableNodeList()))
3865 node_lvs = self.rpc.call_lv_list(nodes, [])
3867 for (node, node_res) in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
3877 for lv_name, (_, _, lv_online) in node_res.payload.items():
3878 inst = nv_dict.pop((node, lv_name), None)
3879 if not (lv_online or inst is None):
3880 res_instances.add(inst)
3882 # any leftover items in nv_dict are missing LVs, let's arrange the data
3884 for key, inst in nv_dict.iteritems():
3885 res_missing.setdefault(inst, []).append(list(key))
3887 return (res_nodes, list(res_instances), res_missing)
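

# Hedged usage sketch (assumption, not original code): how a caller might
# consume the 3-tuple returned by LUGroupVerifyDisks.Exec. The helper name is
# hypothetical; the tuple layout follows the docstring above.
def _ExampleSummarizeVerifyDisks(result):
  """Logs a summary of (node_errors, offline_instances, missing_lvs)."""
  (node_errors, offline_instances, missing_lvs) = result
  for node, err in node_errors.items():
    logging.warning("Node %s could not be queried: %s", node, err)
  for inst in offline_instances:
    logging.info("Instance %s needs activate-disks", inst)
  for inst, lvs in missing_lvs.items():
    logging.warning("Instance %s is missing LVs: %s", inst, lvs)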
3890 class LUClusterRepairDiskSizes(NoHooksLU):
3891 """Verifies the cluster disks sizes.
3896 def ExpandNames(self):
3897 if self.op.instances:
3898 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3899 # Not getting the node allocation lock as only a specific set of
3900 # instances (and their nodes) is going to be acquired
3901 self.needed_locks = {
3902 locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
3905 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
3908 self.needed_locks = {
3909 locking.LEVEL_NODE_RES: locking.ALL_SET,
3910 locking.LEVEL_INSTANCE: locking.ALL_SET,
        # This opcode acquires the node locks for all instances
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
3916 self.share_locks = {
3917 locking.LEVEL_NODE_RES: 1,
3918 locking.LEVEL_INSTANCE: 0,
      locking.LEVEL_NODE_ALLOC: 1,
      }
3922 def DeclareLocks(self, level):
3923 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3924 self._LockInstancesNodes(primary_only=True, level=level)
3926 def CheckPrereq(self):
3927 """Check prerequisites.
    This only checks the optional instance list against the existing names.

    """
3932 if self.wanted_names is None:
3933 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3935 self.wanted_instances = \
3936 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3938 def _EnsureChildSizes(self, disk):
3939 """Ensure children of the disk have the needed disk size.
    This is valid mainly for DRBD8 and fixes an issue where the
    children have a smaller disk size than the parent.
    @param disk: an L{ganeti.objects.Disk} object

    """
3947 if disk.dev_type == constants.LD_DRBD8:
3948 assert disk.children, "Empty children for DRBD8?"
3949 fchild = disk.children[0]
3950 mismatch = fchild.size < disk.size
3952 self.LogInfo("Child disk has size %d, parent %d, fixing",
3953 fchild.size, disk.size)
3954 fchild.size = disk.size
3956 # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
3961 def Exec(self, feedback_fn):
3962 """Verify the size of cluster disks.
3965 # TODO: check child disks too
3966 # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
3969 pnode = instance.primary_node
3970 if pnode not in per_node_disks:
3971 per_node_disks[pnode] = []
3972 for idx, disk in enumerate(instance.disks):
3973 per_node_disks[pnode].append((instance, idx, disk))
3975 assert not (frozenset(per_node_disks.keys()) -
3976 self.owned_locks(locking.LEVEL_NODE_RES)), \
3977 "Not owning correct locks"
3978 assert not self.owned_locks(locking.LEVEL_NODE)
    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
3985 result = self.rpc.call_blockdev_getsize(node, newl)
3987 self.LogWarning("Failure in blockdev_getsize call to node"
3988 " %s, ignoring", node)
3990 if len(result.payload) != len(dskl):
3991 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3992 " result.payload=%s", node, len(dskl), result.payload)
3993 self.LogWarning("Invalid result from node %s, ignoring node results",
3996 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3998 self.LogWarning("Disk %d of instance %s did not return size"
3999 " information, ignoring", idx, instance.name)
4001 if not isinstance(size, (int, long)):
4002 self.LogWarning("Disk %d of instance %s did not return valid"
4003 " size information, ignoring", idx, instance.name)
4006 if size != disk.size:
4007 self.LogInfo("Disk %d of instance %s has mismatched size,"
4008 " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
4012 changed.append((instance.name, idx, size))
4013 if self._EnsureChildSizes(disk):
4014 self.cfg.Update(instance, feedback_fn)
            changed.append((instance.name, idx, disk.size))

    return changed
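

# Illustrative sketch (assumption): the DRBD8 child-size fix performed by
# _EnsureChildSizes above, demonstrated on plain objects.Disk instances; the
# sizes and the helper name are made up for the example.
def _ExampleChildSizeFix():
  """Shows a DRBD8 data child being grown to the parent's size."""
  data = objects.Disk(dev_type=constants.LD_LV, size=2048)
  meta = objects.Disk(dev_type=constants.LD_LV, size=128)
  drbd = objects.Disk(dev_type=constants.LD_DRBD8, size=4096,
                      children=[data, meta])
  # mirrors the LU: only the first child (the data volume) is resized
  if drbd.children[0].size < drbd.size:
    drbd.children[0].size = drbd.size
  return drbd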
4019 class LUClusterRename(LogicalUnit):
4020 """Rename the cluster.
4023 HPATH = "cluster-rename"
4024 HTYPE = constants.HTYPE_CLUSTER
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
4035 def BuildHooksNodes(self):
4036 """Build hooks nodes.
4039 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4041 def CheckPrereq(self):
4042 """Verify that the passed name is a valid one.
4045 hostname = netutils.GetHostname(name=self.op.name,
4046 family=self.cfg.GetPrimaryIPFamily())
4048 new_name = hostname.name
4049 self.ip = new_ip = hostname.ip
4050 old_name = self.cfg.GetClusterName()
4051 old_ip = self.cfg.GetMasterIP()
4052 if new_name == old_name and new_ip == old_ip:
4053 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4054 " cluster has changed",
4056 if new_ip != old_ip:
4057 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4058 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4059 " reachable on the network" %
4060 new_ip, errors.ECODE_NOTUNIQUE)
4062 self.op.name = new_name
4064 def Exec(self, feedback_fn):
4065 """Rename the cluster.
    clustername = self.op.name
    new_ip = self.ip
4071 # shutdown the master IP
4072 master_params = self.cfg.GetMasterNetworkParameters()
4073 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
4076 result.Raise("Could not disable the master role")
    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
4104 def _ValidateNetmask(cfg, netmask):
4105 """Checks if a netmask is valid.
4107 @type cfg: L{config.ConfigWriter}
4108 @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
4114 ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4117 except errors.ProgrammerError:
4118 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4119 ip_family, errors.ECODE_INVAL)
4120 if not ipcls.ValidateNetmask(netmask):
4121 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4122 (netmask), errors.ECODE_INVAL)
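

# Hedged sketch (assumption): _ValidateNetmask deals in CIDR prefix lengths,
# so on an IPv4 cluster a master_netmask of 24 passes while 33 fails. The
# helper below is hypothetical and only restates the check above.
def _ExampleNetmaskIsValid(netmask, ip_family):
  """Returns whether netmask is a valid CIDR prefix for the IP family."""
  ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  return ipcls.ValidateNetmask(netmask)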
4125 class LUClusterSetParams(LogicalUnit):
4126 """Change the parameters of the cluster.
4129 HPATH = "cluster-modify"
4130 HTYPE = constants.HTYPE_CLUSTER
4133 def CheckArguments(self):
4137 if self.op.uid_pool:
4138 uidpool.CheckUidPool(self.op.uid_pool)
4140 if self.op.add_uids:
4141 uidpool.CheckUidPool(self.op.add_uids)
4143 if self.op.remove_uids:
4144 uidpool.CheckUidPool(self.op.remove_uids)
4146 if self.op.master_netmask is not None:
4147 _ValidateNetmask(self.cfg, self.op.master_netmask)
4149 if self.op.diskparams:
4150 for dt_params in self.op.diskparams.values():
4151 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
      try:
        utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verifying diskparams options: %s" %
                                   err, errors.ECODE_INVAL)
4158 def ExpandNames(self):
4159 # FIXME: in the future maybe other cluster params won't require checking on
4160 # all nodes to be modified.
4161 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4162 # resource locks the right thing, shouldn't it be the BGL instead?
4163 self.needed_locks = {
4164 locking.LEVEL_NODE: locking.ALL_SET,
4165 locking.LEVEL_INSTANCE: locking.ALL_SET,
4166 locking.LEVEL_NODEGROUP: locking.ALL_SET,
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }
4169 self.share_locks = _ShareAll()
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
4180 def BuildHooksNodes(self):
4181 """Build hooks nodes.
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
4187 def CheckPrereq(self):
4188 """Check prerequisites.
4190 This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
4194 if self.op.vg_name is not None and not self.op.vg_name:
4195 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4196 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4197 " instances exist", errors.ECODE_INVAL)
4199 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4200 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4201 raise errors.OpPrereqError("Cannot disable drbd helper while"
4202 " drbd-based instances exist",
4205 node_list = self.owned_locks(locking.LEVEL_NODE)
    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)
4224 if self.op.drbd_helper:
4225 # checks given drbd helper on all nodes
4226 helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
4236 node_helper = helpers[node].payload
4237 if node_helper != self.op.drbd_helper:
4238 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4239 (node, node_helper), errors.ECODE_ENVIRON)
4241 self.cluster = cluster = self.cfg.GetClusterInfo()
4242 # validate params changes
4243 if self.op.beparams:
4244 objects.UpgradeBeParams(self.op.beparams)
4245 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4246 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4248 if self.op.ndparams:
4249 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4250 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4252 # TODO: we need a more general way to handle resetting
4253 # cluster-level parameters to default values
4254 if self.new_ndparams["oob_program"] == "":
4255 self.new_ndparams["oob_program"] = \
4256 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4258 if self.op.hv_state:
4259 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4260 self.cluster.hv_state_static)
4261 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4262 for hv, values in new_hv_state.items())
4264 if self.op.disk_state:
4265 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4266 self.cluster.disk_state_static)
4267 self.new_disk_state = \
4268 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4269 for name, values in svalues.items()))
4270 for storage, svalues in new_disk_state.items())
    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
                                            group_policy=False)

      all_instances = self.cfg.GetAllInstancesInfo().values()
      violations = set()
      for group in self.cfg.GetAllNodeGroupsInfo().values():
        instances = frozenset([inst for inst in all_instances
                               if compat.any(node in group.members
                                             for node in inst.all_nodes)])
        new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
        ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
        new = _ComputeNewInstanceViolations(ipol,
                                            new_ipolicy, instances)
        if new:
          violations.update(new)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(utils.NiceSort(violations)))
4294 if self.op.nicparams:
4295 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4296 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []
4300 # check all instances for consistency
4301 for instance in self.cfg.GetAllInstancesInfo().values():
4302 for nic_idx, nic in enumerate(instance.nics):
4303 params_copy = copy.deepcopy(nic.nicparams)
4304 params_filled = objects.FillDict(self.new_nicparams, params_copy)
          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
4309 except errors.ConfigurationError, err:
4310 nic_errors.append("Instance %s, nic/%d: %s" %
4311 (instance.name, nic_idx, err))
4313 # if we're moving instances to routed, check that they have an ip
4314 target_mode = params_filled[constants.NIC_MODE]
4315 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4316 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4317 " address" % (instance.name, nic_idx))
4319 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4320 "\n".join(nic_errors), errors.ECODE_INVAL)
4322 # hypervisor list/parameters
4323 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4324 if self.op.hvparams:
4325 for hv_name, hv_dict in self.op.hvparams.items():
4326 if hv_name not in self.new_hvparams:
4327 self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)
4331 # disk template parameters
4332 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4333 if self.op.diskparams:
4334 for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)
4340 # os hypervisor parameters
4341 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_dict is None:
              # Delete if it exists
              self.new_os_hvp[os_name].pop(hv_name, None)
            elif hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])
4374 # changes to the hypervisor list
4375 if self.op.enabled_hypervisors is not None:
4376 self.hv_list = self.op.enabled_hypervisors
4377 for hv in self.hv_list:
4378 # if the hypervisor doesn't already exist in the cluster
4379 # hvparams, we initialize it to empty, and then (in both
4380 # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4386 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors
4390 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4391 # either the enabled list has changed, or the parameters have, validate
4392 for hv_name, hv_params in self.new_hvparams.items():
4393 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4394 (self.op.enabled_hypervisors and
4395 hv_name in self.op.enabled_hypervisors)):
4396 # either this is a new hypervisor, or its parameters have changed
4397 hv_class = hypervisor.GetHypervisorClass(hv_name)
4398 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4399 hv_class.CheckParameterSyntax(hv_params)
4400 _CheckHVParams(self, node_list, hv_name, hv_params)
    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
4406 for hv_name, hv_params in os_hvp.items():
4407 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4408 # we need to fill in the new os_hvp on top of the actual hv_p
4409 cluster_defaults = self.new_hvparams.get(hv_name, {})
4410 new_osp = objects.FillDict(cluster_defaults, hv_params)
4411 hv_class = hypervisor.GetHypervisorClass(hv_name)
4412 hv_class.CheckParameterSyntax(new_osp)
4413 _CheckHVParams(self, node_list, hv_name, new_osp)
4415 if self.op.default_iallocator:
4416 alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
4419 if alloc_script is None:
4420 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4421 " specified" % self.op.default_iallocator,
4424 def Exec(self, feedback_fn):
4425 """Change the parameters of the cluster.
4428 if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
4437 if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
4446 if self.op.hvparams:
4447 self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
4450 if self.op.enabled_hypervisors is not None:
4451 self.cluster.hvparams = self.new_hvparams
4452 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4453 if self.op.beparams:
4454 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4455 if self.op.nicparams:
4456 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
4459 if self.op.osparams:
4460 self.cluster.osparams = self.new_osp
4461 if self.op.ndparams:
4462 self.cluster.ndparams = self.new_ndparams
4463 if self.op.diskparams:
4464 self.cluster.diskparams = self.new_diskparams
4465 if self.op.hv_state:
4466 self.cluster.hv_state_static = self.new_hv_state
4467 if self.op.disk_state:
4468 self.cluster.disk_state_static = self.new_disk_state
4470 if self.op.candidate_pool_size is not None:
4471 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4472 # we need to update the pool size here, otherwise the save will fail
4473 _AdjustCandidatePool(self, [])
4475 if self.op.maintain_node_health is not None:
4476 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4477 feedback_fn("Note: CONFD was disabled at build time, node health"
4478 " maintenance is not useful (still enabling it)")
4479 self.cluster.maintain_node_health = self.op.maintain_node_health
4481 if self.op.prealloc_wipe_disks is not None:
4482 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4484 if self.op.add_uids is not None:
4485 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4487 if self.op.remove_uids is not None:
4488 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4490 if self.op.uid_pool is not None:
4491 self.cluster.uid_pool = self.op.uid_pool
4493 if self.op.default_iallocator is not None:
4494 self.cluster.default_iallocator = self.op.default_iallocator
4496 if self.op.reserved_lvs is not None:
4497 self.cluster.reserved_lvs = self.op.reserved_lvs
4499 if self.op.use_external_mip_script is not None:
4500 self.cluster.use_external_mip_script = self.op.use_external_mip_script
    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
4519 if self.op.hidden_os:
4520 helper_os("hidden_os", self.op.hidden_os, "hidden")
4522 if self.op.blacklisted_os:
4523 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4525 if self.op.master_netdev:
4526 master_params = self.cfg.GetMasterNetworkParameters()
4527 ems = self.cfg.GetUseExternalMipScript()
4528 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4529 self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
4532 result.Raise("Could not disable the master ip")
4533 feedback_fn("Changing master_netdev from %s to %s" %
4534 (master_params.netdev, self.op.master_netdev))
4535 self.cluster.master_netdev = self.op.master_netdev
4537 if self.op.master_netmask:
4538 master_params = self.cfg.GetMasterNetworkParameters()
4539 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4540 result = self.rpc.call_node_change_master_netmask(master_params.name,
4541 master_params.netmask,
4542 self.op.master_netmask,
                                                         master_params.ip,
                                                         master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)
4549 self.cluster.master_netmask = self.op.master_netmask
4551 self.cfg.Update(self.cluster, feedback_fn)
4553 if self.op.master_netdev:
4554 master_params = self.cfg.GetMasterNetworkParameters()
4555 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4556 self.op.master_netdev)
4557 ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
4566 def _UploadHelper(lu, nodes, fname):
4567 """Helper for uploading a file and showing warnings.
4570 if os.path.exists(fname):
4571 result = lu.rpc.call_upload_file(nodes, fname)
4572 for to_node, to_result in result.items():
4573 msg = to_result.fail_msg
4575 msg = ("Copy of file %s to node %s failed: %s" %
4576 (fname, to_node, msg))
4580 def _ComputeAncillaryFiles(cluster, redist):
4581 """Compute files external to Ganeti which need to be consistent.
4583 @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    ])
  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(pathutils.RAPI_CERT_FILE)
  else:
    files_all.update(pathutils.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
4604 if cluster.modify_etc_hosts:
4605 files_all.add(pathutils.ETC_HOSTS)
4607 if cluster.use_external_mip_script:
4608 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4610 # Files which are optional, these must:
4611 # - be present in one other category as well
4612 # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    pathutils.RAPI_USERS_FILE,
    ])
4617 # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(pathutils.CLUSTER_CONF_FILE)
  # File storage
  if (not redist and
      (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4626 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4627 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
  # Files which should only be on VM-capable nodes
  files_vm = set(filename
                 for hv_name in cluster.enabled_hypervisors
                 for filename in
                 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
                   for hv_name in cluster.enabled_hypervisors
                   for filename in
                   hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4642 # Filenames in each category must be unique
4643 all_files_set = files_all | files_mc | files_vm
4644 assert (len(all_files_set) ==
4645 sum(map(len, [files_all, files_mc, files_vm]))), \
4646 "Found file listed in more than one file list"
4648 # Optional files must be present in one other category
4649 assert all_files_set.issuperset(files_opt), \
4650 "Optional file not in a different required list"
4652 # This one file should never ever be re-distributed via RPC
4653 assert not (redist and
4654 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4656 return (files_all, files_opt, files_mc, files_vm)
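

# Hedged sketch (illustrative only): the two invariants asserted by
# _ComputeAncillaryFiles, restated as a standalone check with a hypothetical
# name, usable on any candidate (files_all, files_opt, files_mc, files_vm).
def _ExampleCheckFileCategories(files_all, files_opt, files_mc, files_vm):
  """Returns True if the categories are disjoint and optionals are covered."""
  all_files_set = files_all | files_mc | files_vm
  disjoint = (len(all_files_set) ==
              sum(map(len, [files_all, files_mc, files_vm])))
  return disjoint and all_files_set.issuperset(files_opt)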
4659 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4660 """Distribute additional files which are part of the cluster configuration.
4662 ConfigWriter takes care of distributing the config and ssconf files, but
4663 there are more files which should be distributed to all nodes. This function
4664 makes sure those are copied.
4666 @param lu: calling logical unit
4667 @param additional_nodes: list of nodes not in the config to distribute to
4668 @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
4672 # Gather target nodes
4673 cluster = lu.cfg.GetClusterInfo()
4674 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4676 online_nodes = lu.cfg.GetOnlineNodeList()
4677 online_set = frozenset(online_nodes)
4678 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4680 if additional_nodes is not None:
4681 online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
4685 # Never distribute to master node
4686 for nodelist in [online_nodes, vm_nodes]:
4687 if master_info.name in nodelist:
4688 nodelist.remove(master_info.name)
4691 (files_all, _, files_mc, files_vm) = \
4692 _ComputeAncillaryFiles(cluster, True)
4694 # Never re-distribute configuration file from here
4695 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4696 pathutils.CLUSTER_CONF_FILE in files_vm)
4697 assert not files_mc, "Master candidates not handled in this function"
  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
4710 class LUClusterRedistConf(NoHooksLU):
4711 """Force the redistribution of cluster configuration.
  This is a very simple LU.

  """
  REQ_BGL = False
4718 def ExpandNames(self):
4719 self.needed_locks = {
4720 locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }
4723 self.share_locks = _ShareAll()
4725 def Exec(self, feedback_fn):
4726 """Redistribute the configuration.
4729 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4730 _RedistributeAncillaryFiles(self)
4733 class LUClusterActivateMasterIp(NoHooksLU):
4734 """Activate the master IP on the master node.
4737 def Exec(self, feedback_fn):
4738 """Activate the master IP.
4741 master_params = self.cfg.GetMasterNetworkParameters()
4742 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
4745 result.Raise("Could not activate the master IP")
4748 class LUClusterDeactivateMasterIp(NoHooksLU):
4749 """Deactivate the master IP on the master node.
4752 def Exec(self, feedback_fn):
4753 """Deactivate the master IP.
4756 master_params = self.cfg.GetMasterNetworkParameters()
4757 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
4760 result.Raise("Could not deactivate the master IP")
4763 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4764 """Sleep and poll for an instance's disk to sync.
  if not instance.disks or disks is not None and not disks:
    return True
4770 disks = _ExpandCheckDisks(instance, disks)
4773 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4775 node = instance.primary_node
  for dev in disks:
    lu.cfg.SetDiskID(dev, node)
4780 # TODO: Convert to utils.Retry
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    rstats = rstats.payload
4800 for i, mstat in enumerate(rstats):
4802 lu.LogWarning("Can't compute data for node %s/%s",
4803 node, disks[i].iv_name)
4806 cumul_degraded = (cumul_degraded or
4807 (mstat.is_degraded and mstat.sync_percent is None))
4808 if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
4811 rem_time = ("%s remaining (estimated)" %
4812 utils.FormatSeconds(mstat.estimated_time))
4813 max_time = mstat.estimated_time
4815 rem_time = "no time estimate"
4816 lu.LogInfo("- device %s: %5.2f%% done, %s",
4817 disks[i].iv_name, mstat.sync_percent, rem_time)
4819 # if we're done but degraded, let's do a few small retries, to
4820 # make sure we see a stable and not transient situation; therefore
4821 # we force restart of the loop
4822 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4823 logging.info("Degraded disks found, %d retries left", degr_retries)
4831 time.sleep(min(60, max_time))
4834 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4836 return not cumul_degraded
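

# Illustrative sketch (assumption): how the per-device progress line printed
# by _WaitForSync is derived from a mirror status record; the helper name is
# hypothetical.
def _ExampleFormatSyncLine(iv_name, sync_percent, estimated_time):
  """Formats one "- device ..." progress line as _WaitForSync does."""
  if estimated_time is not None:
    rem_time = ("%s remaining (estimated)" %
                utils.FormatSeconds(estimated_time))
  else:
    rem_time = "no time estimate"
  return "- device %s: %5.2f%% done, %s" % (iv_name, sync_percent, rem_time)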
4839 def _BlockdevFind(lu, node, dev, instance):
4840 """Wrapper around call_blockdev_find to annotate diskparams.
4842 @param lu: A reference to the lu object
4843 @param node: The node to call out
4844 @param dev: The device to find
4845 @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
4849 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4850 return lu.rpc.call_blockdev_find(node, disk)
4853 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4854 """Wrapper around L{_CheckDiskConsistencyInner}.
4857 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
                                    ldisk=ldisk)
def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
                               ldisk=False):
4864 """Check that mirrors are not degraded.
4866 @attention: The device has to be annotated already.
4868 The ldisk parameter, if True, will change the test from the
4869 is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
                                                     on_primary)

  return result
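

# Hedged sketch (assumption): the two health notions distinguished by the
# ldisk parameter above, extracted from a blockdev_find payload; the helper
# name is hypothetical.
def _ExampleDiskHealthy(payload, ldisk):
  """Mirrors the status test used by _CheckDiskConsistencyInner."""
  if ldisk:
    return payload.ldisk_status == constants.LDS_OKAY
  return not payload.is_degraded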
4900 class LUOobCommand(NoHooksLU):
4901 """Logical unit for OOB handling.
4905 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4907 def ExpandNames(self):
4908 """Gather locks we need.
4911 if self.op.node_names:
4912 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4913 lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET
4917 self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }
4921 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4923 if not self.op.node_names:
4924 # Acquire node allocation lock only if all nodes are affected
4925 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4927 def CheckPrereq(self):
4928 """Check prerequisites.
4931 - the node exists in the configuration
    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()
4940 assert self.op.power_delay >= 0.0
4942 if self.op.node_names:
4943 if (self.op.command in self._SKIP_MASTER and
4944 self.master_node in self.op.node_names):
4945 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4946 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4948 if master_oob_handler:
4949 additional_text = ("run '%s %s %s' if you want to operate on the"
4950 " master regardless") % (master_oob_handler,
4954 additional_text = "it does not support out-of-band operations"
4956 raise errors.OpPrereqError(("Operating on the master node %s is not"
4957 " allowed for %s; %s") %
4958 (self.master_node, self.op.command,
4959 additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)
4965 if self.op.command in self._SKIP_MASTER:
4966 assert self.master_node not in self.op.node_names
4968 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4970 raise errors.OpPrereqError("Node %s not found" % node_name,
4973 self.nodes.append(node)
4975 if (not self.op.ignore_status and
4976 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4977 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4978 " not marked offline") % node_name,
4981 def Exec(self, feedback_fn):
4982 """Execute OOB and return result if we expect any.
    master_node = self.master_node
    ret = []
4988 for idx, node in enumerate(utils.NiceSort(self.nodes,
4989 key=lambda node: node.name)):
4990 node_entry = [(constants.RS_NORMAL, node.name)]
4991 ret.append(node_entry)
4993 oob_program = _SupportsOob(self.cfg, node)
      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue
4999 logging.info("Executing out-of-band command '%s' using '%s' on %s",
5000 self.op.command, oob_program, node.name)
5001 result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)
      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
5007 node.name, result.fail_msg)
5008 node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
5018 # For health we should log important events
5019 for item, status in result.payload:
5020 if status in [constants.OOB_STATUS_WARNING,
5021 constants.OOB_STATUS_CRITICAL]:
5022 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5023 item, node.name, status)
          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
5028 node.powered = False
5029 elif self.op.command == constants.OOB_POWER_STATUS:
5030 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5031 if powered != node.powered:
5032 logging.warning(("Recorded power state (%s) of node '%s' does not"
5033 " match actual power state (%s)"), node.powered,
5036 # For configuration changing commands we should update the node
5037 if self.op.command in (constants.OOB_POWER_ON,
5038 constants.OOB_POWER_OFF):
5039 self.cfg.Update(node, feedback_fn)
5041 node_entry.append((constants.RS_NORMAL, result.payload))
5043 if (self.op.command == constants.OOB_POWER_ON and
5044 idx < len(self.nodes) - 1):
        time.sleep(self.op.power_delay)

    return ret
5049 def _CheckPayload(self, result):
5050 """Checks if the payload is valid.
5052 @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
5058 if not isinstance(result.payload, list):
5059 errs.append("command 'health' is expected to return a list but got %s" %
5060 type(result.payload))
      else:
        for item, status in result.payload:
5063 if status not in constants.OOB_STATUSES:
5064 errs.append("health item '%s' has invalid status '%s'" %
5067 if self.op.command == constants.OOB_POWER_STATUS:
5068 if not isinstance(result.payload, dict):
5069 errs.append("power-status is expected to return a dict but got %s" %
5070 type(result.payload))
5072 if self.op.command in [
5073 constants.OOB_POWER_ON,
5074 constants.OOB_POWER_OFF,
      constants.OOB_POWER_CYCLE,
      ]:
5077 if result.payload is not None:
5078 errs.append("%s is expected to not return payload but got '%s'" %
5079 (self.op.command, result.payload))
    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s"
                               % utils.CommaJoin(errs))
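

# Illustrative sketch (assumption): example payload shapes accepted by
# LUOobCommand._CheckPayload for each command class; the values are made up.
def _ExampleOobPayloads():
  """Returns sample OOB payloads keyed by command (illustrative only)."""
  return {
    constants.OOB_HEALTH: [("disk0", constants.OOB_STATUS_OK)],
    constants.OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True},
    constants.OOB_POWER_ON: None,
    constants.OOB_POWER_OFF: None,
    constants.OOB_POWER_CYCLE: None,
    }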
5086 class _OsQuery(_QueryBase):
5087 FIELDS = query.OS_FIELDS
5089 def ExpandNames(self, lu):
5090 # Lock all nodes in shared mode
5091 # Temporary removal of locks, should be reverted later
5092 # TODO: reintroduce locks when they are lighter-weight
5093 lu.needed_locks = {}
5094 #self.share_locks[locking.LEVEL_NODE] = 1
5095 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5097 # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET
5103 self.do_locking = self.use_locking
  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary
5112 @param rlist: a map with node names as keys and OS objects as values
5115 @return: a dictionary with osnames as keys and as value another
5116 map, with nodes as keys and tuples of (path, status, diagnose,
5117 variants, parameters, api_versions) as values, eg::
5119 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5120 (/srv/..., False, "invalid api")],
5121 "node2": [(/srv/..., True, "", [], [])]}
    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
5127 # level), so that nodes with a non-responding node daemon don't
5128 # make all OSes invalid
5129 good_nodes = [node_name for node_name in rlist
5130 if not rlist[node_name].fail_msg]
5131 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
5134 for (name, path, status, diagnose, variants,
5135 params, api_versions) in nr.payload:
5136 if name not in all_os:
5137 # build a list of nodes for this os containing empty lists
5138 # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
5141 all_os[name][nname] = []
5142 # convert params from [name, help] to (name, help)
5143 params = [tuple(v) for v in params]
5144 all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))

    return all_os
5148 def _GetQueryData(self, lu):
5149 """Computes the list of nodes and their attributes.
5152 # Locking is not used
5153 assert not (compat.any(lu.glm.is_owned(level)
5154 for level in locking.LEVELS
5155 if level != locking.LEVEL_CLUSTER) or
5156 self.do_locking or self.use_locking)
5158 valid_nodes = [node.name
5159 for node in lu.cfg.GetAllNodesInfo().values()
5160 if not node.offline and node.vm_capable]
5161 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}
5166 for (os_name, os_data) in pol.items():
5167 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5168 hidden=(os_name in cluster.hidden_os),
5169 blacklisted=(os_name in cluster.blacklisted_os))
      variants = set()
      parameters = set()
      api_versions = set()
5175 for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break
5180 (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)
5192 info.variants = list(variants)
5193 info.parameters = list(parameters)
5194 info.api_versions = list(api_versions)
5196 data[os_name] = info
5198 # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
5203 class LUOsDiagnose(NoHooksLU):
5204 """Logical unit for OS diagnose/query.
  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)
5216 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5217 # respective field is not requested
5218 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5219 for fname in ["hidden", "blacklisted"]
5220 if fname not in fields]
5221 if "valid" not in fields:
5222 status_filter.append([qlang.OP_TRUE, "valid"])
    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
5236 def CheckArguments(self):
5237 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5238 self.op.output_fields, False)
5240 def ExpandNames(self):
5241 self.oq.ExpandNames(self)
5243 def Exec(self, feedback_fn):
5244 return self.oq.OldStyleQuery(self)
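

# Hedged sketch (assumption): what the legacy filter built by
# LUOsDiagnose._BuildFilter evaluates to for a name-only query with no name
# arguments; hidden, blacklisted and invalid OSes are all filtered out.
def _ExampleDefaultOsFilter():
  """Returns the default OS status filter (illustrative only)."""
  return [qlang.OP_AND,
          [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
          [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
          [qlang.OP_TRUE, "valid"]]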
5247 class _ExtStorageQuery(_QueryBase):
5248 FIELDS = query.EXTSTORAGE_FIELDS
5250 def ExpandNames(self, lu):
5251 # Lock all nodes in shared mode
5252 # Temporary removal of locks, should be reverted later
5253 # TODO: reintroduce locks when they are lighter-weight
5254 lu.needed_locks = {}
5255 #self.share_locks[locking.LEVEL_NODE] = 1
5256 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5258 # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET
5264 self.do_locking = self.use_locking
  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByProvider(rlist):
    """Remaps a per-node return list into a per-provider per-node dictionary
5273 @param rlist: a map with node names as keys and ExtStorage objects as values
5276 @return: a dictionary with extstorage providers as keys and as
5277 value another map, with nodes as keys and tuples of
5278 (path, status, diagnose, parameters) as values, eg::
5280 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5281 "node2": [(/srv/..., False, "missing file")]
5282 "node3": [(/srv/..., True, "", [])]
    """
    all_es = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all providers invalid
5290 good_nodes = [node_name for node_name in rlist
5291 if not rlist[node_name].fail_msg]
5292 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
5295 for (name, path, status, diagnose, params) in nr.payload:
5296 if name not in all_es:
          # build a list of nodes for this provider containing empty lists
          # for each node in node_list
          all_es[name] = {}
          for nname in good_nodes:
5301 all_es[name][nname] = []
5302 # convert params from [name, help] to (name, help)
5303 params = [tuple(v) for v in params]
        all_es[name][node_name].append((path, status, diagnose, params))

    return all_es
5307 def _GetQueryData(self, lu):
5308 """Computes the list of nodes and their attributes.
5311 # Locking is not used
5312 assert not (compat.any(lu.glm.is_owned(level)
5313 for level in locking.LEVELS
5314 if level != locking.LEVEL_CLUSTER) or
5315 self.do_locking or self.use_locking)
5317 valid_nodes = [node.name
5318 for node in lu.cfg.GetAllNodesInfo().values()
5319 if not node.offline and node.vm_capable]
5320 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
    data = {}

    nodegroup_list = lu.cfg.GetNodeGroupList()
5326 for (es_name, es_data) in pol.items():
5327 # For every provider compute the nodegroup validity.
5328 # To do this we need to check the validity of each node in es_data
5329 # and then construct the corresponding nodegroup dict:
5330 # { nodegroup1: status
5331 # nodegroup2: status
      # }
      ndgrp_data = {}
      for nodegroup in nodegroup_list:
5335 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5337 nodegroup_nodes = ndgrp.members
5338 nodegroup_name = ndgrp.name
        node_statuses = []

        for node in nodegroup_nodes:
5342 if node in valid_nodes:
5343 if es_data[node] != []:
5344 node_status = es_data[node][0][1]
5345 node_statuses.append(node_status)
            else:
              node_statuses.append(False)
5349 if False in node_statuses:
5350 ndgrp_data[nodegroup_name] = False
          else:
            ndgrp_data[nodegroup_name] = True
5354 # Compute the provider's parameters
      parameters = set()
      for idx, esl in enumerate(es_data.values()):
        valid = bool(esl and esl[0][1])
        if not valid:
          break

        node_params = esl[0][3]
        if idx == 0:
          # First entry
          parameters.update(node_params)
        else:
          # Filter out inconsistent values
          parameters.intersection_update(node_params)
5369 params = list(parameters)
5371 # Now fill all the info for this provider
5372 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
                                  nodegroup_status=ndgrp_data,
                                  parameters=params)
5376 data[es_name] = info
5378 # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
5383 class LUExtStorageDiagnose(NoHooksLU):
5384 """Logical unit for ExtStorage diagnose/query.
5389 def CheckArguments(self):
5390 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5391 self.op.output_fields, False)
5393 def ExpandNames(self):
5394 self.eq.ExpandNames(self)
5396 def Exec(self, feedback_fn):
5397 return self.eq.OldStyleQuery(self)
5400 class LUNodeRemove(LogicalUnit):
5401 """Logical unit for removing a node.
5404 HPATH = "node-remove"
5405 HTYPE = constants.HTYPE_NODE
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
5416 def BuildHooksNodes(self):
5417 """Build hooks nodes.
5419 This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
5423 all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
5428 return (all_nodes, all_nodes)
5430 def CheckPrereq(self):
5431 """Check prerequisites.
5434 - the node exists in the configuration
5435 - it does not have primary or secondary instances
5436 - it's not the master
    Any errors are signaled by raising errors.OpPrereqError.

    """
5441 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5442 node = self.cfg.GetNodeInfo(self.op.node_name)
5443 assert node is not None
5445 masternode = self.cfg.GetMasterNode()
5446 if node.name == masternode:
5447 raise errors.OpPrereqError("Node is the master node, failover to another"
5448 " node is required", errors.ECODE_INVAL)
5450 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5451 if node.name in instance.all_nodes:
5452 raise errors.OpPrereqError("Instance %s is still running on the node,"
5453 " please remove first" % instance_name,
5455 self.op.node_name = node.name
5458 def Exec(self, feedback_fn):
5459 """Removes the node from the cluster.
5463 logging.info("Stopping the node daemon and removing configs from node %s",
5466 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"
5471 # Promote nodes to master candidate as needed
5472 _AdjustCandidatePool(self, exceptions=[node.name])
5473 self.context.RemoveNode(node.name)
5475 # Run post hooks on the node before it's removed
5476 _RunPostHook(self, node.name)
5478 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5479 msg = result.fail_msg
5481 self.LogWarning("Errors encountered on the remote node while leaving"
5482 " the cluster: %s", msg)
5484 # Remove node from our /etc/hosts
5485 if self.cfg.GetClusterInfo().modify_etc_hosts:
5486 master_node = self.cfg.GetMasterNode()
5487 result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
5490 result.Raise("Can't update hosts file with new host data")
5491 _RedistributeAncillaryFiles(self)
5494 class _NodeQuery(_QueryBase):
5495 FIELDS = query.NODE_FIELDS
5497 def ExpandNames(self, lu):
5498 lu.needed_locks = {}
5499 lu.share_locks = _ShareAll()
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET
5506 self.do_locking = (self.use_locking and
5507 query.NQ_LIVE in self.requested_data)
    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
5511 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5512 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
  def DeclareLocks(self, lu, level):
    pass
5517 def _GetQueryData(self, lu):
5518 """Computes the list of nodes and their attributes.
5521 all_info = lu.cfg.GetAllNodesInfo()
5523 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5525 # Gather data as requested
5526 if query.NQ_LIVE in self.requested_data:
5527 # filter out non-vm_capable nodes
5528 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5530 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5531 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5532 [lu.cfg.GetHypervisorType()], es_flags)
5533 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5534 for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None
5539 if query.NQ_INST in self.requested_data:
5540 node_to_primary = dict([(name, set()) for name in nodenames])
5541 node_to_secondary = dict([(name, set()) for name in nodenames])
5543 inst_data = lu.cfg.GetAllInstancesInfo()
5545 for inst in inst_data.values():
5546 if inst.primary_node in node_to_primary:
5547 node_to_primary[inst.primary_node].add(inst.name)
5548 for secnode in inst.secondary_nodes:
5549 if secnode in node_to_secondary:
5550 node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
5553 node_to_secondary = None
5555 if query.NQ_OOB in self.requested_data:
5556 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None
5561 if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}
5566 return query.NodeQueryData([all_info[name] for name in nodenames],
5567 live_data, lu.cfg.GetMasterNode(),
5568 node_to_primary, node_to_secondary, groups,
5569 oob_support, lu.cfg.GetClusterInfo())
5572 class LUNodeQuery(NoHooksLU):
5573 """Logical unit for querying nodes.
  # pylint: disable=W0142
  REQ_BGL = False
5579 def CheckArguments(self):
5580 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5581 self.op.output_fields, self.op.use_locking)
5583 def ExpandNames(self):
5584 self.nq.ExpandNames(self)
5586 def DeclareLocks(self, level):
5587 self.nq.DeclareLocks(self, level)
5589 def Exec(self, feedback_fn):
5590 return self.nq.OldStyleQuery(self)
5593 class LUNodeQueryvols(NoHooksLU):
5594 """Logical unit for getting volumes on node(s).
5598 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5599 _FIELDS_STATIC = utils.FieldSet("node")
5601 def CheckArguments(self):
5602 _CheckOutputFields(static=self._FIELDS_STATIC,
5603 dynamic=self._FIELDS_DYNAMIC,
5604 selected=self.op.output_fields)
5606 def ExpandNames(self):
5607 self.share_locks = _ShareAll()
    if self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }
    else:
5614 self.needed_locks = {
5615 locking.LEVEL_NODE: locking.ALL_SET,
5616 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5619 def Exec(self, feedback_fn):
5620 """Computes the list of nodes and their attributes.
5623 nodenames = self.owned_locks(locking.LEVEL_NODE)
5624 volumes = self.rpc.call_node_volumes(nodenames)
5626 ilist = self.cfg.GetAllInstancesInfo()
5627 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5630 for node in nodenames:
5631 nresult = volumes[node]
5634 msg = nresult.fail_msg
5636 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5639 node_vols = sorted(nresult.payload,
5640 key=operator.itemgetter("dev"))
5642 for vol in node_vols:
5644 for field in self.op.output_fields:
5647 elif field == "phys":
5651 elif field == "name":
5653 elif field == "size":
5654 val = int(float(vol["size"]))
5655 elif field == "instance":
5656 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5657 else:
5658 raise errors.ParameterError(field)
5659 node_output.append(str(val))
5661 output.append(node_output)
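# Illustrative output row (made-up values) for output_fields
# ["node", "vg", "name", "size", "instance"]:
#
#   ["node1", "xenvg", "inst1-disk0", "10240", "inst1.example.com"]
#
# i.e. one list of stringified field values per volume, per node.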
5666 class LUNodeQueryStorage(NoHooksLU):
5667 """Logical unit for getting information on storage units on node(s).
5670 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5673 def CheckArguments(self):
5674 _CheckOutputFields(static=self._FIELDS_STATIC,
5675 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5676 selected=self.op.output_fields)
5678 def ExpandNames(self):
5679 self.share_locks = _ShareAll()
5682 self.needed_locks = {
5683 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5686 self.needed_locks = {
5687 locking.LEVEL_NODE: locking.ALL_SET,
5688 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5691 def Exec(self, feedback_fn):
5692 """Computes the list of nodes and their attributes.
5695 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5697 # Always get name to sort by
5698 if constants.SF_NAME in self.op.output_fields:
5699 fields = self.op.output_fields[:]
5700 else:
5701 fields = [constants.SF_NAME] + self.op.output_fields
5703 # Never ask for node or type as it's only known to the LU
5704 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5705 while extra in fields:
5706 fields.remove(extra)
5708 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5709 name_idx = field_idx[constants.SF_NAME]
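# e.g. (illustrative) fields == ["name", "size", "used", "free"] yields
# field_idx == {"name": 0, "size": 1, "used": 2, "free": 3}, so name_idx
# locates the sort key in each result row.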
5711 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5712 data = self.rpc.call_storage_list(self.nodes,
5713 self.op.storage_type, st_args,
5714 self.op.name, fields)
5718 for node in utils.NiceSort(self.nodes):
5719 nresult = data[node]
5723 msg = nresult.fail_msg
5725 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5728 rows = dict([(row[name_idx], row) for row in nresult.payload])
5730 for name in utils.NiceSort(rows.keys()):
5735 for field in self.op.output_fields:
5736 if field == constants.SF_NODE:
5737 val = node
5738 elif field == constants.SF_TYPE:
5739 val = self.op.storage_type
5740 elif field in field_idx:
5741 val = row[field_idx[field]]
5742 else:
5743 raise errors.ParameterError(field)
5752 class _InstanceQuery(_QueryBase):
5753 FIELDS = query.INSTANCE_FIELDS
5755 def ExpandNames(self, lu):
5756 lu.needed_locks = {}
5757 lu.share_locks = _ShareAll()
5759 if self.names:
5760 self.wanted = _GetWantedInstances(lu, self.names)
5761 else:
5762 self.wanted = locking.ALL_SET
5764 self.do_locking = (self.use_locking and
5765 query.IQ_LIVE in self.requested_data)
5767 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5768 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5769 lu.needed_locks[locking.LEVEL_NODE] = []
5770 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5772 self.do_grouplocks = (self.do_locking and
5773 query.IQ_NODES in self.requested_data)
5775 def DeclareLocks(self, lu, level):
5777 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5778 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5780 # Lock all groups used by instances optimistically; this requires going
5781 # via the node before it's locked, requiring verification later on
5782 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5783 set(group_uuid
5784 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5785 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5786 elif level == locking.LEVEL_NODE:
5787 lu._LockInstancesNodes() # pylint: disable=W0212
5790 def _CheckGroupLocks(lu):
5791 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5792 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5794 # Check if node groups for locked instances are still correct
5795 for instance_name in owned_instances:
5796 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5798 def _GetQueryData(self, lu):
5799 """Computes the list of instances and their attributes.
5802 if self.do_grouplocks:
5803 self._CheckGroupLocks(lu)
5805 cluster = lu.cfg.GetClusterInfo()
5806 all_info = lu.cfg.GetAllInstancesInfo()
5808 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5810 instance_list = [all_info[name] for name in instance_names]
5811 nodes = frozenset(itertools.chain(*(inst.all_nodes
5812 for inst in instance_list)))
5813 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5816 wrongnode_inst = set()
5818 # Gather data as requested
5819 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5821 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5823 result = node_data[name]
5825 # offline nodes will be in both lists
5826 assert result.fail_msg
5827 offline_nodes.append(name)
5829 bad_nodes.append(name)
5830 elif result.payload:
5831 for inst in result.payload:
5832 if inst in all_info:
5833 if all_info[inst].primary_node == name:
5834 live_data.update(result.payload)
5836 wrongnode_inst.add(inst)
5838 # orphan instance; we don't list it here as we don't
5839 # handle this case yet in the output of instance listing
5840 logging.warning("Orphan instance '%s' found on node %s",
5842 # else no instance is alive
5846 if query.IQ_DISKUSAGE in self.requested_data:
5847 gmi = ganeti.masterd.instance
5848 disk_usage = dict((inst.name,
5849 gmi.ComputeDiskSize(inst.disk_template,
5850 [{constants.IDISK_SIZE: disk.size}
5851 for disk in inst.disks]))
5852 for inst in instance_list)
5856 if query.IQ_CONSOLE in self.requested_data:
5858 for inst in instance_list:
5859 if inst.name in live_data:
5860 # Instance is running
5861 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5862 else:
5863 consinfo[inst.name] = None
5864 assert set(consinfo.keys()) == set(instance_names)
5868 if query.IQ_NODES in self.requested_data:
5869 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5871 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5872 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5873 for uuid in set(map(operator.attrgetter("group"),
5879 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5880 disk_usage, offline_nodes, bad_nodes,
5881 live_data, wrongnode_inst, consinfo,
5885 class LUQuery(NoHooksLU):
5886 """Query for resources/items of a certain kind.
5889 # pylint: disable=W0142
5892 def CheckArguments(self):
5893 qcls = _GetQueryImplementation(self.op.what)
5895 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5897 def ExpandNames(self):
5898 self.impl.ExpandNames(self)
5900 def DeclareLocks(self, level):
5901 self.impl.DeclareLocks(self, level)
5903 def Exec(self, feedback_fn):
5904 return self.impl.NewStyleQuery(self)
5907 class LUQueryFields(NoHooksLU):
5908 """Query for resources/items of a certain kind.
5911 # pylint: disable=W0142
5914 def CheckArguments(self):
5915 self.qcls = _GetQueryImplementation(self.op.what)
5917 def ExpandNames(self):
5918 self.needed_locks = {}
5920 def Exec(self, feedback_fn):
5921 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
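# Illustrative sketch (hypothetical values): the opcode driving this LU for
# node fields, roughly what "gnt-node list-fields" submits:
#
#   op = opcodes.OpQueryFields(what=constants.QR_NODE, fields=None)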
5924 class LUNodeModifyStorage(NoHooksLU):
5925 """Logical unit for modifying a storage volume on a node.
5930 def CheckArguments(self):
5931 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5933 storage_type = self.op.storage_type
5936 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5938 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5939 " modified" % storage_type,
5942 diff = set(self.op.changes.keys()) - modifiable
5944 raise errors.OpPrereqError("The following fields can not be modified for"
5945 " storage units of type '%s': %r" %
5946 (storage_type, list(diff)),
5949 def ExpandNames(self):
5950 self.needed_locks = {
5951 locking.LEVEL_NODE: self.op.node_name,
5954 def Exec(self, feedback_fn):
5955 """Computes the list of nodes and their attributes.
5958 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5959 result = self.rpc.call_storage_modify(self.op.node_name,
5960 self.op.storage_type, st_args,
5961 self.op.name, self.op.changes)
5962 result.Raise("Failed to modify storage unit '%s' on %s" %
5963 (self.op.name, self.op.node_name))
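# Illustrative sketch (hypothetical values): marking an LVM physical volume
# as allocatable through this LU's opcode:
#
#   op = opcodes.OpNodeModifyStorage(node_name="node1",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sda3",
#                                    changes={constants.SF_ALLOCATABLE: True})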
5966 class LUNodeAdd(LogicalUnit):
5967 """Logical unit for adding node to the cluster.
5971 HTYPE = constants.HTYPE_NODE
5972 _NFLAGS = ["master_capable", "vm_capable"]
5974 def CheckArguments(self):
5975 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5976 # validate/normalize the node name
5977 self.hostname = netutils.GetHostname(name=self.op.node_name,
5978 family=self.primary_ip_family)
5979 self.op.node_name = self.hostname.name
5981 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5982 raise errors.OpPrereqError("Cannot readd the master node",
5985 if self.op.readd and self.op.group:
5986 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5987 " being readded", errors.ECODE_INVAL)
5989 def BuildHooksEnv(self):
5992 This will run on all nodes before, and on all nodes + the new node after.
5996 "OP_TARGET": self.op.node_name,
5997 "NODE_NAME": self.op.node_name,
5998 "NODE_PIP": self.op.primary_ip,
5999 "NODE_SIP": self.op.secondary_ip,
6000 "MASTER_CAPABLE": str(self.op.master_capable),
6001 "VM_CAPABLE": str(self.op.vm_capable),
6004 def BuildHooksNodes(self):
6005 """Build hooks nodes.
6008 # Exclude added node
6009 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6010 post_nodes = pre_nodes + [self.op.node_name, ]
6012 return (pre_nodes, post_nodes)
6014 def CheckPrereq(self):
6015 """Check prerequisites.
6018 - the new node is not already in the config
6020 - its parameters (single/dual homed) match the cluster
6022 Any errors are signaled by raising errors.OpPrereqError.
6025 cfg = self.cfg
6026 hostname = self.hostname
6027 node = hostname.name
6028 primary_ip = self.op.primary_ip = hostname.ip
6029 if self.op.secondary_ip is None:
6030 if self.primary_ip_family == netutils.IP6Address.family:
6031 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6032 " IPv4 address must be given as secondary",
6034 self.op.secondary_ip = primary_ip
6036 secondary_ip = self.op.secondary_ip
6037 if not netutils.IP4Address.IsValid(secondary_ip):
6038 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6039 " address" % secondary_ip, errors.ECODE_INVAL)
6041 node_list = cfg.GetNodeList()
6042 if not self.op.readd and node in node_list:
6043 raise errors.OpPrereqError("Node %s is already in the configuration" %
6044 node, errors.ECODE_EXISTS)
6045 elif self.op.readd and node not in node_list:
6046 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6049 self.changed_primary_ip = False
6051 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6052 if self.op.readd and node == existing_node_name:
6053 if existing_node.secondary_ip != secondary_ip:
6054 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6055 " address configuration as before",
6057 if existing_node.primary_ip != primary_ip:
6058 self.changed_primary_ip = True
6062 if (existing_node.primary_ip == primary_ip or
6063 existing_node.secondary_ip == primary_ip or
6064 existing_node.primary_ip == secondary_ip or
6065 existing_node.secondary_ip == secondary_ip):
6066 raise errors.OpPrereqError("New node ip address(es) conflict with"
6067 " existing node %s" % existing_node.name,
6068 errors.ECODE_NOTUNIQUE)
6070 # After this 'if' block, None is no longer a valid value for the
6071 # _capable op attributes
6073 old_node = self.cfg.GetNodeInfo(node)
6074 assert old_node is not None, "Can't retrieve locked node %s" % node
6075 for attr in self._NFLAGS:
6076 if getattr(self.op, attr) is None:
6077 setattr(self.op, attr, getattr(old_node, attr))
6079 for attr in self._NFLAGS:
6080 if getattr(self.op, attr) is None:
6081 setattr(self.op, attr, True)
6083 if self.op.readd and not self.op.vm_capable:
6084 pri, sec = cfg.GetNodeInstances(node)
6086 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6087 " flag set to false, but it already holds"
6088 " instances" % node,
6091 # check that the type of the node (single versus dual homed) is the
6092 # same as for the master
6093 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6094 master_singlehomed = myself.secondary_ip == myself.primary_ip
6095 newbie_singlehomed = secondary_ip == primary_ip
6096 if master_singlehomed != newbie_singlehomed:
6097 if master_singlehomed:
6098 raise errors.OpPrereqError("The master has no secondary ip but the"
6099 " new node has one",
6102 raise errors.OpPrereqError("The master has a secondary ip but the"
6103 " new node doesn't have one",
6106 # checks reachability
6107 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6108 raise errors.OpPrereqError("Node not reachable by ping",
6109 errors.ECODE_ENVIRON)
6111 if not newbie_singlehomed:
6112 # check reachability from my secondary ip to newbie's secondary ip
6113 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6114 source=myself.secondary_ip):
6115 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6116 " based ping to node daemon port",
6117 errors.ECODE_ENVIRON)
6124 if self.op.master_capable:
6125 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6126 else:
6127 self.master_candidate = False
6129 if self.op.readd:
6130 self.new_node = old_node
6131 else:
6132 node_group = cfg.LookupNodeGroup(self.op.group)
6133 self.new_node = objects.Node(name=node,
6134 primary_ip=primary_ip,
6135 secondary_ip=secondary_ip,
6136 master_candidate=self.master_candidate,
6137 offline=False, drained=False,
6138 group=node_group, ndparams={})
6140 if self.op.ndparams:
6141 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6143 if self.op.hv_state:
6144 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6146 if self.op.disk_state:
6147 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6149 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6150 # it a property on the base class.
6151 rpcrunner = rpc.DnsOnlyRunner()
6152 result = rpcrunner.call_version([node])[node]
6153 result.Raise("Can't get version information from node %s" % node)
6154 if constants.PROTOCOL_VERSION == result.payload:
6155 logging.info("Communication to node %s fine, sw version %s match",
6156 node, result.payload)
6158 raise errors.OpPrereqError("Version mismatch master version %s,"
6159 " node version %s" %
6160 (constants.PROTOCOL_VERSION, result.payload),
6161 errors.ECODE_ENVIRON)
6163 vg_name = cfg.GetVGName()
6164 if vg_name is not None:
6165 vparams = {constants.NV_PVLIST: [vg_name]}
6166 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6167 if self.op.ndparams:
6168 excl_stor = self.op.ndparams.get(constants.ND_EXCLUSIVE_STORAGE,
6170 cname = self.cfg.GetClusterName()
6171 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6172 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6174 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6175 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6177 def Exec(self, feedback_fn):
6178 """Adds the new node to the cluster.
6181 new_node = self.new_node
6182 node = new_node.name
6184 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6187 # We are adding a new node, so we assume it's powered
6188 new_node.powered = True
6190 # for re-adds, reset the offline/drained/master-candidate flags;
6191 # we need to reset here, otherwise offline would prevent RPC calls
6192 # later in the procedure; this also means that if the re-add
6193 # fails, we are left with a non-offlined, broken node
6195 new_node.drained = new_node.offline = False # pylint: disable=W0201
6196 self.LogInfo("Readding a node, the offline/drained flags were reset")
6197 # if we demote the node, we do cleanup later in the procedure
6198 new_node.master_candidate = self.master_candidate
6199 if self.changed_primary_ip:
6200 new_node.primary_ip = self.op.primary_ip
6202 # copy the master/vm_capable flags
6203 for attr in self._NFLAGS:
6204 setattr(new_node, attr, getattr(self.op, attr))
6206 # notify the user about any possible mc promotion
6207 if new_node.master_candidate:
6208 self.LogInfo("Node will be a master candidate")
6210 if self.op.ndparams:
6211 new_node.ndparams = self.op.ndparams
6212 else:
6213 new_node.ndparams = {}
6215 if self.op.hv_state:
6216 new_node.hv_state_static = self.new_hv_state
6218 if self.op.disk_state:
6219 new_node.disk_state_static = self.new_disk_state
6221 # Add node to our /etc/hosts, and add key to known_hosts
6222 if self.cfg.GetClusterInfo().modify_etc_hosts:
6223 master_node = self.cfg.GetMasterNode()
6224 result = self.rpc.call_etc_hosts_modify(master_node,
6225 constants.ETC_HOSTS_ADD, self.hostname.name, self.hostname.ip)
6228 result.Raise("Can't update hosts file with new host data")
6230 if new_node.secondary_ip != new_node.primary_ip:
6231 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6234 node_verify_list = [self.cfg.GetMasterNode()]
6235 node_verify_param = {
6236 constants.NV_NODELIST: ([node], {}),
6237 # TODO: do a node-net-test as well?
6240 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6241 self.cfg.GetClusterName())
6242 for verifier in node_verify_list:
6243 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6244 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6245 if nl_payload:
6246 for failed in nl_payload:
6247 feedback_fn("ssh/hostname verification failed"
6248 " (checking from %s): %s" %
6249 (verifier, nl_payload[failed]))
6250 raise errors.OpExecError("ssh/hostname verification failed")
6253 _RedistributeAncillaryFiles(self)
6254 self.context.ReaddNode(new_node)
6255 # make sure we redistribute the config
6256 self.cfg.Update(new_node, feedback_fn)
6257 # and make sure the new node will not have old files around
6258 if not new_node.master_candidate:
6259 result = self.rpc.call_node_demote_from_mc(new_node.name)
6260 msg = result.fail_msg
6262 self.LogWarning("Node failed to demote itself from master"
6263 " candidate status: %s" % msg)
6265 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6266 additional_vm=self.op.vm_capable)
6267 self.context.AddNode(new_node, self.proc.GetECId())
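# Rough summary of the add-node flow above (comment only, for orientation):
#   1. reset offline/drained on re-adds and copy the *_capable flags
#   2. update /etc/hosts on the master, then verify ssh/hostname
#      connectivity from the master to the new node
#   3. redistribute ancillary files and register the node in the
#      configuration via ReaddNode()/AddNode()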
6270 class LUNodeSetParams(LogicalUnit):
6271 """Modifies the parameters of a node.
6273 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6274 to the node role (as _ROLE_*)
6275 @cvar _R2F: a dictionary from node role to tuples of flags
6276 @cvar _FLAGS: a list of attribute names corresponding to the flags
6279 HPATH = "node-modify"
6280 HTYPE = constants.HTYPE_NODE
6282 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6284 (True, False, False): _ROLE_CANDIDATE,
6285 (False, True, False): _ROLE_DRAINED,
6286 (False, False, True): _ROLE_OFFLINE,
6287 (False, False, False): _ROLE_REGULAR,
6289 _R2F = dict((v, k) for k, v in _F2R.items())
6290 _FLAGS = ["master_candidate", "drained", "offline"]
6292 def CheckArguments(self):
6293 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6294 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6295 self.op.master_capable, self.op.vm_capable,
6296 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6298 if all_mods.count(None) == len(all_mods):
6299 raise errors.OpPrereqError("Please pass at least one modification",
6301 if all_mods.count(True) > 1:
6302 raise errors.OpPrereqError("Can't set the node into more than one"
6303 " state at the same time",
6306 # Boolean value that tells us whether we might be demoting from MC
6307 self.might_demote = (self.op.master_candidate is False or
6308 self.op.offline is True or
6309 self.op.drained is True or
6310 self.op.master_capable is False)
6312 if self.op.secondary_ip:
6313 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6314 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6315 " address" % self.op.secondary_ip,
6318 self.lock_all = self.op.auto_promote and self.might_demote
6319 self.lock_instances = self.op.secondary_ip is not None
6321 def _InstanceFilter(self, instance):
6322 """Filter for getting affected instances.
6325 return (instance.disk_template in constants.DTS_INT_MIRROR and
6326 self.op.node_name in instance.all_nodes)
6328 def ExpandNames(self):
6330 self.needed_locks = {
6331 locking.LEVEL_NODE: locking.ALL_SET,
6333 # Block allocations when all nodes are locked
6334 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6337 self.needed_locks = {
6338 locking.LEVEL_NODE: self.op.node_name,
6341 # Since modifying a node can have severe effects on currently running
6342 # operations, the resource lock is at least acquired in shared mode
6343 self.needed_locks[locking.LEVEL_NODE_RES] = \
6344 self.needed_locks[locking.LEVEL_NODE]
6346 # Get all locks except nodes in shared mode; they are not used for anything
6347 # but read-only access
6348 self.share_locks = _ShareAll()
6349 self.share_locks[locking.LEVEL_NODE] = 0
6350 self.share_locks[locking.LEVEL_NODE_RES] = 0
6351 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6353 if self.lock_instances:
6354 self.needed_locks[locking.LEVEL_INSTANCE] = \
6355 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6357 def BuildHooksEnv(self):
6360 This runs on the master node.
6364 "OP_TARGET": self.op.node_name,
6365 "MASTER_CANDIDATE": str(self.op.master_candidate),
6366 "OFFLINE": str(self.op.offline),
6367 "DRAINED": str(self.op.drained),
6368 "MASTER_CAPABLE": str(self.op.master_capable),
6369 "VM_CAPABLE": str(self.op.vm_capable),
6372 def BuildHooksNodes(self):
6373 """Build hooks nodes.
6376 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6379 def CheckPrereq(self):
6380 """Check prerequisites.
6382 This only checks the instance list against the existing names.
6385 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6387 if self.lock_instances:
6388 affected_instances = \
6389 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6391 # Verify instance locks
6392 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6393 wanted_instances = frozenset(affected_instances.keys())
6394 if wanted_instances - owned_instances:
6395 raise errors.OpPrereqError("Instances affected by changing node %s's"
6396 " secondary IP address have changed since"
6397 " locks were acquired, wanted '%s', have"
6398 " '%s'; retry the operation" %
6400 utils.CommaJoin(wanted_instances),
6401 utils.CommaJoin(owned_instances)),
6404 affected_instances = None
6406 if (self.op.master_candidate is not None or
6407 self.op.drained is not None or
6408 self.op.offline is not None):
6409 # we can't change the master's node flags
6410 if self.op.node_name == self.cfg.GetMasterNode():
6411 raise errors.OpPrereqError("The master role can be changed"
6412 " only via master-failover",
6415 if self.op.master_candidate and not node.master_capable:
6416 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6417 " it a master candidate" % node.name,
6420 if self.op.vm_capable is False:
6421 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6423 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6424 " the vm_capable flag" % node.name,
6427 if node.master_candidate and self.might_demote and not self.lock_all:
6428 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6429 # check if after removing the current node, we're missing master candidates
6431 (mc_remaining, mc_should, _) = \
6432 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6433 if mc_remaining < mc_should:
6434 raise errors.OpPrereqError("Not enough master candidates, please"
6435 " pass auto promote option to allow"
6436 " promotion (--auto-promote or RAPI"
6437 " auto_promote=True)", errors.ECODE_STATE)
6439 self.old_flags = old_flags = (node.master_candidate,
6440 node.drained, node.offline)
6441 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6442 self.old_role = old_role = self._F2R[old_flags]
6444 # Check for ineffective changes
6445 for attr in self._FLAGS:
6446 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6447 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6448 setattr(self.op, attr, None)
6450 # Past this point, any flag change to False means a transition
6451 # away from the respective state, as only real changes are kept
6453 # TODO: We might query the real power state if it supports OOB
6454 if _SupportsOob(self.cfg, node):
6455 if self.op.offline is False and not (node.powered or
6456 self.op.powered is True):
6457 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6458 " offline status can be reset") %
6459 self.op.node_name, errors.ECODE_STATE)
6460 elif self.op.powered is not None:
6461 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6462 " as it does not support out-of-band"
6463 " handling") % self.op.node_name,
6466 # If we're being de-offlined/drained, we'll promote ourselves to MC if needed
6467 if (self.op.drained is False or self.op.offline is False or
6468 (self.op.master_capable and not node.master_capable)):
6469 if _DecideSelfPromotion(self):
6470 self.op.master_candidate = True
6471 self.LogInfo("Auto-promoting node to master candidate")
6473 # If we're no longer master capable, we'll demote ourselves from MC
6474 if self.op.master_capable is False and node.master_candidate:
6475 self.LogInfo("Demoting from master candidate")
6476 self.op.master_candidate = False
6479 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6480 if self.op.master_candidate:
6481 new_role = self._ROLE_CANDIDATE
6482 elif self.op.drained:
6483 new_role = self._ROLE_DRAINED
6484 elif self.op.offline:
6485 new_role = self._ROLE_OFFLINE
6486 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6487 # False is still in new flags, which means we're un-setting (e.g. the offline state)
6489 new_role = self._ROLE_REGULAR
6490 else: # no new flags, nothing, keep old role
6493 self.new_role = new_role
6495 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6496 # Trying to transition out of offline status
6497 result = self.rpc.call_version([node.name])[node.name]
6499 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6500 " to report its version: %s" %
6501 (node.name, result.fail_msg),
6504 self.LogWarning("Transitioning node from offline to online state"
6505 " without using re-add. Please make sure the node"
6508 # When changing the secondary ip, verify if this is a single-homed to
6509 # multi-homed transition or vice versa, and apply the relevant checks.
6511 if self.op.secondary_ip:
6512 # Ok even without locking, because this can't be changed by any LU
6513 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6514 master_singlehomed = master.secondary_ip == master.primary_ip
6515 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6516 if self.op.force and node.name == master.name:
6517 self.LogWarning("Transitioning from single-homed to multi-homed"
6518 " cluster; all nodes will require a secondary IP"
6521 raise errors.OpPrereqError("Changing the secondary ip on a"
6522 " single-homed cluster requires the"
6523 " --force option to be passed, and the"
6524 " target node to be the master",
6526 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6527 if self.op.force and node.name == master.name:
6528 self.LogWarning("Transitioning from multi-homed to single-homed"
6529 " cluster; secondary IP addresses will have to be"
6532 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6533 " same as the primary IP on a multi-homed"
6534 " cluster, unless the --force option is"
6535 " passed, and the target node is the"
6536 " master", errors.ECODE_INVAL)
6538 assert not (frozenset(affected_instances) -
6539 self.owned_locks(locking.LEVEL_INSTANCE))
6542 if affected_instances:
6543 msg = ("Cannot change secondary IP address: offline node has"
6544 " instances (%s) configured to use it" %
6545 utils.CommaJoin(affected_instances.keys()))
6546 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6548 # On online nodes, check that no instances are running, and that
6549 # the node has the new ip and we can reach it.
6550 for instance in affected_instances.values():
6551 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6552 msg="cannot change secondary ip")
6554 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6555 if master.name != node.name:
6556 # check reachability from master secondary ip to new secondary ip
6557 if not netutils.TcpPing(self.op.secondary_ip,
6558 constants.DEFAULT_NODED_PORT,
6559 source=master.secondary_ip):
6560 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6561 " based ping to node daemon port",
6562 errors.ECODE_ENVIRON)
6564 if self.op.ndparams:
6565 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6566 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6567 self.new_ndparams = new_ndparams
6569 if self.op.hv_state:
6570 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6571 self.node.hv_state_static)
6573 if self.op.disk_state:
6574 self.new_disk_state = \
6575 _MergeAndVerifyDiskState(self.op.disk_state,
6576 self.node.disk_state_static)
6578 def Exec(self, feedback_fn):
6583 old_role = self.old_role
6584 new_role = self.new_role
6588 if self.op.ndparams:
6589 node.ndparams = self.new_ndparams
6591 if self.op.powered is not None:
6592 node.powered = self.op.powered
6594 if self.op.hv_state:
6595 node.hv_state_static = self.new_hv_state
6597 if self.op.disk_state:
6598 node.disk_state_static = self.new_disk_state
6600 for attr in ["master_capable", "vm_capable"]:
6601 val = getattr(self.op, attr)
6603 setattr(node, attr, val)
6604 result.append((attr, str(val)))
6606 if new_role != old_role:
6607 # Tell the node to demote itself, if no longer MC and not offline
6608 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6609 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6611 self.LogWarning("Node failed to demote itself: %s", msg)
6613 new_flags = self._R2F[new_role]
6614 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6616 result.append((desc, str(nf)))
6617 (node.master_candidate, node.drained, node.offline) = new_flags
6619 # we locked all nodes, we adjust the CP before updating this node
6621 _AdjustCandidatePool(self, [node.name])
6623 if self.op.secondary_ip:
6624 node.secondary_ip = self.op.secondary_ip
6625 result.append(("secondary_ip", self.op.secondary_ip))
6627 # this will trigger configuration file update, if needed
6628 self.cfg.Update(node, feedback_fn)
6630 # this will trigger job queue propagation or cleanup if the mc flag changed
6632 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6633 self.context.ReaddNode(node)
6638 class LUNodePowercycle(NoHooksLU):
6639 """Powercycles a node.
6644 def CheckArguments(self):
6645 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6646 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6647 raise errors.OpPrereqError("The node is the master and the force"
6648 " parameter was not set",
6651 def ExpandNames(self):
6652 """Locking for PowercycleNode.
6654 This is a last-resort option and shouldn't block on other
6655 jobs. Therefore, we grab no locks.
6658 self.needed_locks = {}
6660 def Exec(self, feedback_fn):
6664 result = self.rpc.call_node_powercycle(self.op.node_name,
6665 self.cfg.GetHypervisorType())
6666 result.Raise("Failed to schedule the reboot")
6667 return result.payload
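# Illustrative sketch (hypothetical node name): powercycling the master
# requires force, as checked in CheckArguments above:
#
#   op = opcodes.OpNodePowercycle(node_name="master.example.com", force=True)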
6670 class LUClusterQuery(NoHooksLU):
6671 """Query cluster configuration.
6676 def ExpandNames(self):
6677 self.needed_locks = {}
6679 def Exec(self, feedback_fn):
6680 """Return cluster config.
6683 cluster = self.cfg.GetClusterInfo()
6684 os_hvp = {}
6686 # Filter just for enabled hypervisors
6687 for os_name, hv_dict in cluster.os_hvp.items():
6688 os_hvp[os_name] = {}
6689 for hv_name, hv_params in hv_dict.items():
6690 if hv_name in cluster.enabled_hypervisors:
6691 os_hvp[os_name][hv_name] = hv_params
6693 # Convert ip_family to ip_version
6694 primary_ip_version = constants.IP4_VERSION
6695 if cluster.primary_ip_family == netutils.IP6Address.family:
6696 primary_ip_version = constants.IP6_VERSION
6699 "software_version": constants.RELEASE_VERSION,
6700 "protocol_version": constants.PROTOCOL_VERSION,
6701 "config_version": constants.CONFIG_VERSION,
6702 "os_api_version": max(constants.OS_API_VERSIONS),
6703 "export_version": constants.EXPORT_VERSION,
6704 "architecture": runtime.GetArchInfo(),
6705 "name": cluster.cluster_name,
6706 "master": cluster.master_node,
6707 "default_hypervisor": cluster.primary_hypervisor,
6708 "enabled_hypervisors": cluster.enabled_hypervisors,
6709 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6710 for hypervisor_name in cluster.enabled_hypervisors]),
6712 "beparams": cluster.beparams,
6713 "osparams": cluster.osparams,
6714 "ipolicy": cluster.ipolicy,
6715 "nicparams": cluster.nicparams,
6716 "ndparams": cluster.ndparams,
6717 "diskparams": cluster.diskparams,
6718 "candidate_pool_size": cluster.candidate_pool_size,
6719 "master_netdev": cluster.master_netdev,
6720 "master_netmask": cluster.master_netmask,
6721 "use_external_mip_script": cluster.use_external_mip_script,
6722 "volume_group_name": cluster.volume_group_name,
6723 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6724 "file_storage_dir": cluster.file_storage_dir,
6725 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6726 "maintain_node_health": cluster.maintain_node_health,
6727 "ctime": cluster.ctime,
6728 "mtime": cluster.mtime,
6729 "uuid": cluster.uuid,
6730 "tags": list(cluster.GetTags()),
6731 "uid_pool": cluster.uid_pool,
6732 "default_iallocator": cluster.default_iallocator,
6733 "reserved_lvs": cluster.reserved_lvs,
6734 "primary_ip_version": primary_ip_version,
6735 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6736 "hidden_os": cluster.hidden_os,
6737 "blacklisted_os": cluster.blacklisted_os,
6743 class LUClusterConfigQuery(NoHooksLU):
6744 """Return configuration values.
6749 def CheckArguments(self):
6750 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6752 def ExpandNames(self):
6753 self.cq.ExpandNames(self)
6755 def DeclareLocks(self, level):
6756 self.cq.DeclareLocks(self, level)
6758 def Exec(self, feedback_fn):
6759 result = self.cq.OldStyleQuery(self)
6761 assert len(result) == 1
6766 class _ClusterQuery(_QueryBase):
6767 FIELDS = query.CLUSTER_FIELDS
6769 #: Do not sort (there is only one item)
6772 def ExpandNames(self, lu):
6773 lu.needed_locks = {}
6775 # The following variables interact with _QueryBase._GetNames
6776 self.wanted = locking.ALL_SET
6777 self.do_locking = self.use_locking
6780 raise errors.OpPrereqError("Can not use locking for cluster queries",
6783 def DeclareLocks(self, lu, level):
6786 def _GetQueryData(self, lu):
6787 """Computes the list of nodes and their attributes.
6790 # Locking is not used
6791 assert not (compat.any(lu.glm.is_owned(level)
6792 for level in locking.LEVELS
6793 if level != locking.LEVEL_CLUSTER) or
6794 self.do_locking or self.use_locking)
6796 if query.CQ_CONFIG in self.requested_data:
6797 cluster = lu.cfg.GetClusterInfo()
6798 else:
6799 cluster = NotImplemented
6801 if query.CQ_QUEUE_DRAINED in self.requested_data:
6802 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6803 else:
6804 drain_flag = NotImplemented
6806 if query.CQ_WATCHER_PAUSE in self.requested_data:
6807 master_name = lu.cfg.GetMasterNode()
6809 result = lu.rpc.call_get_watcher_pause(master_name)
6810 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6813 watcher_pause = result.payload
6814 else:
6815 watcher_pause = NotImplemented
6817 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6820 class LUInstanceActivateDisks(NoHooksLU):
6821 """Bring up an instance's disks.
6826 def ExpandNames(self):
6827 self._ExpandAndLockInstance()
6828 self.needed_locks[locking.LEVEL_NODE] = []
6829 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6831 def DeclareLocks(self, level):
6832 if level == locking.LEVEL_NODE:
6833 self._LockInstancesNodes()
6835 def CheckPrereq(self):
6836 """Check prerequisites.
6838 This checks that the instance is in the cluster.
6841 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6842 assert self.instance is not None, \
6843 "Cannot retrieve locked instance %s" % self.op.instance_name
6844 _CheckNodeOnline(self, self.instance.primary_node)
6846 def Exec(self, feedback_fn):
6847 """Activate the disks.
6850 disks_ok, disks_info = \
6851 _AssembleInstanceDisks(self, self.instance,
6852 ignore_size=self.op.ignore_size)
6854 raise errors.OpExecError("Cannot activate block devices")
6856 if self.op.wait_for_sync:
6857 if not _WaitForSync(self, self.instance):
6858 raise errors.OpExecError("Some disks of the instance are degraded!")
6863 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6864 ignore_size=False):
6865 """Prepare the block devices for an instance.
6867 This sets up the block devices on all nodes.
6869 @type lu: L{LogicalUnit}
6870 @param lu: the logical unit on whose behalf we execute
6871 @type instance: L{objects.Instance}
6872 @param instance: the instance for whose disks we assemble
6873 @type disks: list of L{objects.Disk} or None
6874 @param disks: which disks to assemble (or all, if None)
6875 @type ignore_secondaries: boolean
6876 @param ignore_secondaries: if true, errors on secondary nodes
6877 won't result in an error return from the function
6878 @type ignore_size: boolean
6879 @param ignore_size: if true, the current known size of the disk
6880 will not be used during the disk activation, useful for cases
6881 when the size is wrong
6882 @return: a pair (disks_ok, device_info); disks_ok is False if the
6883 operation failed, and device_info is a list of tuples
6884 (host, instance_visible_name, node_visible_name) mapping node devices to instance devices
6889 iname = instance.name
6890 disks = _ExpandCheckDisks(instance, disks)
6892 # With the two passes mechanism we try to reduce the window of
6893 # opportunity for the race condition of switching DRBD to primary
6894 # before handshaking occurred, but we do not eliminate it
6896 # The proper fix would be to wait (with some limits) until the
6897 # connection has been made and drbd transitions from WFConnection
6898 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6901 # 1st pass, assemble on all nodes in secondary mode
6902 for idx, inst_disk in enumerate(disks):
6903 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6905 node_disk = node_disk.Copy()
6906 node_disk.UnsetSize()
6907 lu.cfg.SetDiskID(node_disk, node)
6908 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname, False, idx)
6910 msg = result.fail_msg
6911 if msg:
6912 is_offline_secondary = (node in instance.secondary_nodes and
6913 result.offline)
6914 lu.LogWarning("Could not prepare block device %s on node %s"
6915 " (is_primary=False, pass=1): %s",
6916 inst_disk.iv_name, node, msg)
6917 if not (ignore_secondaries or is_offline_secondary):
6920 # FIXME: race condition on drbd migration to primary
6922 # 2nd pass, do only the primary node
6923 for idx, inst_disk in enumerate(disks):
6926 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6927 if node != instance.primary_node:
6930 node_disk = node_disk.Copy()
6931 node_disk.UnsetSize()
6932 lu.cfg.SetDiskID(node_disk, node)
6933 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname, True, idx)
6935 msg = result.fail_msg
6937 lu.LogWarning("Could not prepare block device %s on node %s"
6938 " (is_primary=True, pass=2): %s",
6939 inst_disk.iv_name, node, msg)
6942 dev_path = result.payload
6944 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6946 # leave the disks configured for the primary node
6947 # this is a workaround that would be fixed better by
6948 # improving the logical/physical id handling
6950 lu.cfg.SetDiskID(disk, instance.primary_node)
6952 return disks_ok, device_info
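# Illustrative usage sketch for _AssembleInstanceDisks (log text made up);
# this mirrors what LUInstanceActivateDisks.Exec does above:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     lu.LogInfo("%s on %s is visible as %s", iv_name, node, dev_path)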
6955 def _StartInstanceDisks(lu, instance, force):
6956 """Start the disks of an instance.
6959 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6960 ignore_secondaries=force)
6961 if not disks_ok:
6962 _ShutdownInstanceDisks(lu, instance)
6963 if force is not None and not force:
6965 hint=("If the message above refers to a secondary node,"
6966 " you can retry the operation using '--force'"))
6967 raise errors.OpExecError("Disk consistency error")
6970 class LUInstanceDeactivateDisks(NoHooksLU):
6971 """Shutdown an instance's disks.
6976 def ExpandNames(self):
6977 self._ExpandAndLockInstance()
6978 self.needed_locks[locking.LEVEL_NODE] = []
6979 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6981 def DeclareLocks(self, level):
6982 if level == locking.LEVEL_NODE:
6983 self._LockInstancesNodes()
6985 def CheckPrereq(self):
6986 """Check prerequisites.
6988 This checks that the instance is in the cluster.
6991 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6992 assert self.instance is not None, \
6993 "Cannot retrieve locked instance %s" % self.op.instance_name
6995 def Exec(self, feedback_fn):
6996 """Deactivate the disks
6999 instance = self.instance
7000 if self.op.force:
7001 _ShutdownInstanceDisks(self, instance)
7002 else:
7003 _SafeShutdownInstanceDisks(self, instance)
7006 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7007 """Shutdown block devices of an instance.
7009 This function verifies that the instance is not running before calling
7010 _ShutdownInstanceDisks.
7013 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7014 _ShutdownInstanceDisks(lu, instance, disks=disks)
7017 def _ExpandCheckDisks(instance, disks):
7018 """Return the instance disks selected by the disks list
7020 @type disks: list of L{objects.Disk} or None
7021 @param disks: selected disks
7022 @rtype: list of L{objects.Disk}
7023 @return: selected instance disks to act on
7027 return instance.disks
7029 if not set(disks).issubset(instance.disks):
7030 raise errors.ProgrammerError("Can only act on disks belonging to the"
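# Illustrative behaviour: _ExpandCheckDisks(instance, None) returns
# instance.disks unchanged, while passing a strict subset returns exactly
# that subset; any disk not belonging to the instance raises
# ProgrammerError.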
7035 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7036 """Shutdown block devices of an instance.
7038 This does the shutdown on all nodes of the instance.
7040 Errors on the primary node are ignored only if ignore_primary is true.
7045 disks = _ExpandCheckDisks(instance, disks)
7048 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7049 lu.cfg.SetDiskID(top_disk, node)
7050 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7051 msg = result.fail_msg
7053 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7054 disk.iv_name, node, msg)
7055 if ((node == instance.primary_node and not ignore_primary) or
7056 (node != instance.primary_node and not result.offline)):
7061 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7062 """Checks if a node has enough free memory.
7064 This function checks if a given node has the needed amount of free
7065 memory. In case the node has less memory or we cannot get the
7066 information from the node, this function raises an OpPrereqError exception.
7069 @type lu: C{LogicalUnit}
7070 @param lu: a logical unit from which we get configuration data
7072 @param node: the node to check
7073 @type reason: C{str}
7074 @param reason: string to use in the error message
7075 @type requested: C{int}
7076 @param requested: the amount of memory in MiB to check for
7077 @type hypervisor_name: C{str}
7078 @param hypervisor_name: the hypervisor to ask for memory stats
7080 @return: the node's current free memory, in MiB
7081 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7082 we cannot check the node
7085 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7086 nodeinfo[node].Raise("Can't get data from node %s" % node,
7087 prereq=True, ecode=errors.ECODE_ENVIRON)
7088 (_, _, (hv_info, )) = nodeinfo[node].payload
7090 free_mem = hv_info.get("memory_free", None)
7091 if not isinstance(free_mem, int):
7092 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7093 " was '%s'" % (node, free_mem),
7094 errors.ECODE_ENVIRON)
7095 if requested > free_mem:
7096 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7097 " needed %s MiB, available %s MiB" %
7098 (node, reason, requested, free_mem),
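# Illustrative call (made-up amount): require 1024 MiB free before a
# start/failover; the node's free memory is returned on success:
#
#   free_mem = _CheckNodeFreeMemory(self, instance.primary_node,
#                                   "starting instance %s" % instance.name,
#                                   1024, instance.hypervisor)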
7103 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7104 """Checks if nodes have enough free disk space in all the VGs.
7106 This function checks if all given nodes have the needed amount of
7107 free disk. In case any node has less disk or we cannot get the
7108 information from the node, this function raises an OpPrereqError exception.
7111 @type lu: C{LogicalUnit}
7112 @param lu: a logical unit from which we get configuration data
7113 @type nodenames: C{list}
7114 @param nodenames: the list of node names to check
7115 @type req_sizes: C{dict}
7116 @param req_sizes: the hash of vg and corresponding amount of disk in
7118 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7119 or we cannot check the node
7122 for vg, req_size in req_sizes.items():
7123 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
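# Example req_sizes mapping (made-up values): 10 GiB in "xenvg" plus 2 GiB
# in "metavg", checked on both candidate nodes:
#
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "metavg": 2048})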
7126 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7127 """Checks if nodes have enough free disk space in the specified VG.
7129 This function checks if all given nodes have the needed amount of
7130 free disk. In case any node has less disk or we cannot get the
7131 information from the node, this function raises an OpPrereqError exception.
7134 @type lu: C{LogicalUnit}
7135 @param lu: a logical unit from which we get configuration data
7136 @type nodenames: C{list}
7137 @param nodenames: the list of node names to check
7139 @param vg: the volume group to check
7140 @type requested: C{int}
7141 @param requested: the amount of disk in MiB to check for
7142 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7143 or we cannot check the node
7146 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7147 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7148 for node in nodenames:
7149 info = nodeinfo[node]
7150 info.Raise("Cannot get current information from node %s" % node,
7151 prereq=True, ecode=errors.ECODE_ENVIRON)
7152 (_, (vg_info, ), _) = info.payload
7153 vg_free = vg_info.get("vg_free", None)
7154 if not isinstance(vg_free, int):
7155 raise errors.OpPrereqError("Can't compute free disk space on node"
7156 " %s for vg %s, result was '%s'" %
7157 (node, vg, vg_free), errors.ECODE_ENVIRON)
7158 if requested > vg_free:
7159 raise errors.OpPrereqError("Not enough disk space on target node %s"
7160 " vg %s: required %d MiB, available %d MiB" %
7161 (node, vg, requested, vg_free),
7165 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7166 """Checks if nodes have enough physical CPUs
7168 This function checks if all given nodes have the needed number of
7169 physical CPUs. In case any node has less CPUs or we cannot get the
7170 information from the node, this function raises an OpPrereqError exception.
7173 @type lu: C{LogicalUnit}
7174 @param lu: a logical unit from which we get configuration data
7175 @type nodenames: C{list}
7176 @param nodenames: the list of node names to check
7177 @type requested: C{int}
7178 @param requested: the minimum acceptable number of physical CPUs
7179 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7180 or we cannot check the node
7183 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7184 for node in nodenames:
7185 info = nodeinfo[node]
7186 info.Raise("Cannot get current information from node %s" % node,
7187 prereq=True, ecode=errors.ECODE_ENVIRON)
7188 (_, _, (hv_info, )) = info.payload
7189 num_cpus = hv_info.get("cpu_total", None)
7190 if not isinstance(num_cpus, int):
7191 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7192 " on node %s, result was '%s'" %
7193 (node, num_cpus), errors.ECODE_ENVIRON)
7194 if requested > num_cpus:
7195 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7196 "required" % (node, num_cpus, requested),
7200 class LUInstanceStartup(LogicalUnit):
7201 """Starts an instance.
7204 HPATH = "instance-start"
7205 HTYPE = constants.HTYPE_INSTANCE
7208 def CheckArguments(self):
7210 if self.op.beparams:
7211 # fill the beparams dict
7212 objects.UpgradeBeParams(self.op.beparams)
7213 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7215 def ExpandNames(self):
7216 self._ExpandAndLockInstance()
7217 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7219 def DeclareLocks(self, level):
7220 if level == locking.LEVEL_NODE_RES:
7221 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7223 def BuildHooksEnv(self):
7226 This runs on master, primary and secondary nodes of the instance.
7230 "FORCE": self.op.force,
7233 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7237 def BuildHooksNodes(self):
7238 """Build hooks nodes.
7241 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7244 def CheckPrereq(self):
7245 """Check prerequisites.
7247 This checks that the instance is in the cluster.
7250 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7251 assert self.instance is not None, \
7252 "Cannot retrieve locked instance %s" % self.op.instance_name
7255 if self.op.hvparams:
7256 # check hypervisor parameter syntax (locally)
7257 cluster = self.cfg.GetClusterInfo()
7258 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7259 filled_hvp = cluster.FillHV(instance)
7260 filled_hvp.update(self.op.hvparams)
7261 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7262 hv_type.CheckParameterSyntax(filled_hvp)
7263 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7265 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7267 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7269 if self.primary_offline and self.op.ignore_offline_nodes:
7270 self.LogWarning("Ignoring offline primary node")
7272 if self.op.hvparams or self.op.beparams:
7273 self.LogWarning("Overridden parameters are ignored")
7275 _CheckNodeOnline(self, instance.primary_node)
7277 bep = self.cfg.GetClusterInfo().FillBE(instance)
7278 bep.update(self.op.beparams)
7280 # check bridges existence
7281 _CheckInstanceBridgesExist(self, instance)
7283 remote_info = self.rpc.call_instance_info(instance.primary_node,
7284 instance.name,
7285 instance.hypervisor)
7286 remote_info.Raise("Error checking node %s" % instance.primary_node,
7287 prereq=True, ecode=errors.ECODE_ENVIRON)
7288 if not remote_info.payload: # not running already
7289 _CheckNodeFreeMemory(self, instance.primary_node,
7290 "starting instance %s" % instance.name,
7291 bep[constants.BE_MINMEM], instance.hypervisor)
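# Illustrative parameter layering (made-up numbers): cluster-level beparams
# are filled in first, then per-opcode overrides win, e.g.
#
#   FillBE(instance)   -> {"minmem": 128, "maxmem": 128, ...}
#   self.op.beparams   -> {"maxmem": 256}
#   bep after update() -> {"minmem": 128, "maxmem": 256, ...}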
7293 def Exec(self, feedback_fn):
7294 """Start the instance.
7297 instance = self.instance
7298 force = self.op.force
7300 if not self.op.no_remember:
7301 self.cfg.MarkInstanceUp(instance.name)
7303 if self.primary_offline:
7304 assert self.op.ignore_offline_nodes
7305 self.LogInfo("Primary node offline, marked instance as started")
7307 node_current = instance.primary_node
7309 _StartInstanceDisks(self, instance, force)
7312 result = self.rpc.call_instance_start(node_current,
7313 (instance, self.op.hvparams,
7314 self.op.beparams),
7315 self.op.startup_paused)
7316 msg = result.fail_msg
7317 if msg:
7318 _ShutdownInstanceDisks(self, instance)
7319 raise errors.OpExecError("Could not start instance: %s" % msg)
7322 class LUInstanceReboot(LogicalUnit):
7323 """Reboot an instance.
7326 HPATH = "instance-reboot"
7327 HTYPE = constants.HTYPE_INSTANCE
7330 def ExpandNames(self):
7331 self._ExpandAndLockInstance()
7333 def BuildHooksEnv(self):
7336 This runs on master, primary and secondary nodes of the instance.
7340 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7341 "REBOOT_TYPE": self.op.reboot_type,
7342 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7345 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7349 def BuildHooksNodes(self):
7350 """Build hooks nodes.
7353 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7356 def CheckPrereq(self):
7357 """Check prerequisites.
7359 This checks that the instance is in the cluster.
7362 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7363 assert self.instance is not None, \
7364 "Cannot retrieve locked instance %s" % self.op.instance_name
7365 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7366 _CheckNodeOnline(self, instance.primary_node)
7368 # check bridges existence
7369 _CheckInstanceBridgesExist(self, instance)
7371 def Exec(self, feedback_fn):
7372 """Reboot the instance.
7375 instance = self.instance
7376 ignore_secondaries = self.op.ignore_secondaries
7377 reboot_type = self.op.reboot_type
7379 remote_info = self.rpc.call_instance_info(instance.primary_node,
7380 instance.name,
7381 instance.hypervisor)
7382 remote_info.Raise("Error checking node %s" % instance.primary_node)
7383 instance_running = bool(remote_info.payload)
7385 node_current = instance.primary_node
7387 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7388 constants.INSTANCE_REBOOT_HARD]:
7389 for disk in instance.disks:
7390 self.cfg.SetDiskID(disk, node_current)
7391 result = self.rpc.call_instance_reboot(node_current, instance,
7392 reboot_type,
7393 self.op.shutdown_timeout)
7394 result.Raise("Could not reboot instance")
7396 if instance_running:
7397 result = self.rpc.call_instance_shutdown(node_current, instance,
7398 self.op.shutdown_timeout)
7399 result.Raise("Could not shutdown instance for full reboot")
7400 _ShutdownInstanceDisks(self, instance)
7402 self.LogInfo("Instance %s was already stopped, starting now",
7403 instance.name)
7404 _StartInstanceDisks(self, instance, ignore_secondaries)
7405 result = self.rpc.call_instance_start(node_current,
7406 (instance, None, None), False)
7407 msg = result.fail_msg
7408 if msg:
7409 _ShutdownInstanceDisks(self, instance)
7410 raise errors.OpExecError("Could not start instance for"
7411 " full reboot: %s" % msg)
7413 self.cfg.MarkInstanceUp(instance.name)
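# Summary of the reboot types handled above: SOFT and HARD reboots are
# delegated to the hypervisor via call_instance_reboot; any other type
# (i.e. a full reboot) is emulated as shutdown + disk teardown + disk
# re-assembly + start.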
7416 class LUInstanceShutdown(LogicalUnit):
7417 """Shutdown an instance.
7420 HPATH = "instance-stop"
7421 HTYPE = constants.HTYPE_INSTANCE
7424 def ExpandNames(self):
7425 self._ExpandAndLockInstance()
7427 def BuildHooksEnv(self):
7430 This runs on master, primary and secondary nodes of the instance.
7433 env = _BuildInstanceHookEnvByObject(self, self.instance)
7434 env["TIMEOUT"] = self.op.timeout
7437 def BuildHooksNodes(self):
7438 """Build hooks nodes.
7441 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7444 def CheckPrereq(self):
7445 """Check prerequisites.
7447 This checks that the instance is in the cluster.
7450 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7451 assert self.instance is not None, \
7452 "Cannot retrieve locked instance %s" % self.op.instance_name
7454 if not self.op.force:
7455 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7457 self.LogWarning("Ignoring offline instance check")
7459 self.primary_offline = \
7460 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7462 if self.primary_offline and self.op.ignore_offline_nodes:
7463 self.LogWarning("Ignoring offline primary node")
7465 _CheckNodeOnline(self, self.instance.primary_node)
7467 def Exec(self, feedback_fn):
7468 """Shutdown the instance.
7471 instance = self.instance
7472 node_current = instance.primary_node
7473 timeout = self.op.timeout
7475 # If the instance is offline we shouldn't mark it as down, as that
7476 # resets the offline flag.
7477 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7478 self.cfg.MarkInstanceDown(instance.name)
7480 if self.primary_offline:
7481 assert self.op.ignore_offline_nodes
7482 self.LogInfo("Primary node offline, marked instance as stopped")
7484 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7485 msg = result.fail_msg
7487 self.LogWarning("Could not shutdown instance: %s", msg)
7489 _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  _MODIFYABLE = compat.UniqueFrozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    constants.IDISK_PROVIDER,
    ]))

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    be_full = self.cfg.GetClusterInfo().FillBE(self.instance)

    # The allocator should actually run in "relocate" mode, but current
    # allocators don't support relocating all the nodes of an instance at
    # the same time. As a workaround we use "allocate" mode, but this is
    # suboptimal for two reasons:
    # - The instance name passed to the allocator is present in the list of
    #   existing instances, so there could be a conflict within the
    #   internal structures of the allocator. This doesn't happen with the
    #   current allocators, but it's a liability.
    # - The allocator counts the resources used by the instance twice: once
    #   because the instance exists already, and once because it tries to
    #   allocate a new instance.
    # The allocator could choose some of the nodes on which the instance is
    # running, but that's not a problem. If the instance nodes are broken,
    # they should already be marked as drained or offline, and hence
    # skipped by the allocator. If instance disks have been lost for other
    # reasons, then recreating the disks on the same nodes should be fine.
    disk_template = self.instance.disk_template
    spindle_use = be_full[constants.BE_SPINDLE_USE]
    req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
                                        disk_template=disk_template,
                                        tags=list(self.instance.GetTags()),
                                        os=self.instance.os,
                                        nics=[{}],
                                        vcpus=be_full[constants.BE_VCPUS],
                                        memory=be_full[constants.BE_MAXMEM],
                                        spindle_use=spindle_use,
                                        disks=[{constants.IDISK_SIZE: d.size,
                                                constants.IDISK_MODE: d.mode}
                                               for d in self.instance.disks],
                                        hypervisor=self.instance.hypervisor,
                                        node_whitelist=None)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    assert req.RequiredNodes() == len(self.instance.all_nodes)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.op.nodes = ial.result
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
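
  # Added note: for an IAReqInstanceAlloc request, ial.result is expected to
  # be a list of node names sized by req.RequiredNodes(), e.g. two names for
  # a DRBD8 instance; the example shape is illustrative, not from the source.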

  def CheckArguments(self):
    if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    # We don't want _CheckIAllocatorOrNode selecting the default iallocator
    # when neither iallocator nor nodes are specified
    if self.op.iallocator or self.op.nodes:
      _CheckIAllocatorOrNode(self, "iallocator", "nodes")

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifiable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      if self.op.iallocator:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.iallocator is not None
      assert not self.op.nodes
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock the primary group used by the instance optimistically; this
      # requires going via the node before it's locked, requiring
      # verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)

    elif level == locking.LEVEL_NODE:
      # If an allocator is used, then we lock all the nodes in the current
      # instance group, as we don't know yet which ones will be selected;
      # if we replace the nodes without using an allocator, locks are
      # already declared in ExpandNames; otherwise, we need to lock all the
      # instance nodes for disk re-creation
      if self.op.iallocator:
        assert not self.op.nodes
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1

        # Lock member nodes of the group of the primary node
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          self.needed_locks[locking.LEVEL_NODE].extend(
            self.cfg.GetNodeGroup(group_uuid).members)

        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
      elif not self.op.nodes:
        self._LockInstancesNodes(primary_only=False)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
             len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
             len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    if not self.op.iallocator:
      _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      # Node group locks are acquired only for the primary node (and only
      # when the allocator is used)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
                               primary_only=True)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_PARAMS)

    if ((self.op.nodes or self.op.iallocator) and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

    if self.op.iallocator:
      self._RunAllocator()
      # Release unneeded node and node resource locks
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)

    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    # All touched nodes must be locked
    mylocks = self.owned_locks(locking.LEVEL_NODE)
    assert mylocks.issuperset(frozenset(instance.all_nodes))
    _CreateDisks(self, instance, to_skip=to_skip)
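
# Added note: each entry appended to "mods" above has the shape
# (disk_index, new_logical_id_or_None, changes_dict); applying the entries
# only after all asserts pass keeps a failed recreate from leaving partial
# configuration updates behind.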


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_ARGS)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = _CheckHostnameSane(self, new_name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    # update info on disks
    info = _GetInstanceInfoText(inst)
    for (idx, disk) in enumerate(inst.disks):
      for node in inst.all_nodes:
        self.cfg.SetDiskID(disk, node)
        result = self.rpc.call_blockdev_setinfo(node, disk, info)
        if result.fail_msg:
          self.LogWarning("Error setting info on node %s for disk %s: %s",
                          node, idx, result.fail_msg)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
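
# Added note: setting lu.remove_locks[locking.LEVEL_INSTANCE] above marks the
# instance lock for removal; the processor is assumed to drop it once the LU
# finishes, so no caller is left holding a lock for the deleted instance.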


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


def _ExpandNamesForMigration(lu):
  """Expands names for use with L{TLMigrateInstance}.

  @type lu: L{LogicalUnit}

  """
  if lu.op.target_node is not None:
    lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)

  lu.needed_locks[locking.LEVEL_NODE] = []
  lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  lu.needed_locks[locking.LEVEL_NODE_RES] = []
  lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  # The node allocation lock is actually only needed for replicated instances
  # (e.g. DRBD8) and if an iallocator is used.
  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []


def _DeclareLocksForMigration(lu, level):
  """Declares locks for L{TLMigrateInstance}.

  @type lu: L{LogicalUnit}
  @param level: Lock level

  """
  if level == locking.LEVEL_NODE_ALLOC:
    assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)

    instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)

    # Node locks are already declared here rather than at LEVEL_NODE as we need
    # the instance object anyway to declare the node allocation lock.
    if instance.disk_template in constants.DTS_EXT_MIRROR:
      if lu.op.target_node is None:
        lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
      else:
        lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                               lu.op.target_node]
        del lu.recalculate_locks[locking.LEVEL_NODE]
    else:
      lu._LockInstancesNodes() # pylint: disable=W0212

  elif level == locking.LEVEL_NODE:
    # Node locks are declared together with the node allocation lock
    assert (lu.needed_locks[locking.LEVEL_NODE] or
            lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)

  elif level == locking.LEVEL_NODE_RES:
    # Copy node locks
    lu.needed_locks[locking.LEVEL_NODE_RES] = \
      _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
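
# Added note: DeclareLocks is invoked once per lock level in increasing level
# order, and LEVEL_NODE_ALLOC is processed before LEVEL_NODE here; that is why
# the node locks can already be computed while the allocation lock is being
# declared, with LEVEL_NODE itself reduced to an assertion.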


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name, False, True, False,
                        self.op.ignore_consistency, True,
                        self.op.shutdown_timeout, self.op.ignore_ipolicy)

    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
                        False, self.op.allow_failover, False,
                        self.op.allow_runtime_changes,
                        constants.DEFAULT_SHUTDOWN_TIMEOUT,
                        self.op.ignore_ipolicy)

    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    snodes = list(instance.secondary_nodes)
    nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
    return (nl, nl)
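
# Added note: both LUInstanceFailover and LUInstanceMigrate delegate the real
# work to TLMigrateInstance (defined further below); the positional flags they
# pass map to (cleanup, failover, fallback, ignore_consistency,
# allow_runtime_changes) in its constructor.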


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    cluster = self.cfg.GetClusterInfo()
    group_info = self.cfg.GetNodeGroup(node.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.LogWarning("Could not shutdown instance %s on node %s."
                        " Proceeding anyway. Please make sure node"
                        " %s is down. Error details: %s",
                        instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, (disk, instance),
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 ",".join(errs))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migration instances
    allow_runtime_changes = self.op.allow_runtime_changes
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node,
                                 allow_runtime_changes=allow_runtime_changes,
                                 ignore_ipolicy=self.op.ignore_ipolicy)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
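
# Added note: "jobs" above is a list of job definitions, each itself a list of
# opcodes; here every primary instance gets its own single-opcode job, so the
# individual migrations can succeed or fail independently of each other.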


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we cleanup from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
  @type ignore_ipolicy: bool
  @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating

  """

  # Constants
  _MIGRATION_POLL_INTERVAL = 1 # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds

  def __init__(self, lu, instance_name, cleanup, failover, fallback,
               ignore_consistency, allow_runtime_changes, shutdown_timeout,
               ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout
    self.ignore_ipolicy = ignore_ipolicy
    self.allow_runtime_changes = allow_runtime_changes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance
    cluster = self.cfg.GetClusterInfo()

    if (not self.cleanup and
        not instance.admin_state == constants.ADMINST_UP and
        not self.failover and self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)

      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # Check that the target node is correct in terms of instance policy
      nodeinfo = self.cfg.GetNodeInfo(self.target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])
        _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    else:
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)

      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)
      nodeinfo = self.cfg.GetNodeInfo(target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

    i_be = cluster.FillBE(instance)

    # check memory requirements on the secondary node
    if (not self.cleanup and
        (not self.failover or instance.admin_state == constants.ADMINST_UP)):
      self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
                                               "migrating instance %s" %
                                               instance.name,
                                               i_be[constants.BE_MINMEM],
                                               instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check if failover must be forced instead of migration
    if (not self.cleanup and not self.failover and
        i_be[constants.BE_ALWAYS_FAILOVER]):
      self.lu.LogInfo("Instance configured to always failover; fallback"
                      " to failover")
      self.failover = True

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = cluster.FillHV(self.instance, skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

    if not (self.failover or self.cleanup):
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking instance on node %s" %
                        instance.primary_node)
      instance_running = bool(remote_info.payload)
      if instance_running:
        self.current_mem = int(remote_info.payload["memory"])

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)

    # FIXME: add a self.ignore_ipolicy option
    req = iallocator.IAReqRelocate(name=self.instance_name,
                                   relocate_from=[self.instance.primary_node])
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            (self.instance.disks,
                                             self.instance))
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)
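
  # Added note: nres.payload from call_drbd_wait_sync is unpacked above as a
  # (node_done, node_percent) pair per node; the polling loop only terminates
  # once every node reports node_done.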

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           (self.instance.disks, self.instance),
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)
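
  # Added note: a minimal sketch of how the helpers above are combined by
  # _ExecMigration below for DRBD-based instances:
  #
  #   self._EnsureSecondary(target_node)  # demote the target's disks
  #   self._GoStandalone()                # disconnect the DRBD network
  #   self._GoReconnect(True)             # reconnect in dual-master mode
  #   self._WaitUntilSync()               # wait for full resync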

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore here errors, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                                 instance,
                                                                 migration_info,
                                                                 False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

    abort_result = self.rpc.call_instance_finalize_migration_src(
      source_node, instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    source_node, abort_msg)

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, [self.instance.hypervisor], False)
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    (_, _, (src_info, )) = nodeinfo[source_node].payload
    (_, _, (dst_info, )) = nodeinfo[target_node].payload

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    if self.current_mem > self.tgt_free_mem:
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (instance.name, target_node,
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])
    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        if not msg:
          msg = "hypervisor returned failure"
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    # If the instance's disk template is `rbd' or `ext' and there was a
    # successful migration, unmap the device from the source node.
    if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
      disks = _ExpandCheckDisks(instance, instance.disks)
      self.feedback_fn("* unmapping instance's disks from %s" % source_node)
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
        msg = result.fail_msg
        if msg:
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name, source_node, msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name, source_node)

    self.feedback_fn("* done")

  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* checking disk consistency between source and target")
      for (idx, dev) in enumerate(instance.disks):
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
                                     False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, idx, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % idx)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()
9275 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9277 """Wrapper around L{_CreateBlockDevInner}.
9279 This method annotates the root device first.
9282 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9283 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9284 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9285 force_open, excl_stor)
9288 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9289 info, force_open, excl_stor):
9290 """Create a tree of block devices on a given node.
9292 If this device type has to be created on secondaries, create it and
9293 all its children.
9295 If not, just recurse to children keeping the same 'force' value.
9297 @attention: The device has to be annotated already.
9299 @param lu: the lu on whose behalf we execute
9300 @param node: the node on which to create the device
9301 @type instance: L{objects.Instance}
9302 @param instance: the instance which owns the device
9303 @type device: L{objects.Disk}
9304 @param device: the device to create
9305 @type force_create: boolean
9306 @param force_create: whether to force creation of this device; this
9307 will be changed to True whenever we find a device whose
9308 CreateOnSecondary() method returns True
9309 @param info: the extra 'metadata' we should attach to the device
9310 (this will be represented as a LVM tag)
9311 @type force_open: boolean
9312 @param force_open: this parameter will be passed to the
9313 L{backend.BlockdevCreate} function where it specifies
9314 whether we run on primary or not, and it affects both
9315 the child assembly and the device's own Open() execution
9316 @type excl_stor: boolean
9317 @param excl_stor: Whether exclusive_storage is active for the node
9320 if device.CreateOnSecondary():
9324 for child in device.children:
9325 _CreateBlockDevInner(lu, node, instance, child, force_create,
9326 info, force_open, excl_stor)
9328 if not force_create:
9331 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
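# A minimal sketch of how the recursion above plays out (device tree and
# flag values are hypothetical): for a DRBD8 disk, CreateOnSecondary()
# is true, so the two LV children (data and metadata) are visited with
# force_create=True and actually created; for a plain LV there are no
# children and creation happens only where the caller already passed
# force_create=True:
#
#   _CreateBlockDevInner(lu, node, instance, drbd_disk,
#                        force_create=False, info=info,
#                        force_open=False, excl_stor=False)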
9335 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9337 """Create a single block device on a given node.
9339 This will not recurse over children of the device, so they must be
9342 @param lu: the lu on whose behalf we execute
9343 @param node: the node on which to create the device
9344 @type instance: L{objects.Instance}
9345 @param instance: the instance which owns the device
9346 @type device: L{objects.Disk}
9347 @param device: the device to create
9348 @param info: the extra 'metadata' we should attach to the device
9349 (this will be represented as a LVM tag)
9350 @type force_open: boolean
9351 @param force_open: this parameter will be passed to the
9352 L{backend.BlockdevCreate} function where it specifies
9353 whether we run on primary or not, and it affects both
9354 the child assembly and the device's own Open() execution
9355 @type excl_stor: boolean
9356 @param excl_stor: Whether exclusive_storage is active for the node
9359 lu.cfg.SetDiskID(device, node)
9360 result = lu.rpc.call_blockdev_create(node, device, device.size,
9361 instance.name, force_open, info,
9363 result.Raise("Can't create block device %s on"
9364 " node %s for instance %s" % (device, node, instance.name))
9365 if device.physical_id is None:
9366 device.physical_id = result.payload
9369 def _GenerateUniqueNames(lu, exts):
9370 """Generate a suitable LV name.
9372 This will generate a logical volume name for the given instance.
9377 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9378 results.append("%s%s" % (new_id, val))
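# Illustrative example (the generated IDs are hypothetical; each
# extension gets its own fresh unique ID from the config):
#
#   _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
#   -> ["c1f4a2d9-7b.disk0_data", "7e03dd41-a6.disk0_meta"]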
9382 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9383 iv_name, p_minor, s_minor):
9384 """Generate a drbd8 device complete with its children.
9387 assert len(vgnames) == len(names) == 2
9388 port = lu.cfg.AllocatePort()
9389 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9391 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9392 logical_id=(vgnames[0], names[0]),
9394 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9395 size=constants.DRBD_META_SIZE,
9396 logical_id=(vgnames[1], names[1]),
9398 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9399 logical_id=(primary, secondary, port,
9402 children=[dev_data, dev_meta],
9403 iv_name=iv_name, params={})
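# The device tree built above, sketched (sizes in MiB; port, minors and
# the shared secret come from the cluster config):
#
#   DRBD8  logical_id=(primary, secondary, port, p_minor, s_minor, secret)
#    |- data LV  logical_id=(vgnames[0], names[0]), size=size
#    `- meta LV  logical_id=(vgnames[1], names[1]), size=DRBD_META_SIZE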
9407 _DISK_TEMPLATE_NAME_PREFIX = {
9408 constants.DT_PLAIN: "",
9409 constants.DT_RBD: ".rbd",
9410 constants.DT_EXT: ".ext",
9414 _DISK_TEMPLATE_DEVICE_TYPE = {
9415 constants.DT_PLAIN: constants.LD_LV,
9416 constants.DT_FILE: constants.LD_FILE,
9417 constants.DT_SHARED_FILE: constants.LD_FILE,
9418 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9419 constants.DT_RBD: constants.LD_RBD,
9420 constants.DT_EXT: constants.LD_EXT,
9424 def _GenerateDiskTemplate(
9425 lu, template_name, instance_name, primary_node, secondary_nodes,
9426 disk_info, file_storage_dir, file_driver, base_index,
9427 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9428 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9429 """Generate the entire disk layout for a given template type.
9432 vgname = lu.cfg.GetVGName()
9433 disk_count = len(disk_info)
9436 if template_name == constants.DT_DISKLESS:
9438 elif template_name == constants.DT_DRBD8:
9439 if len(secondary_nodes) != 1:
9440 raise errors.ProgrammerError("Wrong template configuration")
9441 remote_node = secondary_nodes[0]
9442 minors = lu.cfg.AllocateDRBDMinor(
9443 [primary_node, remote_node] * len(disk_info), instance_name)
9445 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9447 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9450 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9451 for i in range(disk_count)]):
9452 names.append(lv_prefix + "_data")
9453 names.append(lv_prefix + "_meta")
9454 for idx, disk in enumerate(disk_info):
9455 disk_index = idx + base_index
9456 data_vg = disk.get(constants.IDISK_VG, vgname)
9457 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9458 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9459 disk[constants.IDISK_SIZE],
9461 names[idx * 2:idx * 2 + 2],
9462 "disk/%d" % disk_index,
9463 minors[idx * 2], minors[idx * 2 + 1])
9464 disk_dev.mode = disk[constants.IDISK_MODE]
9465 disks.append(disk_dev)
9468 raise errors.ProgrammerError("Wrong template configuration")
9470 if template_name == constants.DT_FILE:
9472 elif template_name == constants.DT_SHARED_FILE:
9473 _req_shr_file_storage()
9475 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9476 if name_prefix is None:
9479 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9480 (name_prefix, base_index + i)
9481 for i in range(disk_count)])
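# Illustrative example (UUIDs hypothetical): for DT_RBD with
# base_index=0 and two disks this yields names such as
#   ["<uuid0>.rbd.disk0", "<uuid1>.rbd.disk1"]
# while DT_PLAIN uses the empty prefix, e.g. "<uuid0>.disk0".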
9483 if template_name == constants.DT_PLAIN:
9485 def logical_id_fn(idx, _, disk):
9486 vg = disk.get(constants.IDISK_VG, vgname)
9487 return (vg, names[idx])
9489 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9491 lambda _, disk_index, disk: (file_driver,
9492 "%s/disk%d" % (file_storage_dir,
9494 elif template_name == constants.DT_BLOCK:
9496 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9497 disk[constants.IDISK_ADOPT])
9498 elif template_name == constants.DT_RBD:
9499 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9500 elif template_name == constants.DT_EXT:
9501 def logical_id_fn(idx, _, disk):
9502 provider = disk.get(constants.IDISK_PROVIDER, None)
9503 if provider is None:
9504 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9505 " not found" % (constants.DT_EXT,
9506 constants.IDISK_PROVIDER))
9507 return (provider, names[idx])
9509 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9511 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9513 for idx, disk in enumerate(disk_info):
9515 # Only for the Ext template add disk_info to params
9516 if template_name == constants.DT_EXT:
9517 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9519 if key not in constants.IDISK_PARAMS:
9520 params[key] = disk[key]
9521 disk_index = idx + base_index
9522 size = disk[constants.IDISK_SIZE]
9523 feedback_fn("* disk %s, size %s" %
9524 (disk_index, utils.FormatUnit(size, "h")))
9525 disks.append(objects.Disk(dev_type=dev_type, size=size,
9526 logical_id=logical_id_fn(idx, disk_index, disk),
9527 iv_name="disk/%d" % disk_index,
9528 mode=disk[constants.IDISK_MODE],
9534 def _GetInstanceInfoText(instance):
9535 """Compute that text that should be added to the disk's metadata.
9538 return "originstname+%s" % instance.name
9541 def _CalcEta(time_taken, written, total_size):
9542 """Calculates the ETA based on size written and total size.
9544 @param time_taken: The time taken so far
9545 @param written: amount written so far
9546 @param total_size: The total size of data to be written
9547 @return: The remaining time in seconds
9550 avg_time = time_taken / float(written)
9551 return (total_size - written) * avg_time
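# Worked example (numbers are hypothetical): 512 of 2048 units written
# in 120 seconds gives an average of 120/512 s per unit, so the
# remaining 1536 units need (2048 - 512) * (120 / 512.0) = 360 seconds:
#
#   >>> _CalcEta(120, 512, 2048)
#   360.0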
9554 def _WipeDisks(lu, instance, disks=None):
9555 """Wipes instance disks.
9557 @type lu: L{LogicalUnit}
9558 @param lu: the logical unit on whose behalf we execute
9559 @type instance: L{objects.Instance}
9560 @param instance: the instance whose disks we should wipe
9561 @return: the success of the wipe
9564 node = instance.primary_node
9567 disks = [(idx, disk, 0)
9568 for (idx, disk) in enumerate(instance.disks)]
9570 for (_, device, _) in disks:
9571 lu.cfg.SetDiskID(device, node)
9573 logging.info("Pausing synchronization of disks of instance '%s'",
9575 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9576 (map(compat.snd, disks),
9579 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9581 for idx, success in enumerate(result.payload):
9583 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9584 " failed", idx, instance.name)
9587 for (idx, device, offset) in disks:
9588 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9589 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9591 int(min(constants.MAX_WIPE_CHUNK,
9592 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
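# Worked example (assuming the usual constants.MAX_WIPE_CHUNK of 1024
# MiB and constants.MIN_WIPE_CHUNK_PERCENT of 10): a 100 GiB disk gets
# min(1024, 102400 / 100.0 * 10) = 1024 MiB chunks, while a 5 GiB disk
# gets min(1024, 5120 / 100.0 * 10) = 512 MiB chunks.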
9596 start_time = time.time()
9601 info_text = (" (from %s to %s)" %
9602 (utils.FormatUnit(offset, "h"),
9603 utils.FormatUnit(size, "h")))
9605 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9607 logging.info("Wiping disk %d for instance %s on node %s using"
9608 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9610 while offset < size:
9611 wipe_size = min(wipe_chunk_size, size - offset)
9613 logging.debug("Wiping disk %d, offset %s, chunk %s",
9614 idx, offset, wipe_size)
9616 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9618 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9619 (idx, offset, wipe_size))
9623 if now - last_output >= 60:
9624 eta = _CalcEta(now - start_time, offset, size)
9625 lu.LogInfo(" - done: %.1f%% ETA: %s",
9626 offset / float(size) * 100, utils.FormatSeconds(eta))
9629 logging.info("Resuming synchronization of disks for instance '%s'",
9632 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9633 (map(compat.snd, disks),
9638 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9639 node, result.fail_msg)
9641 for idx, success in enumerate(result.payload):
9643 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9644 " failed", idx, instance.name)
9647 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9648 """Create all disks for an instance.
9650 This abstracts away some work from AddInstance.
9652 @type lu: L{LogicalUnit}
9653 @param lu: the logical unit on whose behalf we execute
9654 @type instance: L{objects.Instance}
9655 @param instance: the instance whose disks we should create
9657 @param to_skip: list of indices to skip
9658 @type target_node: string
9659 @param target_node: if passed, overrides the target node for creation
9661 @return: the success of the creation
9664 info = _GetInstanceInfoText(instance)
9665 if target_node is None:
9666 pnode = instance.primary_node
9667 all_nodes = instance.all_nodes
9672 if instance.disk_template in constants.DTS_FILEBASED:
9673 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9674 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9676 result.Raise("Failed to create directory '%s' on"
9677 " node %s" % (file_storage_dir, pnode))
9679 # Note: this needs to be kept in sync with adding of disks in
9680 # LUInstanceSetParams
9681 for idx, device in enumerate(instance.disks):
9682 if to_skip and idx in to_skip:
9684 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9686 for node in all_nodes:
9687 f_create = node == pnode
9688 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9691 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9692 """Remove all disks for an instance.
9694 This abstracts away some work from `AddInstance()` and
9695 `RemoveInstance()`. Note that in case some of the devices couldn't
9696 be removed, the removal will continue with the other ones (compare
9697 with `_CreateDisks()`).
9699 @type lu: L{LogicalUnit}
9700 @param lu: the logical unit on whose behalf we execute
9701 @type instance: L{objects.Instance}
9702 @param instance: the instance whose disks we should remove
9703 @type target_node: string
9704 @param target_node: used to override the node on which to remove the disks
9706 @return: the success of the removal
9709 logging.info("Removing block devices for instance %s", instance.name)
9712 ports_to_release = set()
9713 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9714 for (idx, device) in enumerate(anno_disks):
9716 edata = [(target_node, device)]
9718 edata = device.ComputeNodeTree(instance.primary_node)
9719 for node, disk in edata:
9720 lu.cfg.SetDiskID(disk, node)
9721 result = lu.rpc.call_blockdev_remove(node, disk)
9723 lu.LogWarning("Could not remove disk %s on node %s,"
9724 " continuing anyway: %s", idx, node, result.fail_msg)
9725 if not (result.offline and node != instance.primary_node):
9728 # if this is a DRBD disk, return its port to the pool
9729 if device.dev_type in constants.LDS_DRBD:
9730 ports_to_release.add(device.logical_id[2])
9732 if all_result or ignore_failures:
9733 for port in ports_to_release:
9734 lu.cfg.AddTcpUdpPort(port)
9736 if instance.disk_template in constants.DTS_FILEBASED:
9737 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9741 tgt = instance.primary_node
9742 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9744 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9745 file_storage_dir, instance.primary_node, result.fail_msg)
9751 def _ComputeDiskSizePerVG(disk_template, disks):
9752 """Compute disk size requirements in the volume group
9755 def _compute(disks, payload):
9756 """Universal algorithm.
9761 vgs[disk[constants.IDISK_VG]] = \
9762 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9766 # Required free disk space as a function of disk and swap space
9768 constants.DT_DISKLESS: {},
9769 constants.DT_PLAIN: _compute(disks, 0),
9770 # 128 MB are added for drbd metadata for each disk
9771 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9772 constants.DT_FILE: {},
9773 constants.DT_SHARED_FILE: {},
9776 if disk_template not in req_size_dict:
9777 raise errors.ProgrammerError("Disk template '%s' size requirement"
9778 " is unknown" % disk_template)
9780 return req_size_dict[disk_template]
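# Illustrative example (sizes in MiB are hypothetical): two DRBD8 disks
# of 1024 and 2048 in volume group "xenvg" yield
#   _compute(disks, constants.DRBD_META_SIZE)
#   -> {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328}
# given the 128 MiB of DRBD metadata added per disk.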
9783 def _FilterVmNodes(lu, nodenames):
9784 """Filters out non-vm_capable nodes from a list.
9786 @type lu: L{LogicalUnit}
9787 @param lu: the logical unit for which we check
9788 @type nodenames: list
9789 @param nodenames: the list of nodes on which we should check
9791 @return: the list of vm-capable nodes
9794 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9795 return [name for name in nodenames if name not in vm_nodes]
9798 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9799 """Hypervisor parameter validation.
9801 This function abstracts the hypervisor parameter validation to be
9802 used in both instance create and instance modify.
9804 @type lu: L{LogicalUnit}
9805 @param lu: the logical unit for which we check
9806 @type nodenames: list
9807 @param nodenames: the list of nodes on which we should check
9808 @type hvname: string
9809 @param hvname: the name of the hypervisor we should use
9810 @type hvparams: dict
9811 @param hvparams: the parameters which we need to check
9812 @raise errors.OpPrereqError: if the parameters are not valid
9815 nodenames = _FilterVmNodes(lu, nodenames)
9817 cluster = lu.cfg.GetClusterInfo()
9818 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9820 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9821 for node in nodenames:
9822 info = hvinfo[node]
9825 info.Raise("Hypervisor parameter validation failed on node %s" % node)
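# A small sketch of the merge performed above (keys and values are
# hypothetical): instance-level hvparams override the cluster-level
# defaults for the selected hypervisor before validation:
#
#   objects.FillDict({"kernel_path": "/boot/vmlinuz", "acpi": True},
#                    {"acpi": False})
#   -> {"kernel_path": "/boot/vmlinuz", "acpi": False}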
9828 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9829 """OS parameters validation.
9831 @type lu: L{LogicalUnit}
9832 @param lu: the logical unit for which we check
9833 @type required: boolean
9834 @param required: whether the validation should fail if the OS is not
9835 found
9836 @type nodenames: list
9837 @param nodenames: the list of nodes on which we should check
9838 @type osname: string
9839 @param osname: the name of the OS we should use
9840 @type osparams: dict
9841 @param osparams: the parameters which we need to check
9842 @raise errors.OpPrereqError: if the parameters are not valid
9845 nodenames = _FilterVmNodes(lu, nodenames)
9846 result = lu.rpc.call_os_validate(nodenames, required, osname,
9847 [constants.OS_VALIDATE_PARAMETERS],
9849 for node, nres in result.items():
9850 # we don't check for offline cases since this should be run only
9851 # against the master node and/or an instance's nodes
9852 nres.Raise("OS Parameters validation failed on node %s" % node)
9853 if not nres.payload:
9854 lu.LogInfo("OS %s not found on node %s, validation skipped",
9858 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9859 """Wrapper around IAReqInstanceAlloc.
9861 @param op: The instance opcode
9862 @param disks: The computed disks
9863 @param nics: The computed nics
9864 @param beparams: The fully filled beparams
9865 @param node_whitelist: List of nodes which should appear as online to the
9866 allocator (unless the node is already marked offline)
9868 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9871 spindle_use = beparams[constants.BE_SPINDLE_USE]
9872 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9873 disk_template=op.disk_template,
9876 vcpus=beparams[constants.BE_VCPUS],
9877 memory=beparams[constants.BE_MAXMEM],
9878 spindle_use=spindle_use,
9880 nics=[n.ToDict() for n in nics],
9881 hypervisor=op.hypervisor,
9882 node_whitelist=node_whitelist)
9885 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9886 """Computes the nics.
9888 @param op: The instance opcode
9889 @param cluster: Cluster configuration object
9890 @param default_ip: The default ip to assign
9891 @param cfg: An instance of the configuration object
9892 @param ec_id: Execution context ID
9894 @returns: The built-up nics
9899 nic_mode_req = nic.get(constants.INIC_MODE, None)
9900 nic_mode = nic_mode_req
9901 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9902 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9904 net = nic.get(constants.INIC_NETWORK, None)
9905 link = nic.get(constants.NIC_LINK, None)
9906 ip = nic.get(constants.INIC_IP, None)
9908 if net is None or net.lower() == constants.VALUE_NONE:
9911 if nic_mode_req is not None or link is not None:
9912 raise errors.OpPrereqError("If network is given, no mode or link"
9913 " is allowed to be passed",
9916 # ip validity checks
9917 if ip is None or ip.lower() == constants.VALUE_NONE:
9919 elif ip.lower() == constants.VALUE_AUTO:
9920 if not op.name_check:
9921 raise errors.OpPrereqError("IP address set to auto but name checks"
9922 " have been skipped",
9926 # We defer pool operations until later, so that the iallocator has
9927 # filled in the instance's node(s)
9928 if ip.lower() == constants.NIC_IP_POOL:
9930 raise errors.OpPrereqError("if ip=pool, parameter network"
9931 " must be passed too",
9934 elif not netutils.IPAddress.IsValid(ip):
9935 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9940 # TODO: check the ip address for uniqueness
9941 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9942 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9945 # MAC address verification
9946 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9947 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9948 mac = utils.NormalizeAndValidateMac(mac)
9951 # TODO: We need to factor this out
9952 cfg.ReserveMAC(mac, ec_id)
9953 except errors.ReservationError:
9954 raise errors.OpPrereqError("MAC address %s already in use"
9955 " in cluster" % mac,
9956 errors.ECODE_NOTUNIQUE)
9958 # Build nic parameters
9961 nicparams[constants.NIC_MODE] = nic_mode
9963 nicparams[constants.NIC_LINK] = link
9965 check_params = cluster.SimpleFillNIC(nicparams)
9966 objects.NIC.CheckParameterSyntax(check_params)
9967 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9968 network=net, nicparams=nicparams))
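# Illustrative example (values hypothetical): an input nic of
#   {constants.INIC_MODE: "auto", constants.INIC_IP: "auto"}
# takes the cluster default mode, resolves the IP to default_ip (which
# requires op.name_check), and keeps mac=constants.VALUE_AUTO to be
# replaced by a generated address later; combining a network with an
# explicit mode or link raises OpPrereqError, as checked above.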
9973 def _ComputeDisks(op, default_vg):
9974 """Computes the instance disks.
9976 @param op: The instance opcode
9977 @param default_vg: The default_vg to assume
9979 @return: The computed disks
9983 for disk in op.disks:
9984 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9985 if mode not in constants.DISK_ACCESS_SET:
9986 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9987 mode, errors.ECODE_INVAL)
9988 size = disk.get(constants.IDISK_SIZE, None)
9990 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9993 except (TypeError, ValueError):
9994 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9997 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9998 if ext_provider and op.disk_template != constants.DT_EXT:
9999 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10000 " disk template, not %s" %
10001 (constants.IDISK_PROVIDER, constants.DT_EXT,
10002 op.disk_template), errors.ECODE_INVAL)
10004 data_vg = disk.get(constants.IDISK_VG, default_vg)
10006 constants.IDISK_SIZE: size,
10007 constants.IDISK_MODE: mode,
10008 constants.IDISK_VG: data_vg,
10011 if constants.IDISK_METAVG in disk:
10012 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10013 if constants.IDISK_ADOPT in disk:
10014 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10016 # For extstorage, demand the `provider' option and add any
10017 # additional parameters (ext-params) to the dict
10018 if op.disk_template == constants.DT_EXT:
10020 new_disk[constants.IDISK_PROVIDER] = ext_provider
10022 if key not in constants.IDISK_PARAMS:
10023 new_disk[key] = disk[key]
10025 raise errors.OpPrereqError("Missing provider for template '%s'" %
10026 constants.DT_EXT, errors.ECODE_INVAL)
10028 disks.append(new_disk)
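# Illustrative example (sizes hypothetical): with default_vg="xenvg",
#   op.disks = [{constants.IDISK_SIZE: 10240}]
# computes to
#   [{constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
#     constants.IDISK_VG: "xenvg"}]
# since the access mode defaults to constants.DISK_RDWR.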
10033 def _ComputeFullBeParams(op, cluster):
10034 """Computes the full beparams.
10036 @param op: The instance opcode
10037 @param cluster: The cluster config object
10039 @return: The fully filled beparams
10042 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10043 for param, value in op.beparams.iteritems():
10044 if value == constants.VALUE_AUTO:
10045 op.beparams[param] = default_beparams[param]
10046 objects.UpgradeBeParams(op.beparams)
10047 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10048 return cluster.SimpleFillBE(op.beparams)
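# Illustrative example (values hypothetical): with a cluster default of
# maxmem=512, an opcode carrying
#   {constants.BE_MAXMEM: constants.VALUE_AUTO, constants.BE_VCPUS: 4}
# first has the "auto" replaced by 512, and is then completed with the
# remaining cluster defaults via SimpleFillBE.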
10051 def _CheckOpportunisticLocking(op):
10052 """Generate error if opportunistic locking is not possible.
10055 if op.opportunistic_locking and not op.iallocator:
10056 raise errors.OpPrereqError("Opportunistic locking is only available in"
10057 " combination with an instance allocator",
10058 errors.ECODE_INVAL)
10061 class LUInstanceCreate(LogicalUnit):
10062 """Create an instance.
10065 HPATH = "instance-add"
10066 HTYPE = constants.HTYPE_INSTANCE
10069 def CheckArguments(self):
10070 """Check arguments.
10073 # do not require name_check to ease forward/backward compatibility
10075 if self.op.no_install and self.op.start:
10076 self.LogInfo("No-installation mode selected, disabling startup")
10077 self.op.start = False
10078 # validate/normalize the instance name
10079 self.op.instance_name = \
10080 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10082 if self.op.ip_check and not self.op.name_check:
10083 # TODO: make the ip check more flexible and not depend on the name check
10084 raise errors.OpPrereqError("Cannot do IP address check without a name"
10085 " check", errors.ECODE_INVAL)
10087 # check nics' parameter names
10088 for nic in self.op.nics:
10089 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10091 # check disks. parameter names and consistent adopt/no-adopt strategy
10092 has_adopt = has_no_adopt = False
10093 for disk in self.op.disks:
10094 if self.op.disk_template != constants.DT_EXT:
10095 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10096 if constants.IDISK_ADOPT in disk:
10099 has_no_adopt = True
10100 if has_adopt and has_no_adopt:
10101 raise errors.OpPrereqError("Either all disks are adopted or none is",
10102 errors.ECODE_INVAL)
10104 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10105 raise errors.OpPrereqError("Disk adoption is not supported for the"
10106 " '%s' disk template" %
10107 self.op.disk_template,
10108 errors.ECODE_INVAL)
10109 if self.op.iallocator is not None:
10110 raise errors.OpPrereqError("Disk adoption not allowed with an"
10111 " iallocator script", errors.ECODE_INVAL)
10112 if self.op.mode == constants.INSTANCE_IMPORT:
10113 raise errors.OpPrereqError("Disk adoption not allowed for"
10114 " instance import", errors.ECODE_INVAL)
10116 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10117 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10118 " but no 'adopt' parameter given" %
10119 self.op.disk_template,
10120 errors.ECODE_INVAL)
10122 self.adopt_disks = has_adopt
10124 # instance name verification
10125 if self.op.name_check:
10126 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10127 self.op.instance_name = self.hostname1.name
10128 # used in CheckPrereq for ip ping check
10129 self.check_ip = self.hostname1.ip
10131 self.check_ip = None
10133 # file storage checks
10134 if (self.op.file_driver and
10135 self.op.file_driver not in constants.FILE_DRIVER):
10136 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10137 self.op.file_driver, errors.ECODE_INVAL)
10139 if self.op.disk_template == constants.DT_FILE:
10140 opcodes.RequireFileStorage()
10141 elif self.op.disk_template == constants.DT_SHARED_FILE:
10142 opcodes.RequireSharedFileStorage()
10144 ### Node/iallocator related checks
10145 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10147 if self.op.pnode is not None:
10148 if self.op.disk_template in constants.DTS_INT_MIRROR:
10149 if self.op.snode is None:
10150 raise errors.OpPrereqError("The networked disk templates need"
10151 " a mirror node", errors.ECODE_INVAL)
10152 elif self.op.snode:
10153 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10155 self.op.snode = None
10157 _CheckOpportunisticLocking(self.op)
10159 self._cds = _GetClusterDomainSecret()
10161 if self.op.mode == constants.INSTANCE_IMPORT:
10162 # On import force_variant must be True, because if we forced it at
10163 # initial install, our only chance when importing it back is that it
10164 # works again
10165 self.op.force_variant = True
10167 if self.op.no_install:
10168 self.LogInfo("No-installation mode has no effect during import")
10170 elif self.op.mode == constants.INSTANCE_CREATE:
10171 if self.op.os_type is None:
10172 raise errors.OpPrereqError("No guest OS specified",
10173 errors.ECODE_INVAL)
10174 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10175 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10176 " installation" % self.op.os_type,
10177 errors.ECODE_STATE)
10178 if self.op.disk_template is None:
10179 raise errors.OpPrereqError("No disk template specified",
10180 errors.ECODE_INVAL)
10182 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10183 # Check handshake to ensure both clusters have the same domain secret
10184 src_handshake = self.op.source_handshake
10185 if not src_handshake:
10186 raise errors.OpPrereqError("Missing source handshake",
10187 errors.ECODE_INVAL)
10189 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10192 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10193 errors.ECODE_INVAL)
10195 # Load and check source CA
10196 self.source_x509_ca_pem = self.op.source_x509_ca
10197 if not self.source_x509_ca_pem:
10198 raise errors.OpPrereqError("Missing source X509 CA",
10199 errors.ECODE_INVAL)
10202 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10204 except OpenSSL.crypto.Error, err:
10205 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10206 (err, ), errors.ECODE_INVAL)
10208 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10209 if errcode is not None:
10210 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10211 errors.ECODE_INVAL)
10213 self.source_x509_ca = cert
10215 src_instance_name = self.op.source_instance_name
10216 if not src_instance_name:
10217 raise errors.OpPrereqError("Missing source instance name",
10218 errors.ECODE_INVAL)
10220 self.source_instance_name = \
10221 netutils.GetHostname(name=src_instance_name).name
10224 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10225 self.op.mode, errors.ECODE_INVAL)
10227 def ExpandNames(self):
10228 """ExpandNames for CreateInstance.
10230 Figure out the right locks for instance creation.
10233 self.needed_locks = {}
10235 instance_name = self.op.instance_name
10236 # this is just a preventive check, but someone might still add this
10237 # instance in the meantime, and creation will fail at lock-add time
10238 if instance_name in self.cfg.GetInstanceList():
10239 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10240 instance_name, errors.ECODE_EXISTS)
10242 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10244 if self.op.iallocator:
10245 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10246 # specifying a group on instance creation and then selecting nodes from
10248 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10249 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10251 if self.op.opportunistic_locking:
10252 self.opportunistic_locks[locking.LEVEL_NODE] = True
10253 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10255 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10256 nodelist = [self.op.pnode]
10257 if self.op.snode is not None:
10258 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10259 nodelist.append(self.op.snode)
10260 self.needed_locks[locking.LEVEL_NODE] = nodelist
10262 # in case of import lock the source node too
10263 if self.op.mode == constants.INSTANCE_IMPORT:
10264 src_node = self.op.src_node
10265 src_path = self.op.src_path
10267 if src_path is None:
10268 self.op.src_path = src_path = self.op.instance_name
10270 if src_node is None:
10271 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10272 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10273 self.op.src_node = None
10274 if os.path.isabs(src_path):
10275 raise errors.OpPrereqError("Importing an instance from a path"
10276 " requires a source node option",
10277 errors.ECODE_INVAL)
10279 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10280 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10281 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10282 if not os.path.isabs(src_path):
10283 self.op.src_path = src_path = \
10284 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10286 self.needed_locks[locking.LEVEL_NODE_RES] = \
10287 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10289 def _RunAllocator(self):
10290 """Run the allocator based on input opcode.
10293 if self.op.opportunistic_locking:
10294 # Only consider nodes for which a lock is held
10295 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10297 node_whitelist = None
10299 #TODO Export network to iallocator so that it chooses a pnode
10300 # in a nodegroup that has the desired network connected to
10301 req = _CreateInstanceAllocRequest(self.op, self.disks,
10302 self.nics, self.be_full,
10304 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10306 ial.Run(self.op.iallocator)
10308 if not ial.success:
10309 # When opportunistic locks are used only a temporary failure is generated
10310 if self.op.opportunistic_locking:
10311 ecode = errors.ECODE_TEMP_NORES
10313 ecode = errors.ECODE_NORES
10315 raise errors.OpPrereqError("Can't compute nodes using"
10316 " iallocator '%s': %s" %
10317 (self.op.iallocator, ial.info),
10320 self.op.pnode = ial.result[0]
10321 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10322 self.op.instance_name, self.op.iallocator,
10323 utils.CommaJoin(ial.result))
10325 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10327 if req.RequiredNodes() == 2:
10328 self.op.snode = ial.result[1]
10330 def BuildHooksEnv(self):
10331 """Build hooks env.
10333 This runs on master, primary and secondary nodes of the instance.
10337 "ADD_MODE": self.op.mode,
10339 if self.op.mode == constants.INSTANCE_IMPORT:
10340 env["SRC_NODE"] = self.op.src_node
10341 env["SRC_PATH"] = self.op.src_path
10342 env["SRC_IMAGES"] = self.src_images
10344 env.update(_BuildInstanceHookEnv(
10345 name=self.op.instance_name,
10346 primary_node=self.op.pnode,
10347 secondary_nodes=self.secondaries,
10348 status=self.op.start,
10349 os_type=self.op.os_type,
10350 minmem=self.be_full[constants.BE_MINMEM],
10351 maxmem=self.be_full[constants.BE_MAXMEM],
10352 vcpus=self.be_full[constants.BE_VCPUS],
10353 nics=_NICListToTuple(self, self.nics),
10354 disk_template=self.op.disk_template,
10355 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10356 for d in self.disks],
10359 hypervisor_name=self.op.hypervisor,
10365 def BuildHooksNodes(self):
10366 """Build hooks nodes.
10369 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10372 def _ReadExportInfo(self):
10373 """Reads the export information from disk.
10375 It will override the opcode source node and path with the actual
10376 information, if these two were not specified before.
10378 @return: the export information
10381 assert self.op.mode == constants.INSTANCE_IMPORT
10383 src_node = self.op.src_node
10384 src_path = self.op.src_path
10386 if src_node is None:
10387 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10388 exp_list = self.rpc.call_export_list(locked_nodes)
10390 for node in exp_list:
10391 if exp_list[node].fail_msg:
10393 if src_path in exp_list[node].payload:
10395 self.op.src_node = src_node = node
10396 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10400 raise errors.OpPrereqError("No export found for relative path %s" %
10401 src_path, errors.ECODE_INVAL)
10403 _CheckNodeOnline(self, src_node)
10404 result = self.rpc.call_export_info(src_node, src_path)
10405 result.Raise("No export or invalid export found in dir %s" % src_path)
10407 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10408 if not export_info.has_section(constants.INISECT_EXP):
10409 raise errors.ProgrammerError("Corrupted export config",
10410 errors.ECODE_ENVIRON)
10412 ei_version = export_info.get(constants.INISECT_EXP, "version")
10413 if int(ei_version) != constants.EXPORT_VERSION:
10414 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10415 (ei_version, constants.EXPORT_VERSION),
10416 errors.ECODE_ENVIRON)
10419 def _ReadExportParams(self, einfo):
10420 """Use export parameters as defaults.
10422 If the opcode doesn't specify (i.e. override) some instance
10423 parameters, try to take them from the export information, if
10424 that declares them.
10427 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10429 if self.op.disk_template is None:
10430 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10431 self.op.disk_template = einfo.get(constants.INISECT_INS,
10433 if self.op.disk_template not in constants.DISK_TEMPLATES:
10434 raise errors.OpPrereqError("Disk template specified in configuration"
10435 " file is not one of the allowed values:"
10437 " ".join(constants.DISK_TEMPLATES),
10438 errors.ECODE_INVAL)
10440 raise errors.OpPrereqError("No disk template specified and the export"
10441 " is missing the disk_template information",
10442 errors.ECODE_INVAL)
10444 if not self.op.disks:
10446 # TODO: import the disk iv_name too
10447 for idx in range(constants.MAX_DISKS):
10448 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10449 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10450 disks.append({constants.IDISK_SIZE: disk_sz})
10451 self.op.disks = disks
10452 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10453 raise errors.OpPrereqError("No disk info specified and the export"
10454 " is missing the disk information",
10455 errors.ECODE_INVAL)
10457 if not self.op.nics:
10459 for idx in range(constants.MAX_NICS):
10460 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10462 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10463 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10468 self.op.nics = nics
10470 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10471 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10473 if (self.op.hypervisor is None and
10474 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10475 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10477 if einfo.has_section(constants.INISECT_HYP):
10478 # use the export parameters but do not override the ones
10479 # specified by the user
10480 for name, value in einfo.items(constants.INISECT_HYP):
10481 if name not in self.op.hvparams:
10482 self.op.hvparams[name] = value
10484 if einfo.has_section(constants.INISECT_BEP):
10485 # use the parameters, without overriding
10486 for name, value in einfo.items(constants.INISECT_BEP):
10487 if name not in self.op.beparams:
10488 self.op.beparams[name] = value
10489 # Compatibility for the old "memory" be param
10490 if name == constants.BE_MEMORY:
10491 if constants.BE_MAXMEM not in self.op.beparams:
10492 self.op.beparams[constants.BE_MAXMEM] = value
10493 if constants.BE_MINMEM not in self.op.beparams:
10494 self.op.beparams[constants.BE_MINMEM] = value
10496 # try to read the parameters old style, from the main section
10497 for name in constants.BES_PARAMETERS:
10498 if (name not in self.op.beparams and
10499 einfo.has_option(constants.INISECT_INS, name)):
10500 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10502 if einfo.has_section(constants.INISECT_OSP):
10503 # use the parameters, without overriding
10504 for name, value in einfo.items(constants.INISECT_OSP):
10505 if name not in self.op.osparams:
10506 self.op.osparams[name] = value
10508 def _RevertToDefaults(self, cluster):
10509 """Revert the instance parameters to the default values.
10513 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10514 for name in self.op.hvparams.keys():
10515 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10516 del self.op.hvparams[name]
10518 be_defs = cluster.SimpleFillBE({})
10519 for name in self.op.beparams.keys():
10520 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10521 del self.op.beparams[name]
10523 nic_defs = cluster.SimpleFillNIC({})
10524 for nic in self.op.nics:
10525 for name in constants.NICS_PARAMETERS:
10526 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10529 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10530 for name in self.op.osparams.keys():
10531 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10532 del self.op.osparams[name]
10534 def _CalculateFileStorageDir(self):
10535 """Calculate final instance file storage dir.
10538 # file storage dir calculation/check
10539 self.instance_file_storage_dir = None
10540 if self.op.disk_template in constants.DTS_FILEBASED:
10541 # build the full file storage dir path
10544 if self.op.disk_template == constants.DT_SHARED_FILE:
10545 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10547 get_fsd_fn = self.cfg.GetFileStorageDir
10549 cfg_storagedir = get_fsd_fn()
10550 if not cfg_storagedir:
10551 raise errors.OpPrereqError("Cluster file storage dir not defined",
10552 errors.ECODE_STATE)
10553 joinargs.append(cfg_storagedir)
10555 if self.op.file_storage_dir is not None:
10556 joinargs.append(self.op.file_storage_dir)
10558 joinargs.append(self.op.instance_name)
10560 # pylint: disable=W0142
10561 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
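# Illustrative example (paths hypothetical): with a cluster file storage
# dir of "/srv/ganeti/file-storage", op.file_storage_dir="web" and an
# instance named "inst1.example.com", the resulting directory is
#   /srv/ganeti/file-storage/web/inst1.example.com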
10563 def CheckPrereq(self): # pylint: disable=R0914
10564 """Check prerequisites.
10567 self._CalculateFileStorageDir()
10569 if self.op.mode == constants.INSTANCE_IMPORT:
10570 export_info = self._ReadExportInfo()
10571 self._ReadExportParams(export_info)
10572 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10574 self._old_instance_name = None
10576 if (not self.cfg.GetVGName() and
10577 self.op.disk_template not in constants.DTS_NOT_LVM):
10578 raise errors.OpPrereqError("Cluster does not support lvm-based"
10579 " instances", errors.ECODE_STATE)
10581 if (self.op.hypervisor is None or
10582 self.op.hypervisor == constants.VALUE_AUTO):
10583 self.op.hypervisor = self.cfg.GetHypervisorType()
10585 cluster = self.cfg.GetClusterInfo()
10586 enabled_hvs = cluster.enabled_hypervisors
10587 if self.op.hypervisor not in enabled_hvs:
10588 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10590 (self.op.hypervisor, ",".join(enabled_hvs)),
10591 errors.ECODE_STATE)
10593 # Check tag validity
10594 for tag in self.op.tags:
10595 objects.TaggableObject.ValidateTag(tag)
10597 # check hypervisor parameter syntax (locally)
10598 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10599 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10601 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10602 hv_type.CheckParameterSyntax(filled_hvp)
10603 self.hv_full = filled_hvp
10604 # check that we don't specify global parameters on an instance
10605 _CheckGlobalHvParams(self.op.hvparams)
10607 # fill and remember the beparams dict
10608 self.be_full = _ComputeFullBeParams(self.op, cluster)
10610 # build os parameters
10611 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10613 # now that hvp/bep are in final format, let's reset to defaults,
10615 if self.op.identify_defaults:
10616 self._RevertToDefaults(cluster)
10619 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10620 self.proc.GetECId())
10622 # disk checks/pre-build
10623 default_vg = self.cfg.GetVGName()
10624 self.disks = _ComputeDisks(self.op, default_vg)
10626 if self.op.mode == constants.INSTANCE_IMPORT:
10628 for idx in range(len(self.disks)):
10629 option = "disk%d_dump" % idx
10630 if export_info.has_option(constants.INISECT_INS, option):
10631 # FIXME: are the old os-es, disk sizes, etc. useful?
10632 export_name = export_info.get(constants.INISECT_INS, option)
10633 image = utils.PathJoin(self.op.src_path, export_name)
10634 disk_images.append(image)
10636 disk_images.append(False)
10638 self.src_images = disk_images
10640 if self.op.instance_name == self._old_instance_name:
10641 for idx, nic in enumerate(self.nics):
10642 if nic.mac == constants.VALUE_AUTO:
10643 nic_mac_ini = "nic%d_mac" % idx
10644 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10646 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10648 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10649 if self.op.ip_check:
10650 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10651 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10652 (self.check_ip, self.op.instance_name),
10653 errors.ECODE_NOTUNIQUE)
10655 #### mac address generation
10656 # By generating here the mac address both the allocator and the hooks get
10657 # the real final mac address rather than the 'auto' or 'generate' value.
10658 # There is a race condition between the generation and the instance object
10659 # creation, which means that we know the mac is valid now, but we're not
10660 # sure it will be when we actually add the instance. If things go bad
10661 # adding the instance will abort because of a duplicate mac, and the
10662 # creation job will fail.
10663 for nic in self.nics:
10664 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10665 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10669 if self.op.iallocator is not None:
10670 self._RunAllocator()
10672 # Release all unneeded node locks
10673 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10674 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10675 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10676 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10678 assert (self.owned_locks(locking.LEVEL_NODE) ==
10679 self.owned_locks(locking.LEVEL_NODE_RES)), \
10680 "Node locks differ from node resource locks"
10682 #### node related checks
10684 # check primary node
10685 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10686 assert self.pnode is not None, \
10687 "Cannot retrieve locked node %s" % self.op.pnode
10689 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10690 pnode.name, errors.ECODE_STATE)
10692 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10693 pnode.name, errors.ECODE_STATE)
10694 if not pnode.vm_capable:
10695 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10696 " '%s'" % pnode.name, errors.ECODE_STATE)
10698 self.secondaries = []
10700 # Fill in any IPs from IP pools. This must happen here, because we need to
10701 # know the nic's primary node, as specified by the iallocator
10702 for idx, nic in enumerate(self.nics):
10704 if net is not None:
10705 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10706 if netparams is None:
10707 raise errors.OpPrereqError("No netparams found for network"
10708 " %s. Propably not connected to"
10709 " node's %s nodegroup" %
10710 (net, self.pnode.name),
10711 errors.ECODE_INVAL)
10712 self.LogInfo("NIC/%d inherits netparams %s" %
10713 (idx, netparams.values()))
10714 nic.nicparams = dict(netparams)
10715 if nic.ip is not None:
10716 if nic.ip.lower() == constants.NIC_IP_POOL:
10718 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10719 except errors.ReservationError:
10720 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10721 " from the address pool" % idx,
10722 errors.ECODE_STATE)
10723 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10726 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10727 except errors.ReservationError:
10728 raise errors.OpPrereqError("IP address %s already in use"
10729 " or does not belong to network %s" %
10731 errors.ECODE_NOTUNIQUE)
10733 # net is None, ip None or given
10734 elif self.op.conflicts_check:
10735 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10737 # mirror node verification
10738 if self.op.disk_template in constants.DTS_INT_MIRROR:
10739 if self.op.snode == pnode.name:
10740 raise errors.OpPrereqError("The secondary node cannot be the"
10741 " primary node", errors.ECODE_INVAL)
10742 _CheckNodeOnline(self, self.op.snode)
10743 _CheckNodeNotDrained(self, self.op.snode)
10744 _CheckNodeVmCapable(self, self.op.snode)
10745 self.secondaries.append(self.op.snode)
10747 snode = self.cfg.GetNodeInfo(self.op.snode)
10748 if pnode.group != snode.group:
10749 self.LogWarning("The primary and secondary nodes are in two"
10750 " different node groups; the disk parameters"
10751 " from the first disk's node group will be"
10754 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
10756 if self.op.disk_template in constants.DTS_INT_MIRROR:
10757 nodes.append(snode)
10758 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10759 if compat.any(map(has_es, nodes)):
10760 raise errors.OpPrereqError("Disk template %s not supported with"
10761 " exclusive storage" % self.op.disk_template,
10762 errors.ECODE_STATE)
10764 nodenames = [pnode.name] + self.secondaries
10766 # Verify instance specs
10767 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10769 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10770 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10771 constants.ISPEC_DISK_COUNT: len(self.disks),
10772 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10773 constants.ISPEC_NIC_COUNT: len(self.nics),
10774 constants.ISPEC_SPINDLE_USE: spindle_use,
10777 group_info = self.cfg.GetNodeGroup(pnode.group)
10778 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10779 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10780 if not self.op.ignore_ipolicy and res:
10781 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10782 (pnode.group, group_info.name, utils.CommaJoin(res)))
10783 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10785 if not self.adopt_disks:
10786 if self.op.disk_template == constants.DT_RBD:
10787 # _CheckRADOSFreeSpace() is just a placeholder.
10788 # Any function that checks prerequisites can be placed here.
10789 # Check if there is enough space on the RADOS cluster.
10790 _CheckRADOSFreeSpace()
10791 elif self.op.disk_template == constants.DT_EXT:
10792 # FIXME: Function that checks prereqs if needed
10795 # Check lv size requirements, if not adopting
10796 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10797 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10799 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10800 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10801 disk[constants.IDISK_ADOPT])
10802 for disk in self.disks])
10803 if len(all_lvs) != len(self.disks):
10804 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10805 errors.ECODE_INVAL)
10806 for lv_name in all_lvs:
10808 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10809 # to ReserveLV use the same syntax
10810 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10811 except errors.ReservationError:
10812 raise errors.OpPrereqError("LV named %s used by another instance" %
10813 lv_name, errors.ECODE_NOTUNIQUE)
10815 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10816 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10818 node_lvs = self.rpc.call_lv_list([pnode.name],
10819 vg_names.payload.keys())[pnode.name]
10820 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10821 node_lvs = node_lvs.payload
10823 delta = all_lvs.difference(node_lvs.keys())
10825 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10826 utils.CommaJoin(delta),
10827 errors.ECODE_INVAL)
10828 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10830 raise errors.OpPrereqError("Online logical volumes found, cannot"
10831 " adopt: %s" % utils.CommaJoin(online_lvs),
10832 errors.ECODE_STATE)
10833 # update the size of disk based on what is found
10834 for dsk in self.disks:
10835 dsk[constants.IDISK_SIZE] = \
10836 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10837 dsk[constants.IDISK_ADOPT])][0]))
10839 elif self.op.disk_template == constants.DT_BLOCK:
10840 # Normalize and de-duplicate device paths
10841 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10842 for disk in self.disks])
10843 if len(all_disks) != len(self.disks):
10844 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10845 errors.ECODE_INVAL)
10846 baddisks = [d for d in all_disks
10847 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10849 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10850 " cannot be adopted" %
10851 (utils.CommaJoin(baddisks),
10852 constants.ADOPTABLE_BLOCKDEV_ROOT),
10853 errors.ECODE_INVAL)
10855 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10856 list(all_disks))[pnode.name]
10857 node_disks.Raise("Cannot get block device information from node %s" %
10859 node_disks = node_disks.payload
10860 delta = all_disks.difference(node_disks.keys())
10862 raise errors.OpPrereqError("Missing block device(s): %s" %
10863 utils.CommaJoin(delta),
10864 errors.ECODE_INVAL)
10865 for dsk in self.disks:
10866 dsk[constants.IDISK_SIZE] = \
10867 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10869 # Verify instance specs
10870 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10872 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10873 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10874 constants.ISPEC_DISK_COUNT: len(self.disks),
10875 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10876 for disk in self.disks],
10877 constants.ISPEC_NIC_COUNT: len(self.nics),
10878 constants.ISPEC_SPINDLE_USE: spindle_use,
10881 group_info = self.cfg.GetNodeGroup(pnode.group)
10882 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10883 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10884 if not self.op.ignore_ipolicy and res:
10885 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10886 " policy: %s") % (pnode.group,
10887 utils.CommaJoin(res)),
10888 errors.ECODE_INVAL)
10890 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10892 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10893 # check OS parameters (remotely)
10894 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10896 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10898 #TODO: _CheckExtParams (remotely)
10899 # Check parameters for extstorage
10901 # memory check on primary node
10902 #TODO(dynmem): use MINMEM for checking
10904 _CheckNodeFreeMemory(self, self.pnode.name,
10905 "creating instance %s" % self.op.instance_name,
10906 self.be_full[constants.BE_MAXMEM],
10907 self.op.hypervisor)
10909 self.dry_run_result = list(nodenames)
10911 def Exec(self, feedback_fn):
10912 """Create and add the instance to the cluster.
10915 instance = self.op.instance_name
10916 pnode_name = self.pnode.name
10918 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10919 self.owned_locks(locking.LEVEL_NODE)), \
10920 "Node locks differ from node resource locks"
10921 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10923 ht_kind = self.op.hypervisor
10924 if ht_kind in constants.HTS_REQ_PORT:
10925 network_port = self.cfg.AllocatePort()
10927 network_port = None
10929 # This is ugly, but we have a chicken-and-egg problem here:
10930 # We can only take the group disk parameters, as the instance
10931 # has no disks yet (we are generating them right here).
10932 node = self.cfg.GetNodeInfo(pnode_name)
10933 nodegroup = self.cfg.GetNodeGroup(node.group)
10934 disks = _GenerateDiskTemplate(self,
10935 self.op.disk_template,
10936 instance, pnode_name,
10939 self.instance_file_storage_dir,
10940 self.op.file_driver,
10943 self.cfg.GetGroupDiskParams(nodegroup))
10945 iobj = objects.Instance(name=instance, os=self.op.os_type,
10946 primary_node=pnode_name,
10947 nics=self.nics, disks=disks,
10948 disk_template=self.op.disk_template,
10949 admin_state=constants.ADMINST_DOWN,
10950 network_port=network_port,
10951 beparams=self.op.beparams,
10952 hvparams=self.op.hvparams,
10953 hypervisor=self.op.hypervisor,
10954 osparams=self.op.osparams,
10958 for tag in self.op.tags:
10961 if self.adopt_disks:
10962 if self.op.disk_template == constants.DT_PLAIN:
10963 # rename LVs to the newly-generated names; we need to construct
10964 # 'fake' LV disks with the old data, plus the new unique_id
10965 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10967 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10968 rename_to.append(t_dsk.logical_id)
10969 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10970 self.cfg.SetDiskID(t_dsk, pnode_name)
10971 result = self.rpc.call_blockdev_rename(pnode_name,
10972 zip(tmp_disks, rename_to))
10973 result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      # we need to set the disks ID to the primary node, since the
      # preceding code might or might not have done it, depending on
      # disk template and other options
      for disk in iobj.disks:
        self.cfg.SetDiskID(disk, pnode_name)
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      else:
        if self.op.mode == constants.INSTANCE_IMPORT:
          feedback_fn("* running the instance OS import scripts...")

          transfers = []

          for idx, image in enumerate(self.src_images):
            if not image:
              continue

            # FIXME: pass debug option from opcode to backend
            dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                               constants.IEIO_FILE, (image, ),
                                               constants.IEIO_SCRIPT,
                                               (iobj.disks[idx], idx),
                                               None)
            transfers.append(dt)

          import_result = \
            masterd.instance.TransferInstanceData(self, feedback_fn,
                                                  self.op.src_node, pnode_name,
                                                  self.pnode.secondary_ip,
                                                  iobj, transfers)
          if not compat.all(import_result):
            self.LogWarning("Some disks for instance %s on node %s were not"
                            " imported successfully" % (instance, pnode_name))

          rename_from = self._old_instance_name

        elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
          feedback_fn("* preparing remote import...")
          # The source cluster will stop the instance before attempting to make
          # a connection. In some cases stopping an instance can take a long
          # time, hence the shutdown timeout is added to the connection
          # timeout.
          connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                             self.op.source_shutdown_timeout)
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          assert iobj.primary_node == self.pnode.name
          disk_results = \
            masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                          self.source_x509_ca,
                                          self._cds, timeouts)
          if not compat.all(disk_results):
            # TODO: Should the instance still be started, even if some disks
            # failed to import (valid for local imports, too)?
            self.LogWarning("Some disks for instance %s on node %s were not"
                            " imported successfully" % (instance, pnode_name))

          rename_from = self.source_instance_name

        else:
          # also checked in the prereq part
          raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                       % self.op.mode)

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   rename_from,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
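
# Hedged usage sketch (values are made up): instance creation is normally
# driven from the CLI, e.g.
#   gnt-instance add -t drbd -n node1:node2 -o debootstrap -s 10G inst1
# which is turned into an OpInstanceCreate opcode and ends up in the
# LUInstanceCreate.Exec method above.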


class LUInstanceMultiAlloc(NoHooksLU):
  """Allocates multiple instances at the same time.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    nodes = []
    for inst in self.op.instances:
      if inst.iallocator is not None:
        raise errors.OpPrereqError("iallocators are not allowed to be set on"
                                   " instance objects", errors.ECODE_INVAL)
      nodes.append(bool(inst.pnode))
      if inst.disk_template in constants.DTS_INT_MIRROR:
        nodes.append(bool(inst.snode))

    has_nodes = compat.any(nodes)
    if compat.all(nodes) ^ has_nodes:
      raise errors.OpPrereqError("There are instance objects providing"
                                 " pnode/snode while others do not",
                                 errors.ECODE_INVAL)

    if self.op.iallocator is None:
      default_iallocator = self.cfg.GetDefaultIAllocator()
      if default_iallocator and has_nodes:
        self.op.iallocator = default_iallocator
      else:
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
                                   " given and no cluster-wide default"
                                   " iallocator found; please specify either"
                                   " an iallocator or nodes on the instances"
                                   " or set a cluster-wide default iallocator",
                                   errors.ECODE_INVAL)

    _CheckOpportunisticLocking(self.op)

    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
    if dups:
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
                                 utils.CommaJoin(dups), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Calculate the locks.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {
      # iallocator will select nodes and even if no iallocator is used,
      # collisions with LUInstanceCreate should be avoided
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET

      if self.op.opportunistic_locking:
        self.opportunistic_locks[locking.LEVEL_NODE] = True
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
    else:
      nodeslist = []
      for inst in self.op.instances:
        inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
        nodeslist.append(inst.pnode)
        if inst.snode is not None:
          inst.snode = _ExpandNodeName(self.cfg, inst.snode)
          nodeslist.append(inst.snode)

      self.needed_locks[locking.LEVEL_NODE] = nodeslist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)

  def CheckPrereq(self):
    """Check prerequisite.

    """
    cluster = self.cfg.GetClusterInfo()
    default_vg = self.cfg.GetVGName()
    ec_id = self.proc.GetECId()

    if self.op.opportunistic_locking:
      # Only consider nodes for which a lock is held
      node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
    else:
      node_whitelist = None

    insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                         _ComputeNics(op, cluster, None,
                                                      self.cfg, ec_id),
                                         _ComputeFullBeParams(op, cluster),
                                         node_whitelist)
             for op in self.op.instances]

    req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.ia_result = ial.result

    if self.op.dry_run:
      self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
        constants.JOB_IDS_KEY: [],
        })

  def _ConstructPartialResult(self):
    """Constructs the partial result.

    """
    (allocatable, failed) = self.ia_result
    return {
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
        map(compat.fst, allocatable),
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
      }

  def Exec(self, feedback_fn):
    """Executes the opcode.

    """
    op2inst = dict((op.instance_name, op) for op in self.op.instances)
    (allocatable, failed) = self.ia_result

    jobs = []
    for (name, nodes) in allocatable:
      op = op2inst.pop(name)

      if len(nodes) > 1:
        (op.pnode, op.snode) = nodes
      else:
        (op.pnode,) = nodes

      jobs.append([op])

    missing = set(op2inst.keys()) - set(failed)
    assert not missing, \
      "Iallocator returned an incomplete result: %s" % utils.CommaJoin(missing)

    return ResultWithJobs(jobs, **self._ConstructPartialResult())
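
  # Hedged sketch of the partial result built above, assuming two requested
  # instances of which only one was allocatable (key names abbreviated):
  #   {ALLOCATABLE_KEY: ["inst1"], FAILED_KEY: ["inst2"]}
  # plus the job IDs added later by the opcode processor.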


def _CheckRADOSFreeSpace():
  """Compute disk size requirements inside the RADOS cluster.

  """
  # For the RADOS cluster we assume there is always enough space.
  pass


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
      else:
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
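
# Hedged sketch: the dictionary returned above is consumed by the client side
# of "gnt-instance console"; depending on the hypervisor it describes either
# a command to spawn or a network endpoint, e.g. (illustrative values only)
#   {"instance": "inst1", "kind": "ssh", "host": "node1", ...}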


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    remote_node = self.op.remote_node
    ialloc = self.op.iallocator
    if self.op.mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and ialloc is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

    elif remote_node is not None or ialloc is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = \
          [node_name
           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

        self._LockInstancesNodes()

    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, early_release, ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.early_release = early_release
    self.ignore_ipolicy = ignore_ipolicy

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    req = iallocator.IAReqRelocate(name=instance_name,
                                   relocate_from=list(relocate_from))
    ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name
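
  # Hedged example: for an IAReqRelocate request the allocator result is a
  # list of chosen node names, e.g. ["node3"]; only the first entry is used
  # above as the new secondary.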

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, instance)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
        "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

    # If not specified all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between
    # internally submitted opcodes and external ones. We should fix that.
    if self.remote_node_info:
      # We change the node, let's verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      cluster = self.cfg.GetClusterInfo()
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              new_group_info)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    # Release any owned node group
    _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
        ("Incorrect node locks, owning %s, expected %s" %
         (owned_nodes, self.node_secondary_ip.keys()))
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
              self.lu.owned_locks(locking.LEVEL_NODE_RES))
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
        "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
        "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement for instance '%s'" %
                  self.instance.name)
      return

    feedback_fn("Replacing disk(s) %s for instance '%s'" %
                (utils.CommaJoin(self.disks), self.instance.name))
    feedback_fn("Current primary node: %s" % self.instance.primary_node)
    feedback_fn("Current secondary node: %s" %
                utils.CommaJoin(self.instance.secondary_nodes))

    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    assert not self.lu.owned_locks(locking.LEVEL_NODE)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, self.instance)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
                                   on_primary, ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    for idx, dev in enumerate(disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      (data_disk, meta_disk) = dev.children
      vg_data = data_disk.logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]),
                             params=data_disk.params)
      vg_meta = meta_disk.logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV,
                             size=constants.DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]),
                             params=meta_disk.params)

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
      excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
                             _GetInstanceInfoText(self.instance), False,
                             excl_stor)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = _BlockdevFind(self, node_name, dev, self.instance)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s", name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s", msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn):  # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6
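
    # Hedged sketch of the per-disk rename dance below, with hypothetical LV
    # names:
    #   old LV  vg/inst1.disk0_data -> vg/inst1.disk0_data_replaced-<time_t>
    #   new LV  vg/<uuid>.disk0_data -> vg/inst1.disk0_data
    # so the freshly created LV ends up under the old name and can be attached
    # back to the drbd device.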

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node,
                                                  (dev, self.instance), new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # Release all node locks while waiting for sync
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
    for idx, dev in enumerate(disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
                             True, _GetInstanceInfoText(self.instance), False,
                             excl_stor)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size,
                              params={})
      (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
                                             self.cfg)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
                              anno_new_drbd,
                              _GetInstanceInfoText(self.instance), False,
                              excl_stor)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node,
                                            (dev, self.instance)).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # Release all node locks (the configuration has been updated)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           (self.instance.disks, self.instance),
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
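

# Hedged CLI sketch: the two DRBD strategies above are normally reached via
# "gnt-instance replace-disks", e.g.
#   gnt-instance replace-disks -p inst1        # on the primary (disk-only)
#   gnt-instance replace-disks -n node3 inst1  # change the secondary node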


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary node" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      evac_mode = self._MODE2IALLOCATOR[self.op.mode]
      req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
                                     instances=list(self.instance_names))
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
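

# Hedged CLI sketch: node evacuation is typically requested as
#   gnt-node evacuate -s node1           # secondary instances via iallocator
#   gnt-node evacuate -s -n node2 node1  # explicit new secondary node
# which maps onto the NODE_EVAC_* modes handled above.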


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


def _DiskSizeInBytesToMebibytes(lu, size):
  """Converts a disk size in bytes to mebibytes.

  Warns and rounds up if the size isn't an even multiple of 1 MiB.

  """
  (mib, remainder) = divmod(size, 1024 * 1024)

  if remainder != 0:
    lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
                  " to not overwrite existing data (%s bytes will not be"
                  " wiped)", (1024 * 1024) - remainder)
    mib += 1

  return mib


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      "ABSOLUTE": self.op.absolute,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if self.op.absolute:
      self.target = self.op.amount
      self.delta = self.target - self.disk.size
      if self.delta < 0:
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
                                   "current disk size (%s)" %
                                   (utils.FormatUnit(self.target, "h"),
                                    utils.FormatUnit(self.disk.size, "h")),
                                   errors.ECODE_STATE)
    else:
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
      if self.delta < 0:
        raise errors.OpPrereqError("Requested increment (%s) is negative" %
                                   utils.FormatUnit(self.delta, "h"),
                                   errors.ECODE_INVAL)

    self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
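
  # Worked example: for a 10240 MiB disk, an absolute request of 12288 yields
  # delta = 12288 - 10240 = 2048, while a relative request of 2048 yields the
  # same target of 12288 MiB; a negative delta is rejected in both cases.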

  def _CheckDiskSpace(self, nodenames, req_vgspace):
    template = self.instance.disk_template
    if template not in constants.DTS_NO_FREE_SPACE_CHECK:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      nodes = map(self.cfg.GetNodeInfo, nodenames)
      es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
                        nodes)
      if es_nodes:
        # With exclusive storage we need to do something smarter than just
        # looking at free space; for now, let's simply abort the operation.
        raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
                                   " is enabled", errors.ECODE_STATE)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)

    if wipe_disks:
      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
    else:
      old_disk_size = None

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)
12766 disk.RecordGrow(self.delta)
12767 self.cfg.Update(instance, feedback_fn)
12769 # Changes have been recorded, release node lock
12770 _ReleaseLocks(self, locking.LEVEL_NODE)
12772 # Downgrade lock while waiting for sync
12773 self.glm.downgrade(locking.LEVEL_INSTANCE)
12775 assert wipe_disks ^ (old_disk_size is None)
12778 assert instance.disks[self.op.disk] == disk
12780 # Wipe newly added disk space
12781 _WipeDisks(self, instance,
12782 disks=[(self.op.disk, disk, old_disk_size)])
12784 if self.op.wait_for_sync:
12785 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12787 self.LogWarning("Disk syncing has not returned a good status; check"
12789 if instance.admin_state != constants.ADMINST_UP:
12790 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12791 elif instance.admin_state != constants.ADMINST_UP:
12792 self.LogWarning("Not shutting down the disk even if the instance is"
12793 " not supposed to be running because no wait for"
12794 " sync mode was requested")
12796 assert self.owned_locks(locking.LEVEL_NODE_RES)
12797 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking:
      if level == locking.LEVEL_NODEGROUP:
        owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))

      elif level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if self.op.use_locking:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    group2name_fn = lambda uuid: groups[uuid].name

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": group2name_fn(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification
  @rtype: list

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
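
# Illustrative sketch (hypothetical values): PrepareContainerMods() only
# attaches the private data object to each modification, e.g.:
#   mods = [(constants.DDM_ADD, -1, {"size": 1024}),
#           (constants.DDM_REMOVE, 0, {})]
#   PrepareContainerMods(mods, _InstNicModPrivate)
#   # => [(DDM_ADD, -1, {...}, <_InstNicModPrivate>),
#   #     (DDM_REMOVE, 0, {}, <_InstNicModPrivate>)]
# With private_fn=None, the fourth element of each tuple is None.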


#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))


def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, idx, params, private) in mods:
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
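
# Illustrative sketch: ApplyContainerMods() mutates the container in place
# and, when "chgdesc" is a list, extends it with the changes reported by the
# callbacks, e.g.:
#   container = ["a", "b"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, 0, {})], None)
#   ApplyContainerMods("demo", container, chgdesc, mods, None, None, None)
#   # container == ["b"], chgdesc == [("demo/0", "remove")]
# Every reported change is a (name, value) 2-tuple, which is what
# _TApplyContModsCbChanges above enforces.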


def _UpdateIvNames(base_index, disks):
  """Updates the C{iv_name} attribute of disks.

  @type disks: list of L{objects.Disk}

  """
  for (idx, disk) in enumerate(disks):
    disk.iv_name = "disk/%s" % (base_index + idx, )
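
# For example, _UpdateIvNames(0, disks) renumbers the volumes to "disk/0",
# "disk/1", ... in list order; a non-zero base index just shifts the start.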


class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []

      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
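
  # Illustrative sketch: the legacy 2-tuple format is upgraded here, e.g.
  #   [("add", {...}), ("remove", {})]
  # becomes
  #   [(DDM_ADD, -1, {...}), (DDM_REMOVE, -1, {})]
  # while an index entry such as (2, {...}) turns into (DDM_MODIFY, 2, {...}).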

  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      # If 'key_types' is an empty dict, we assume we have an
      # 'ext' template and thus do not ForceDictType
      if key_types:
        utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY:
      if constants.IDISK_SIZE in params:
        raise errors.OpPrereqError("Disk size change not possible, use"
                                   " grow-disk", errors.ECODE_INVAL)
      if constants.IDISK_MODE not in params:
        raise errors.OpPrereqError("Disk 'mode' is the only kind of"
                                   " modification supported, but missing",
                                   errors.ECODE_NOENT)
      if len(params) > 1:
        raise errors.OpPrereqError("Disk modification doesn't support"
                                   " additional arbitrary parameters",
                                   errors.ECODE_INVAL)

  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      req_net = params.get(constants.INIC_NETWORK, None)
      link = params.get(constants.NIC_LINK, None)
      mode = params.get(constants.NIC_MODE, None)
      if req_net is not None:
        if req_net.lower() == constants.VALUE_NONE:
          params[constants.INIC_NETWORK] = None
          req_net = None
        elif link is not None or mode is not None:
          raise errors.OpPrereqError("If network is given, mode or link"
                                     " should not be set",
                                     errors.ECODE_INVAL)

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if ip is not None:
        if ip.lower() == constants.VALUE_NONE:
          params[constants.INIC_IP] = None
        else:
          if ip.lower() == constants.NIC_IP_POOL:
            if op == constants.DDM_ADD and req_net is None:
              raise errors.OpPrereqError("If ip=pool, parameter network"
                                         " must be passed too",
                                         errors.ECODE_INVAL)
          else:
            if not netutils.IPAddress.IsValid(ip):
              raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                         errors.ECODE_INVAL)

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)
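
  # Illustrative sketch (hypothetical values): for an add,
  #   {constants.INIC_IP: "pool", constants.INIC_NETWORK: "net1"}
  # is valid (ip=pool requires a network) and a missing MAC defaults to
  # "auto"; for a modify, mac="auto" is rejected since an existing NIC
  # already has a concrete address.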

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    self.op.disks = self._UpgradeDiskNicMods(
      "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = self._UpgradeDiskNicMods(
      "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    # Lock the node group in order to look up the ipolicy
    self.share_locks[locking.LEVEL_NODEGROUP] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      # Acquire locks for the instance's nodegroups optimistically. Needs
      # to be verified in CheckPrereq
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        n = copy.deepcopy(nic)
        nicparams = self.cluster.SimpleFillNIC(n.nicparams)
        n.nicparams = nicparams
        nics.append(_NICToTuple(self, n))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def _PrepareNicModification(self, params, private, old_ip, old_net,
                              old_params, cluster, pnode):
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    req_link = update_params_dict.get(constants.NIC_LINK, None)
    req_mode = update_params_dict.get(constants.NIC_MODE, None)

    new_net = params.get(constants.INIC_NETWORK, old_net)
    if new_net is not None:
      netparams = self.cfg.GetGroupNetParams(new_net, pnode)
      if netparams is None:
        raise errors.OpPrereqError("No netparams found for the network"
                                   " %s, probably not connected" % new_net,
                                   errors.ECODE_INVAL)
      new_params = dict(netparams)
    else:
      new_params = _GetUpdatedParams(old_params, update_params_dict)

    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    elif new_mode == constants.NIC_MODE_OVS:
      # TODO: check OVS link
      self.LogInfo("OVS links are currently not checked for correctness")

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)
    elif new_net != old_net:

      def get_net_prefix(net):
        if net:
          uuid = self.cfg.LookupNetwork(net)
          if uuid:
            nobj = self.cfg.GetNetwork(uuid)
            return nobj.mac_prefix
        return None

      new_prefix = get_net_prefix(new_net)
      old_prefix = get_net_prefix(old_net)
      if old_prefix != new_prefix:
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())

    # if there is a change in the NIC/network configuration
    new_ip = params.get(constants.INIC_IP, old_ip)
    if (new_ip, new_net) != (old_ip, old_net):
      if new_ip:
        if new_net:
          if new_ip.lower() == constants.NIC_IP_POOL:
            try:
              new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("Unable to get a free IP"
                                         " from the address pool",
                                         errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
            params[constants.INIC_IP] = new_ip
          elif new_ip != old_ip or new_net != old_net:
            try:
              self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
              self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP %s not available in network %s" %
                                         (new_ip, new_net),
                                         errors.ECODE_NOTUNIQUE)
        elif new_ip.lower() == constants.NIC_IP_POOL:
          raise errors.OpPrereqError("ip=pool, but no network found",
                                     errors.ECODE_INVAL)

        # new IP without a network: only check for conflicts if requested
        elif self.op.conflicts_check:
          _CheckForConflictingIp(self, new_ip, pnode)

      # release the old IP if it belonged to a network
      if old_ip and old_net:
        try:
          self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
        except errors.AddressPoolError:
          logging.warning("Release IP %s not contained in network %s",
                          old_ip, old_net)

    # there are no changes in (net, ip) tuple
    elif (old_net is not None and
          (req_link is not None or req_mode is not None)):
      raise errors.OpPrereqError("Not allowed to change link or mode of"
                                 " a NIC that is connected to a network",
                                 errors.ECODE_INVAL)

    private.params = new_params
    private.filled = new_filled_params

  def _PreCheckDiskTemplate(self, pnode_info):
    """CheckPrereq checks related to a new disk template."""
    # Arguments are passed to avoid configuration lookups
    instance = self.instance
    pnode = instance.primary_node
    cluster = self.cluster
    if instance.disk_template == self.op.disk_template:
      raise errors.OpPrereqError("Instance already has disk template %s" %
                                 instance.disk_template, errors.ECODE_INVAL)

    if (instance.disk_template,
        self.op.disk_template) not in self._DISK_CONVERSIONS:
      raise errors.OpPrereqError("Unsupported disk template conversion from"
                                 " %s to %s" % (instance.disk_template,
                                                self.op.disk_template),
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN,
                        msg="cannot change disk template")
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.remote_node == pnode:
        raise errors.OpPrereqError("Given new secondary node %s is the same"
                                   " as the primary node of the instance" %
                                   self.op.remote_node, errors.ECODE_STATE)
      _CheckNodeOnline(self, self.op.remote_node)
      _CheckNodeNotDrained(self, self.op.remote_node)
      # FIXME: here we assume that the old instance type is DT_PLAIN
      assert instance.disk_template == constants.DT_PLAIN
      disks = [{constants.IDISK_SIZE: d.size,
                constants.IDISK_VG: d.logical_id[0]}
               for d in instance.disks]
      required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
      _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

      snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
      snode_group = self.cfg.GetNodeGroup(snode_info.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              snode_group)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                              ignore=self.op.ignore_ipolicy)
      if pnode_info.group != snode_info.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
      # Make sure none of the nodes require exclusive storage
      nodes = [pnode_info]
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        assert snode_info
        nodes.append(snode_info)
      has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
      if compat.any(map(has_es, nodes)):
        errmsg = ("Cannot convert disk template from %s to %s when exclusive"
                  " storage is enabled" % (instance.disk_template,
                                           self.op.disk_template))
        raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = instance.primary_node
    assert pnode in self.owned_locks(locking.LEVEL_NODE)
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
    assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
    group_info = self.cfg.GetNodeGroup(pnode_info.group)

    # dictionary with instance information after the modification
    ispec = {}

    # Check disk modifications. This is done here and not in CheckArguments
    # (as with NICs), because we need to know the instance's disk template
    if instance.disk_template == constants.DT_EXT:
      self._CheckMods("disk", self.op.disks, {},
                      self._VerifyDiskModification)
    else:
      self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                      self._VerifyDiskModification)

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # Check the validity of the `provider' parameter
    if instance.disk_template in constants.DT_EXT:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if mod[0] == constants.DDM_ADD:
          if ext_provider is None:
            raise errors.OpPrereqError("Instance template is '%s' and"
                                       " parameter '%s' missing, during"
                                       " disk add" %
                                       (constants.DT_EXT,
                                        constants.IDISK_PROVIDER),
                                       errors.ECODE_NOENT)
        elif mod[0] == constants.DDM_MODIFY:
          if ext_provider:
            raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
                                       " modification" %
                                       constants.IDISK_PROVIDER,
                                       errors.ECODE_INVAL)
    else:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if ext_provider is not None:
          raise errors.OpPrereqError("Parameter '%s' is only valid for"
                                     " instances of type '%s'" %
                                     (constants.IDISK_PROVIDER,
                                      constants.DT_EXT),
                                     errors.ECODE_INVAL)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    if self.op.disk_template:
      self._PreCheckDiskTemplate(pnode_info)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor], False)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0
          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" %
                                   instance.name, errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
          (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
           self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      delta = self.op.runtime_mem - current_memory
      if delta > 0:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name, delta, instance.hypervisor)

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    def _PrepareNicCreate(_, params, private):
      self._PrepareNicModification(params, private, None, None,
                                   {}, cluster, pnode)
      return (None, None)

    def _PrepareNicMod(_, nic, params, private):
      self._PrepareNicModification(params, private, nic.ip, nic.network,
                                   nic.nicparams, cluster, pnode)
      return None

    def _PrepareNicRemove(_, params, __):
      ip = params.ip
      net = params.network
      if net is not None and ip is not None:
        self.cfg.ReleaseIp(net, ip, self.proc.GetECId())

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    # Verify disk changes (operating on a copy)
    disks = instance.disks[:]
    ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)
    disk_sizes = [disk.size for disk in instance.disks]
    disk_sizes.extend(params["size"] for (op, idx, params, private) in
                      self.diskmod if op == constants.DDM_ADD)
    ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
    ispec[constants.ISPEC_DISK_SIZE] = disk_sizes

    if self.op.offline is not None and self.op.offline:
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
                          msg="can't change to offline")

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
      ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
    else:
      self._new_nics = None
      ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)

    if not self.op.ignore_ipolicy:
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)

      # Fill ispec with backend parameters
      ispec[constants.ISPEC_SPINDLE_USE] = \
        self.be_new.get(constants.BE_SPINDLE_USE, None)
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
                                                         None)

      # Copy ispec to verify parameters with min/max values separately
      ispec_max = ispec.copy()
      ispec_max[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MAXMEM, None)
      res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
      ispec_min = ispec.copy()
      ispec_min[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MINMEM, None)
      res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)

      if (res_max or res_min):
        # FIXME: Improve error message by including information about whether
        # the upper or lower limit of the parameter fails the ipolicy.
        msg = ("Instance allocation to group %s (%s) violates policy: %s" %
               (group_info, group_info.name,
                utils.CommaJoin(set(res_max + res_min))))
        raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
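
  # Illustrative note: the ipolicy check above validates both extremes of the
  # memory range separately; e.g. with minmem=512 and maxmem=2048 the spec is
  # verified once with ISPEC_MEM_SIZE=512 and once with ISPEC_MEM_SIZE=2048,
  # so the whole range must fit the node group's instance policy.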

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
    p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
    s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True, p_excl_stor)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True,
                              s_excl_stor)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in anno_disks:
      for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
                              excl_stor)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
    new_disks = [d.children[0] for d in instance.disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
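
  # Note for readers: a DRBD8 disk object has two LV children, children[0]
  # holding the data and children[1] the DRBD metadata; the conversion above
  # therefore promotes children[0] to a standalone plain disk and removes
  # the now-unneeded metadata volumes.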

  def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    """
    instance = self.instance

    # add a new disk
    if instance.disk_template in constants.DTS_FILEBASED:
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
    else:
      file_driver = file_path = None

    disk = \
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

    return (disk, [
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
      ])

  @staticmethod
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    disk.mode = params[constants.IDISK_MODE]

    return [
      ("disk.mode/%d" % idx, disk.mode),
      ]

  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    """
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
    for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if root.dev_type in constants.LDS_DRBD:
      self.cfg.AddTcpUdpPort(root.logical_id[2])

  @staticmethod
  def _CreateNewNic(idx, params, private):
    """Creates data structure for a new network interface.

    """
    mac = params[constants.INIC_MAC]
    ip = params.get(constants.INIC_IP, None)
    net = params.get(constants.INIC_NETWORK, None)
    #TODO: not private.filled?? can a nic have no nicparams??
    nicparams = private.filled

    return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
      ("nic.%d" % idx,
       "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
        private.filled[constants.NIC_LINK],
        net)),
      ])

  @staticmethod
  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    """
    changes = []

    for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
      if key in params:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

    if private.filled:
      nic.nicparams = private.filled

      for (key, val) in nic.nicparams.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

    return changes

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance.name,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    self.cfg.Update(instance, feedback_fn, self.proc.GetECId())

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
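
  # Illustrative note: Exec() dispatches conversions through this table, e.g.
  # a plain-to-drbd request resolves to
  #   self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)]
  # while unsupported pairs are rejected earlier by _PreCheckDiskTemplate.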


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=list(self.target_uuids))
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info), errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
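
  # The ResultWithJobs value wraps a list of job definitions, each itself a
  # list of opcodes (e.g. [[op_migrate], [op_replace_disks]], hypothetical
  # names); the master processor submits them as separate jobs and includes
  # the resulting job IDs in this opcode's result.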


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
                             ["node", "export"], self.op.use_locking)

  def ExpandNames(self):
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is None:
        result[node] = False
      else:
        result.setdefault(node, []).append(expname)

    return result
14414 class _ExportQuery(_QueryBase):
14415 FIELDS = query.EXPORT_FIELDS
14417 #: The node name is not a unique key for this query
14418 SORT_FIELD = "node"
14420 def ExpandNames(self, lu):
14421 lu.needed_locks = {}
14423 # The following variables interact with _QueryBase._GetNames
14424 if self.names:
14425 self.wanted = _GetWantedNodes(lu, self.names)
14426 else:
14427 self.wanted = locking.ALL_SET
14429 self.do_locking = self.use_locking
14431 if self.do_locking:
14432 lu.share_locks = _ShareAll()
14433 lu.needed_locks = {
14434 locking.LEVEL_NODE: self.wanted,
14435 }
14437 if not self.names:
14438 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14440 def DeclareLocks(self, lu, level):
14441 pass
14443 def _GetQueryData(self, lu):
14444 """Computes the list of nodes and their attributes.
14447 # Locking is not used
14449 assert not (compat.any(lu.glm.is_owned(level)
14450 for level in locking.LEVELS
14451 if level != locking.LEVEL_CLUSTER) or
14452 self.do_locking or self.use_locking)
14454 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14456 result = []
14458 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14459 if nres.fail_msg:
14460 result.append((node, None))
14461 else:
14462 result.extend((node, expname) for expname in nres.payload)
14464 return result
14467 class LUBackupPrepare(NoHooksLU):
14468 """Prepares an instance for an export and returns useful information.
14473 def ExpandNames(self):
14474 self._ExpandAndLockInstance()
14476 def CheckPrereq(self):
14477 """Check prerequisites.
14480 instance_name = self.op.instance_name
14482 self.instance = self.cfg.GetInstanceInfo(instance_name)
14483 assert self.instance is not None, \
14484 "Cannot retrieve locked instance %s" % self.op.instance_name
14485 _CheckNodeOnline(self, self.instance.primary_node)
14487 self._cds = _GetClusterDomainSecret()
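# Note: the cluster domain secret is shared by the clusters taking part in
# an inter-cluster instance move; below it keys the HMACs that sign the
# handshake, the X509 key name and the exported CA.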
14489 def Exec(self, feedback_fn):
14490 """Prepares an instance for an export.
14493 instance = self.instance
14495 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14496 salt = utils.GenerateSecret(8)
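# Note: the fresh salt is folded into the HMAC over the X509 key name below;
# LUBackupExport verifies that HMAC before trusting the key name.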
14498 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14499 result = self.rpc.call_x509_cert_create(instance.primary_node,
14500 constants.RIE_CERT_VALIDITY)
14501 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14503 (name, cert_pem) = result.payload
14505 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14506 cert_pem)
14508 return {
14509 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14510 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14511 salt),
14512 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14513 }
14515 return None
14518 class LUBackupExport(LogicalUnit):
14519 """Export an instance to an image in the cluster.
14522 HPATH = "instance-export"
14523 HTYPE = constants.HTYPE_INSTANCE
14526 def CheckArguments(self):
14527 """Check the arguments.
14530 self.x509_key_name = self.op.x509_key_name
14531 self.dest_x509_ca_pem = self.op.destination_x509_ca
14533 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14534 if not self.x509_key_name:
14535 raise errors.OpPrereqError("Missing X509 key name for encryption",
14536 errors.ECODE_INVAL)
14538 if not self.dest_x509_ca_pem:
14539 raise errors.OpPrereqError("Missing destination X509 CA",
14540 errors.ECODE_INVAL)
14542 def ExpandNames(self):
14543 self._ExpandAndLockInstance()
14545 # Lock all nodes for local exports
14546 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14547 # FIXME: lock only instance primary and destination node
14549 # Sad but true, for now we have to lock all nodes, as we don't know where
14550 # the previous export might be, and in this LU we search for it and
14551 # remove it from its current node. In the future we could fix this by:
14552 # - making a tasklet to search (share-lock all), then create the
14553 # new one, then one to remove, after
14554 # - removing the removal operation altogether
14555 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14557 # Allocations should be stopped while this LU runs with node locks, but
14558 # it doesn't have to be exclusive
14559 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14560 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14562 def DeclareLocks(self, level):
14563 """Last minute lock declaration."""
14564 # All nodes are locked anyway, so nothing to do here.
14566 def BuildHooksEnv(self):
14567 """Build hooks env.
14569 This will run on the master, primary node and target node.
14573 "EXPORT_MODE": self.op.mode,
14574 "EXPORT_NODE": self.op.target_node,
14575 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14576 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14577 # TODO: Generic function for boolean env variables
14578 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14581 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14585 def BuildHooksNodes(self):
14586 """Build hooks nodes.
14589 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14591 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14592 nl.append(self.op.target_node)
14596 def CheckPrereq(self):
14597 """Check prerequisites.
14599 This checks that the instance and node names are valid.
14601 """
14602 instance_name = self.op.instance_name
14604 self.instance = self.cfg.GetInstanceInfo(instance_name)
14605 assert self.instance is not None, \
14606 "Cannot retrieve locked instance %s" % self.op.instance_name
14607 _CheckNodeOnline(self, self.instance.primary_node)
14609 if (self.op.remove_instance and
14610 self.instance.admin_state == constants.ADMINST_UP and
14611 not self.op.shutdown):
14612 raise errors.OpPrereqError("Can not remove instance without shutting it"
14613 " down before", errors.ECODE_STATE)
14615 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14616 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14617 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14618 assert self.dst_node is not None
14620 _CheckNodeOnline(self, self.dst_node.name)
14621 _CheckNodeNotDrained(self, self.dst_node.name)
14624 self.dest_disk_info = None
14625 self.dest_x509_ca = None
14627 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14628 self.dst_node = None
14630 if len(self.op.target_node) != len(self.instance.disks):
14631 raise errors.OpPrereqError(("Received destination information for %s"
14632 " disks, but instance %s has %s disks") %
14633 (len(self.op.target_node), instance_name,
14634 len(self.instance.disks)),
14635 errors.ECODE_INVAL)
14637 cds = _GetClusterDomainSecret()
14639 # Check X509 key name
14640 try:
14641 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14642 except (TypeError, ValueError), err:
14643 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14644 errors.ECODE_INVAL)
14646 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14647 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14648 errors.ECODE_INVAL)
14650 # Load and verify CA
14651 try:
14652 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14653 except OpenSSL.crypto.Error, err:
14654 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14655 (err, ), errors.ECODE_INVAL)
14657 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14658 if errcode is not None:
14659 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14660 (msg, ), errors.ECODE_INVAL)
14662 self.dest_x509_ca = cert
14664 # Verify target information
14665 disk_info = []
14666 for idx, disk_data in enumerate(self.op.target_node):
14667 try:
14668 (host, port, magic) = \
14669 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14670 except errors.GenericError, err:
14671 raise errors.OpPrereqError("Target info for disk %s: %s" %
14672 (idx, err), errors.ECODE_INVAL)
14674 disk_info.append((host, port, magic))
14676 assert len(disk_info) == len(self.op.target_node)
14677 self.dest_disk_info = disk_info
14679 else:
14680 raise errors.ProgrammerError("Unhandled export mode %r" %
14681 self.op.mode)
14683 # instance disk type verification
14684 # TODO: Implement export support for file-based disks
14685 for disk in self.instance.disks:
14686 if disk.dev_type == constants.LD_FILE:
14687 raise errors.OpPrereqError("Export not supported for instances with"
14688 " file-based disks", errors.ECODE_INVAL)
14690 def _CleanupExports(self, feedback_fn):
14691 """Removes exports of current instance from all other nodes.
14693 If an instance in a cluster with nodes A..D was exported to node C, its
14694 exports will be removed from the nodes A, B and D.
14696 """
14697 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14699 nodelist = self.cfg.GetNodeList()
14700 nodelist.remove(self.dst_node.name)
14702 # on one-node clusters nodelist will be empty after the removal
14703 # if we proceed the backup would be removed because OpBackupQuery
14704 # substitutes an empty list with the full cluster node list.
14705 iname = self.instance.name
14706 if nodelist:
14707 feedback_fn("Removing old exports for instance %s" % iname)
14708 exportlist = self.rpc.call_export_list(nodelist)
14709 for node in exportlist:
14710 if exportlist[node].fail_msg:
14711 continue
14712 if iname in exportlist[node].payload:
14713 msg = self.rpc.call_export_remove(node, iname).fail_msg
14714 if msg:
14715 self.LogWarning("Could not remove older export for instance %s"
14716 " on node %s: %s", iname, node, msg)
14718 def Exec(self, feedback_fn):
14719 """Export an instance to an image in the cluster.
14722 assert self.op.mode in constants.EXPORT_MODES
14724 instance = self.instance
14725 src_node = instance.primary_node
14727 if self.op.shutdown:
14728 # shutdown the instance, but not the disks
14729 feedback_fn("Shutting down instance %s" % instance.name)
14730 result = self.rpc.call_instance_shutdown(src_node, instance,
14731 self.op.shutdown_timeout)
14732 # TODO: Maybe ignore failures if ignore_remove_failures is set
14733 result.Raise("Could not shutdown instance %s on"
14734 " node %s" % (instance.name, src_node))
14736 # set the disks ID correctly since call_instance_start needs the
14737 # correct drbd minor to create the symlinks
14738 for disk in instance.disks:
14739 self.cfg.SetDiskID(disk, src_node)
14741 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14743 if activate_disks:
14744 # Activate the instance disks if we're exporting a stopped instance
14745 feedback_fn("Activating disks for %s" % instance.name)
14746 _StartInstanceDisks(self, instance, None)
14748 try:
14749 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14750 instance)
14752 helper.CreateSnapshots()
14753 try:
14754 if (self.op.shutdown and
14755 instance.admin_state == constants.ADMINST_UP and
14756 not self.op.remove_instance):
14757 assert not activate_disks
14758 feedback_fn("Starting instance %s" % instance.name)
14759 result = self.rpc.call_instance_start(src_node,
14760 (instance, None, None), False)
14761 msg = result.fail_msg
14762 if msg:
14763 feedback_fn("Failed to start instance: %s" % msg)
14764 _ShutdownInstanceDisks(self, instance)
14765 raise errors.OpExecError("Could not start instance: %s" % msg)
14767 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14768 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14769 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14770 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14771 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
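# Note: only the connect timeout is overridden here; the remaining
# import/export timeouts presumably keep their defaults.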
14773 (key_name, _, _) = self.x509_key_name
14775 dest_ca_pem = \
14776 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14777 self.dest_x509_ca)
14779 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14780 key_name, dest_ca_pem,
14781 timeouts)
14783 finally:
14784 helper.Cleanup()
14785 # Check for backwards compatibility
14786 assert len(dresults) == len(instance.disks)
14787 assert compat.all(isinstance(i, bool) for i in dresults), \
14788 "Not all results are boolean: %r" % dresults
14792 feedback_fn("Deactivating disks for %s" % instance.name)
14793 _ShutdownInstanceDisks(self, instance)
14795 if not (compat.all(dresults) and fin_resu):
14796 failures = []
14797 if not fin_resu:
14798 failures.append("export finalization")
14799 if not compat.all(dresults):
14800 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14801 if not dsk)
14802 failures.append("disk export: disk(s) %s" % fdsk)
14804 raise errors.OpExecError("Export failed, errors in %s" %
14805 utils.CommaJoin(failures))
14807 # At this point, the export was successful, we can cleanup/finish
14809 # Remove instance if requested
14810 if self.op.remove_instance:
14811 feedback_fn("Removing instance %s" % instance.name)
14812 _RemoveInstance(self, feedback_fn, instance,
14813 self.op.ignore_remove_failures)
14815 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14816 self._CleanupExports(feedback_fn)
14818 return fin_resu, dresults
14821 class LUBackupRemove(NoHooksLU):
14822 """Remove exports related to the named instance.
14827 def ExpandNames(self):
14828 self.needed_locks = {
14829 # We need all nodes to be locked in order for RemoveExport to work, but
14830 # we don't need to lock the instance itself, as nothing will happen to it
14831 # (and we can remove exports also for a removed instance)
14832 locking.LEVEL_NODE: locking.ALL_SET,
14834 # Removing backups is quick, so blocking allocations is justified
14835 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14836 }
14838 # Allocations should be stopped while this LU runs with node locks, but it
14839 # doesn't have to be exclusive
14840 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14842 def Exec(self, feedback_fn):
14843 """Remove any export.
14846 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14847 # If the instance was not found we'll try with the name that was passed in.
14848 # This will only work if it was an FQDN, though.
14850 if not instance_name:
14852 instance_name = self.op.instance_name
14854 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14855 exportlist = self.rpc.call_export_list(locked_nodes)
14856 found = False
14857 for node in exportlist:
14858 msg = exportlist[node].fail_msg
14859 if msg:
14860 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14861 continue
14862 if instance_name in exportlist[node].payload:
14863 found = True
14864 result = self.rpc.call_export_remove(node, instance_name)
14865 msg = result.fail_msg
14866 if msg:
14867 logging.error("Could not remove export for instance %s"
14868 " on node %s: %s", instance_name, node, msg)
14870 if fqdn_warn and not found:
14871 feedback_fn("Export not found. If trying to remove an export belonging"
14872 " to a deleted instance please use its Fully Qualified"
14876 class LUGroupAdd(LogicalUnit):
14877 """Logical unit for creating node groups.
14880 HPATH = "group-add"
14881 HTYPE = constants.HTYPE_GROUP
14884 def ExpandNames(self):
14885 # We need the new group's UUID here so that we can create and acquire the
14886 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14887 # that it should not check whether the UUID exists in the configuration.
14888 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14889 self.needed_locks = {}
14890 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14892 def CheckPrereq(self):
14893 """Check prerequisites.
14895 This checks that the given group name is not an existing node group
14896 already.
14898 """
14899 try:
14900 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14901 except errors.OpPrereqError:
14902 pass
14903 else:
14904 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14905 " node group (UUID: %s)" %
14906 (self.op.group_name, existing_uuid),
14907 errors.ECODE_EXISTS)
14909 if self.op.ndparams:
14910 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14912 if self.op.hv_state:
14913 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14914 else:
14915 self.new_hv_state = None
14917 if self.op.disk_state:
14918 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14919 else:
14920 self.new_disk_state = None
14922 if self.op.diskparams:
14923 for templ in constants.DISK_TEMPLATES:
14924 if templ in self.op.diskparams:
14925 utils.ForceDictType(self.op.diskparams[templ],
14926 constants.DISK_DT_TYPES)
14927 self.new_diskparams = self.op.diskparams
14928 try:
14929 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14930 except errors.OpPrereqError, err:
14931 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14932 errors.ECODE_INVAL)
14933 else:
14934 self.new_diskparams = {}
14936 if self.op.ipolicy:
14937 cluster = self.cfg.GetClusterInfo()
14938 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14939 try:
14940 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14941 except errors.ConfigurationError, err:
14942 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14943 errors.ECODE_INVAL)
14945 def BuildHooksEnv(self):
14946 """Build hooks env.
14950 "GROUP_NAME": self.op.group_name,
14953 def BuildHooksNodes(self):
14954 """Build hooks nodes.
14957 mn = self.cfg.GetMasterNode()
14958 return ([mn], [mn])
14960 def Exec(self, feedback_fn):
14961 """Add the node group to the cluster.
14964 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14965 uuid=self.group_uuid,
14966 alloc_policy=self.op.alloc_policy,
14967 ndparams=self.op.ndparams,
14968 diskparams=self.new_diskparams,
14969 ipolicy=self.op.ipolicy,
14970 hv_state_static=self.new_hv_state,
14971 disk_state_static=self.new_disk_state)
14973 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
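# A lock added through add_locks would normally be removed again once the LU
# finishes; since the group now exists in the configuration, drop it from
# remove_locks below so the lock is kept.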
14974 del self.remove_locks[locking.LEVEL_NODEGROUP]
14977 class LUGroupAssignNodes(NoHooksLU):
14978 """Logical unit for assigning nodes to groups.
14983 def ExpandNames(self):
14984 # These raise errors.OpPrereqError on their own:
14985 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14986 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14988 # We want to lock all the affected nodes and groups. We have readily
14989 # available the list of nodes, and the *destination* group. To gather the
14990 # list of "source" groups, we need to fetch node information later on.
14991 self.needed_locks = {
14992 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14993 locking.LEVEL_NODE: self.op.nodes,
14994 }
14996 def DeclareLocks(self, level):
14997 if level == locking.LEVEL_NODEGROUP:
14998 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15000 # Try to get all affected nodes' groups without having the group or node
15001 # lock yet. Needs verification later in the code flow.
15002 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15004 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15006 def CheckPrereq(self):
15007 """Check prerequisites.
15010 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15011 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15012 frozenset(self.op.nodes))
15014 expected_locks = (set([self.group_uuid]) |
15015 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15016 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15017 if actual_locks != expected_locks:
15018 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15019 " current groups are '%s', used to be '%s'" %
15020 (utils.CommaJoin(expected_locks),
15021 utils.CommaJoin(actual_locks)))
15023 self.node_data = self.cfg.GetAllNodesInfo()
15024 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15025 instance_data = self.cfg.GetAllInstancesInfo()
15027 if self.group is None:
15028 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15029 (self.op.group_name, self.group_uuid))
15031 (new_splits, previous_splits) = \
15032 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15033 for node in self.op.nodes],
15034 self.node_data, instance_data)
15036 if new_splits:
15037 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15039 if not self.op.force:
15040 raise errors.OpExecError("The following instances get split by this"
15041 " change and --force was not given: %s" %
15042 fmt_new_splits)
15043 else:
15044 self.LogWarning("This operation will split the following instances: %s",
15045 fmt_new_splits)
15047 if previous_splits:
15048 self.LogWarning("In addition, these already-split instances continue"
15049 " to be split across groups: %s",
15050 utils.CommaJoin(utils.NiceSort(previous_splits)))
15052 def Exec(self, feedback_fn):
15053 """Assign nodes to a new group.
15056 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15058 self.cfg.AssignGroupNodes(mods)
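# AssignGroupNodes applies all node-to-group changes in one configuration
# update, matching the atomic view that CheckAssignmentForSplitInstances
# (used in CheckPrereq above) takes of the same change set.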
15060 @staticmethod
15061 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15062 """Check for split instances after a node assignment.
15064 This method considers a series of node assignments as an atomic operation,
15065 and returns information about split instances after applying the set of
15066 changes.
15068 In particular, it returns information about newly split instances, and
15069 instances that were already split, and remain so after the change.
15071 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15072 considered.
15074 @type changes: list of (node_name, new_group_uuid) pairs.
15075 @param changes: list of node assignments to consider.
15076 @param node_data: a dict with data for all nodes
15077 @param instance_data: a dict with all instances to consider
15078 @rtype: a two-tuple
15079 @return: a list of instances that were previously okay and result split as a
15080 consequence of this change, and a list of instances that were previously
15081 split and this change does not fix.
15083 """
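# Illustrative example: a DRBD instance on (node1, node2), both in group
# "grp1". Reassigning node2 to "grp2" makes the instance newly split, so it
# is returned in the first list; had it already spanned two groups before
# the change, it would show up in the second list instead.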
15084 changed_nodes = dict((node, group) for node, group in changes
15085 if node_data[node].group != group)
15087 all_split_instances = set()
15088 previously_split_instances = set()
15090 def InstanceNodes(instance):
15091 return [instance.primary_node] + list(instance.secondary_nodes)
15093 for inst in instance_data.values():
15094 if inst.disk_template not in constants.DTS_INT_MIRROR:
15095 continue
15097 instance_nodes = InstanceNodes(inst)
15099 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15100 previously_split_instances.add(inst.name)
15102 if len(set(changed_nodes.get(node, node_data[node].group)
15103 for node in instance_nodes)) > 1:
15104 all_split_instances.add(inst.name)
15106 return (list(all_split_instances - previously_split_instances),
15107 list(previously_split_instances & all_split_instances))
15110 class _GroupQuery(_QueryBase):
15111 FIELDS = query.GROUP_FIELDS
15113 def ExpandNames(self, lu):
15114 lu.needed_locks = {}
15116 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15117 self._cluster = lu.cfg.GetClusterInfo()
15118 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15120 if not self.names:
15121 self.wanted = [name_to_uuid[name]
15122 for name in utils.NiceSort(name_to_uuid.keys())]
15123 else:
15124 # Accept names to be either names or UUIDs.
15125 missing = []
15126 self.wanted = []
15127 all_uuid = frozenset(self._all_groups.keys())
15129 for name in self.names:
15130 if name in all_uuid:
15131 self.wanted.append(name)
15132 elif name in name_to_uuid:
15133 self.wanted.append(name_to_uuid[name])
15134 else:
15135 missing.append(name)
15137 if missing:
15138 raise errors.OpPrereqError("Some groups do not exist: %s" %
15139 utils.CommaJoin(missing),
15140 errors.ECODE_NOENT)
15142 def DeclareLocks(self, lu, level):
15143 pass
15145 def _GetQueryData(self, lu):
15146 """Computes the list of node groups and their attributes.
15149 do_nodes = query.GQ_NODE in self.requested_data
15150 do_instances = query.GQ_INST in self.requested_data
15152 group_to_nodes = None
15153 group_to_instances = None
15155 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15156 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15157 # latter GetAllInstancesInfo() is not enough, for we have to go through
15158 # instance->node. Hence, we will need to process nodes even if we only need
15159 # instance information.
15160 if do_nodes or do_instances:
15161 all_nodes = lu.cfg.GetAllNodesInfo()
15162 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15163 node_to_group = {}
15165 for node in all_nodes.values():
15166 if node.group in group_to_nodes:
15167 group_to_nodes[node.group].append(node.name)
15168 node_to_group[node.name] = node.group
15170 if do_instances:
15171 all_instances = lu.cfg.GetAllInstancesInfo()
15172 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15174 for instance in all_instances.values():
15175 node = instance.primary_node
15176 if node in node_to_group:
15177 group_to_instances[node_to_group[node]].append(instance.name)
15179 if not do_nodes:
15180 # Do not pass on node information if it was not requested.
15181 group_to_nodes = None
15183 return query.GroupQueryData(self._cluster,
15184 [self._all_groups[uuid]
15185 for uuid in self.wanted],
15186 group_to_nodes, group_to_instances,
15187 query.GQ_DISKPARAMS in self.requested_data)
15190 class LUGroupQuery(NoHooksLU):
15191 """Logical unit for querying node groups.
15196 def CheckArguments(self):
15197 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15198 self.op.output_fields, False)
15200 def ExpandNames(self):
15201 self.gq.ExpandNames(self)
15203 def DeclareLocks(self, level):
15204 self.gq.DeclareLocks(self, level)
15206 def Exec(self, feedback_fn):
15207 return self.gq.OldStyleQuery(self)
15210 class LUGroupSetParams(LogicalUnit):
15211 """Modifies the parameters of a node group.
15214 HPATH = "group-modify"
15215 HTYPE = constants.HTYPE_GROUP
15218 def CheckArguments(self):
15219 all_changes = [
15220 self.op.ndparams,
15221 self.op.diskparams,
15222 self.op.alloc_policy,
15223 self.op.hv_state,
15224 self.op.disk_state,
15225 self.op.ipolicy,
15226 ]
15228 if all_changes.count(None) == len(all_changes):
15229 raise errors.OpPrereqError("Please pass at least one modification",
15230 errors.ECODE_INVAL)
15232 def ExpandNames(self):
15233 # This raises errors.OpPrereqError on its own:
15234 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15236 self.needed_locks = {
15237 locking.LEVEL_INSTANCE: [],
15238 locking.LEVEL_NODEGROUP: [self.group_uuid],
15239 }
15241 self.share_locks[locking.LEVEL_INSTANCE] = 1
15243 def DeclareLocks(self, level):
15244 if level == locking.LEVEL_INSTANCE:
15245 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15247 # Lock instances optimistically, needs verification once group lock has
15248 # been acquired
15249 self.needed_locks[locking.LEVEL_INSTANCE] = \
15250 self.cfg.GetNodeGroupInstances(self.group_uuid)
15252 @staticmethod
15253 def _UpdateAndVerifyDiskParams(old, new):
15254 """Updates and verifies disk parameters.
15256 """
15257 new_params = _GetUpdatedParams(old, new)
15258 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15259 return new_params
15261 def CheckPrereq(self):
15262 """Check prerequisites.
15265 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15267 # Check if locked instances are still correct
15268 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15270 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15271 cluster = self.cfg.GetClusterInfo()
15273 if self.group is None:
15274 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15275 (self.op.group_name, self.group_uuid))
15277 if self.op.ndparams:
15278 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15279 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15280 self.new_ndparams = new_ndparams
15282 if self.op.diskparams:
15283 diskparams = self.group.diskparams
15284 uavdp = self._UpdateAndVerifyDiskParams
15285 # For each disktemplate subdict update and verify the values
15286 new_diskparams = dict((dt,
15287 uavdp(diskparams.get(dt, {}),
15288 self.op.diskparams[dt]))
15289 for dt in constants.DISK_TEMPLATES
15290 if dt in self.op.diskparams)
15291 # Now that all subdicts of diskparams are updated, merge them with the
15292 # base dict to obtain the full set of disk parameters
15293 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15294 try:
15295 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15296 except errors.OpPrereqError, err:
15297 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15298 errors.ECODE_INVAL)
15300 if self.op.hv_state:
15301 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15302 self.group.hv_state_static)
15304 if self.op.disk_state:
15305 self.new_disk_state = \
15306 _MergeAndVerifyDiskState(self.op.disk_state,
15307 self.group.disk_state_static)
15309 if self.op.ipolicy:
15310 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15311 self.op.ipolicy,
15312 group_policy=True)
15314 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15315 inst_filter = lambda inst: inst.name in owned_instances
15316 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15317 gmi = ganeti.masterd.instance
15318 violations = \
15319 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15320 self.group),
15321 new_ipolicy, instances)
15323 if violations:
15324 self.LogWarning("After the ipolicy change the following instances"
15325 " violate them: %s",
15326 utils.CommaJoin(violations))
15328 def BuildHooksEnv(self):
15329 """Build hooks env.
15333 "GROUP_NAME": self.op.group_name,
15334 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15337 def BuildHooksNodes(self):
15338 """Build hooks nodes.
15341 mn = self.cfg.GetMasterNode()
15342 return ([mn], [mn])
15344 def Exec(self, feedback_fn):
15345 """Modifies the node group.
15350 if self.op.ndparams:
15351 self.group.ndparams = self.new_ndparams
15352 result.append(("ndparams", str(self.group.ndparams)))
15354 if self.op.diskparams:
15355 self.group.diskparams = self.new_diskparams
15356 result.append(("diskparams", str(self.group.diskparams)))
15358 if self.op.alloc_policy:
15359 self.group.alloc_policy = self.op.alloc_policy
15361 if self.op.hv_state:
15362 self.group.hv_state_static = self.new_hv_state
15364 if self.op.disk_state:
15365 self.group.disk_state_static = self.new_disk_state
15367 if self.op.ipolicy:
15368 self.group.ipolicy = self.new_ipolicy
15370 self.cfg.Update(self.group, feedback_fn)
15372 return result
15374 class LUGroupRemove(LogicalUnit):
15375 HPATH = "group-remove"
15376 HTYPE = constants.HTYPE_GROUP
15377 REQ_BGL = False
15379 def ExpandNames(self):
15380 # This raises errors.OpPrereqError on its own:
15381 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15382 self.needed_locks = {
15383 locking.LEVEL_NODEGROUP: [self.group_uuid],
15384 }
15386 def CheckPrereq(self):
15387 """Check prerequisites.
15389 This checks that the given group name exists as a node group, that it is
15390 empty (i.e., contains no nodes), and that it is not the last group of the
15391 cluster.
15393 """
15394 # Verify that the group is empty.
15395 group_nodes = [node.name
15396 for node in self.cfg.GetAllNodesInfo().values()
15397 if node.group == self.group_uuid]
15400 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15402 (self.op.group_name,
15403 utils.CommaJoin(utils.NiceSort(group_nodes))),
15404 errors.ECODE_STATE)
15406 # Verify the cluster would not be left group-less.
15407 if len(self.cfg.GetNodeGroupList()) == 1:
15408 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15409 " removed" % self.op.group_name,
15410 errors.ECODE_STATE)
15412 def BuildHooksEnv(self):
15413 """Build hooks env.
15417 "GROUP_NAME": self.op.group_name,
15420 def BuildHooksNodes(self):
15421 """Build hooks nodes.
15424 mn = self.cfg.GetMasterNode()
15425 return ([mn], [mn])
15427 def Exec(self, feedback_fn):
15428 """Remove the node group.
15432 self.cfg.RemoveNodeGroup(self.group_uuid)
15433 except errors.ConfigurationError:
15434 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15435 (self.op.group_name, self.group_uuid))
15437 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15440 class LUGroupRename(LogicalUnit):
15441 HPATH = "group-rename"
15442 HTYPE = constants.HTYPE_GROUP
15443 REQ_BGL = False
15445 def ExpandNames(self):
15446 # This raises errors.OpPrereqError on its own:
15447 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15449 self.needed_locks = {
15450 locking.LEVEL_NODEGROUP: [self.group_uuid],
15453 def CheckPrereq(self):
15454 """Check prerequisites.
15456 Ensures requested new name is not yet used.
15458 """
15459 try:
15460 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15461 except errors.OpPrereqError:
15462 pass
15463 else:
15464 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15465 " node group (UUID: %s)" %
15466 (self.op.new_name, new_name_uuid),
15467 errors.ECODE_EXISTS)
15469 def BuildHooksEnv(self):
15470 """Build hooks env.
15474 "OLD_NAME": self.op.group_name,
15475 "NEW_NAME": self.op.new_name,
15478 def BuildHooksNodes(self):
15479 """Build hooks nodes.
15482 mn = self.cfg.GetMasterNode()
15484 all_nodes = self.cfg.GetAllNodesInfo()
15485 all_nodes.pop(mn, None)
15487 run_nodes = [mn]
15488 run_nodes.extend(node.name for node in all_nodes.values()
15489 if node.group == self.group_uuid)
15491 return (run_nodes, run_nodes)
15493 def Exec(self, feedback_fn):
15494 """Rename the node group.
15497 group = self.cfg.GetNodeGroup(self.group_uuid)
15500 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15501 (self.op.group_name, self.group_uuid))
15503 group.name = self.op.new_name
15504 self.cfg.Update(group, feedback_fn)
15506 return self.op.new_name
15509 class LUGroupEvacuate(LogicalUnit):
15510 HPATH = "group-evacuate"
15511 HTYPE = constants.HTYPE_GROUP
15512 REQ_BGL = False
15514 def ExpandNames(self):
15515 # This raises errors.OpPrereqError on its own:
15516 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15518 if self.op.target_groups:
15519 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15520 self.op.target_groups)
15521 else:
15522 self.req_target_uuids = []
15524 if self.group_uuid in self.req_target_uuids:
15525 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15526 " as a target group (targets are %s)" %
15528 utils.CommaJoin(self.req_target_uuids)),
15529 errors.ECODE_INVAL)
15531 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
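# Note: _GetDefaultIAllocator falls back to the cluster's default
# iallocator when the opcode does not name one.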
15533 self.share_locks = _ShareAll()
15534 self.needed_locks = {
15535 locking.LEVEL_INSTANCE: [],
15536 locking.LEVEL_NODEGROUP: [],
15537 locking.LEVEL_NODE: [],
15538 }
15540 def DeclareLocks(self, level):
15541 if level == locking.LEVEL_INSTANCE:
15542 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15544 # Lock instances optimistically, needs verification once node and group
15545 # locks have been acquired
15546 self.needed_locks[locking.LEVEL_INSTANCE] = \
15547 self.cfg.GetNodeGroupInstances(self.group_uuid)
15549 elif level == locking.LEVEL_NODEGROUP:
15550 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15552 if self.req_target_uuids:
15553 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15555 # Lock all groups used by instances optimistically; this requires going
15556 # via the node before it's locked, requiring verification later on
15557 lock_groups.update(group_uuid
15558 for instance_name in
15559 self.owned_locks(locking.LEVEL_INSTANCE)
15560 for group_uuid in
15561 self.cfg.GetInstanceNodeGroups(instance_name))
15562 else:
15563 # No target groups, need to lock all of them
15564 lock_groups = locking.ALL_SET
15566 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15568 elif level == locking.LEVEL_NODE:
15569 # This will only lock the nodes in the group to be evacuated which
15570 # contain actual instances
15571 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15572 self._LockInstancesNodes()
15574 # Lock all nodes in group to be evacuated and target groups
15575 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15576 assert self.group_uuid in owned_groups
15577 member_nodes = [node_name
15578 for group in owned_groups
15579 for node_name in self.cfg.GetNodeGroup(group).members]
15580 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15582 def CheckPrereq(self):
15583 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15584 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15585 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15587 assert owned_groups.issuperset(self.req_target_uuids)
15588 assert self.group_uuid in owned_groups
15590 # Check if locked instances are still correct
15591 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15593 # Get instance information
15594 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15596 # Check if node groups for locked instances are still correct
15597 _CheckInstancesNodeGroups(self.cfg, self.instances,
15598 owned_groups, owned_nodes, self.group_uuid)
15600 if self.req_target_uuids:
15601 # User requested specific target groups
15602 self.target_uuids = self.req_target_uuids
15604 # All groups except the one to be evacuated are potential targets
15605 self.target_uuids = [group_uuid for group_uuid in owned_groups
15606 if group_uuid != self.group_uuid]
15608 if not self.target_uuids:
15609 raise errors.OpPrereqError("There are no possible target groups",
15610 errors.ECODE_INVAL)
15612 def BuildHooksEnv(self):
15613 """Build hooks env.
15617 "GROUP_NAME": self.op.group_name,
15618 "TARGET_GROUPS": " ".join(self.target_uuids),
15621 def BuildHooksNodes(self):
15622 """Build hooks nodes.
15625 mn = self.cfg.GetMasterNode()
15627 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15629 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15631 return (run_nodes, run_nodes)
15633 def Exec(self, feedback_fn):
15634 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15636 assert self.group_uuid not in self.target_uuids
15638 req = iallocator.IAReqGroupChange(instances=instances,
15639 target_groups=self.target_uuids)
15640 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15642 ial.Run(self.op.iallocator)
15644 if not ial.success:
15645 raise errors.OpPrereqError("Can't compute group evacuation using"
15646 " iallocator '%s': %s" %
15647 (self.op.iallocator, ial.info),
15648 errors.ECODE_NORES)
15650 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15652 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15653 len(jobs), self.op.group_name)
15655 return ResultWithJobs(jobs)
15658 class TagsLU(NoHooksLU): # pylint: disable=W0223
15659 """Generic tags LU.
15661 This is an abstract class which is the parent of all the other tags LUs.
15663 """
15664 def ExpandNames(self):
15665 self.group_uuid = None
15666 self.needed_locks = {}
15668 if self.op.kind == constants.TAG_NODE:
15669 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15670 lock_level = locking.LEVEL_NODE
15671 lock_name = self.op.name
15672 elif self.op.kind == constants.TAG_INSTANCE:
15673 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15674 lock_level = locking.LEVEL_INSTANCE
15675 lock_name = self.op.name
15676 elif self.op.kind == constants.TAG_NODEGROUP:
15677 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15678 lock_level = locking.LEVEL_NODEGROUP
15679 lock_name = self.group_uuid
15680 elif self.op.kind == constants.TAG_NETWORK:
15681 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15682 lock_level = locking.LEVEL_NETWORK
15683 lock_name = self.network_uuid
15684 else:
15685 lock_level = None
15686 lock_name = None
15688 if lock_level and getattr(self.op, "use_locking", True):
15689 self.needed_locks[lock_level] = lock_name
15691 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15692 # not possible to acquire the BGL based on opcode parameters)
15694 def CheckPrereq(self):
15695 """Check prerequisites.
15698 if self.op.kind == constants.TAG_CLUSTER:
15699 self.target = self.cfg.GetClusterInfo()
15700 elif self.op.kind == constants.TAG_NODE:
15701 self.target = self.cfg.GetNodeInfo(self.op.name)
15702 elif self.op.kind == constants.TAG_INSTANCE:
15703 self.target = self.cfg.GetInstanceInfo(self.op.name)
15704 elif self.op.kind == constants.TAG_NODEGROUP:
15705 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15706 elif self.op.kind == constants.TAG_NETWORK:
15707 self.target = self.cfg.GetNetwork(self.network_uuid)
15709 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15710 str(self.op.kind), errors.ECODE_INVAL)
15713 class LUTagsGet(TagsLU):
15714 """Returns the tags of a given object.
15719 def ExpandNames(self):
15720 TagsLU.ExpandNames(self)
15722 # Share locks as this is only a read operation
15723 self.share_locks = _ShareAll()
15725 def Exec(self, feedback_fn):
15726 """Returns the tag list.
15729 return list(self.target.GetTags())
15732 class LUTagsSearch(NoHooksLU):
15733 """Searches the tags for a given pattern.
15738 def ExpandNames(self):
15739 self.needed_locks = {}
15741 def CheckPrereq(self):
15742 """Check prerequisites.
15744 This checks the pattern passed for validity by compiling it.
15746 """
15747 try:
15748 self.re = re.compile(self.op.pattern)
15749 except re.error, err:
15750 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15751 (self.op.pattern, err), errors.ECODE_INVAL)
15753 def Exec(self, feedback_fn):
15754 """Returns the tag list.
15758 tgts = [("/cluster", cfg.GetClusterInfo())]
15759 ilist = cfg.GetAllInstancesInfo().values()
15760 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15761 nlist = cfg.GetAllNodesInfo().values()
15762 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15763 tgts.extend(("/nodegroup/%s" % n.name, n)
15764 for n in cfg.GetAllNodeGroupsInfo().values())
15765 results = []
15766 for path, target in tgts:
15767 for tag in target.GetTags():
15768 if self.re.search(tag):
15769 results.append((path, tag))
15771 return results
15773 class LUTagsSet(TagsLU):
15774 """Sets a tag on a given object.
15779 def CheckPrereq(self):
15780 """Check prerequisites.
15782 This checks the type and length of the tag name and value.
15784 """
15785 TagsLU.CheckPrereq(self)
15786 for tag in self.op.tags:
15787 objects.TaggableObject.ValidateTag(tag)
15789 def Exec(self, feedback_fn):
15790 """Sets the tag.
15792 """
15793 try:
15794 for tag in self.op.tags:
15795 self.target.AddTag(tag)
15796 except errors.TagError, err:
15797 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15798 self.cfg.Update(self.target, feedback_fn)
15801 class LUTagsDel(TagsLU):
15802 """Delete a list of tags from a given object.
15807 def CheckPrereq(self):
15808 """Check prerequisites.
15810 This checks that we have the given tag.
15812 """
15813 TagsLU.CheckPrereq(self)
15814 for tag in self.op.tags:
15815 objects.TaggableObject.ValidateTag(tag)
15816 del_tags = frozenset(self.op.tags)
15817 cur_tags = self.target.GetTags()
15819 diff_tags = del_tags - cur_tags
15820 if diff_tags:
15821 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15822 raise errors.OpPrereqError("Tag(s) %s not found" %
15823 (utils.CommaJoin(diff_names), ),
15824 errors.ECODE_NOENT)
15826 def Exec(self, feedback_fn):
15827 """Remove the tag from the object.
15830 for tag in self.op.tags:
15831 self.target.RemoveTag(tag)
15832 self.cfg.Update(self.target, feedback_fn)
15835 class LUTestDelay(NoHooksLU):
15836 """Sleep for a specified amount of time.
15838 This LU sleeps on the master and/or nodes for a specified amount of
15839 time.
15841 """
15842 REQ_BGL = False
15844 def ExpandNames(self):
15845 """Expand names and set required locks.
15847 This expands the node list, if any.
15849 """
15850 self.needed_locks = {}
15851 if self.op.on_nodes:
15852 # _GetWantedNodes can be used here, but is not always appropriate to use
15853 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15854 # more information.
15855 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15856 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15858 def _TestDelay(self):
15859 """Do the actual sleep.
15862 if self.op.on_master:
15863 if not utils.TestDelay(self.op.duration):
15864 raise errors.OpExecError("Error during master delay test")
15865 if self.op.on_nodes:
15866 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15867 for node, node_result in result.items():
15868 node_result.Raise("Failure during rpc call to node %s" % node)
15870 def Exec(self, feedback_fn):
15871 """Execute the test delay opcode, with the wanted repetitions.
15874 if self.op.repeat == 0:
15875 self._TestDelay()
15876 else:
15877 top_value = self.op.repeat - 1
15878 for i in range(self.op.repeat):
15879 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15880 self._TestDelay()
15883 class LURestrictedCommand(NoHooksLU):
15884 """Logical unit for executing restricted commands.
15889 def ExpandNames(self):
15890 if self.op.nodes:
15891 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15893 self.needed_locks = {
15894 locking.LEVEL_NODE: self.op.nodes,
15895 }
15896 self.share_locks = {
15897 locking.LEVEL_NODE: not self.op.use_locking,
15898 }
15900 def CheckPrereq(self):
15901 """Check prerequisites.
15905 def Exec(self, feedback_fn):
15906 """Execute restricted command and return output.
15909 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15911 # Check if correct locks are held
15912 assert set(self.op.nodes).issubset(owned_nodes)
15914 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15916 result = []
15918 for node_name in self.op.nodes:
15919 nres = rpcres[node_name]
15920 if nres.fail_msg:
15921 msg = ("Command '%s' on node '%s' failed: %s" %
15922 (self.op.command, node_name, nres.fail_msg))
15923 result.append((False, msg))
15924 else:
15925 result.append((True, nres.payload))
15927 return result
15930 class LUTestJqueue(NoHooksLU):
15931 """Utility LU to test some aspects of the job queue.
15936 # Must be lower than default timeout for WaitForJobChange to see whether it
15937 # notices changed jobs
15938 _CLIENT_CONNECT_TIMEOUT = 20.0
15939 _CLIENT_CONFIRM_TIMEOUT = 60.0
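# Timeout for the client to confirm a notification, used by
# _NotifyUsingSocket below while waiting on the accepted connection.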
15941 @classmethod
15942 def _NotifyUsingSocket(cls, cb, errcls):
15943 """Opens a Unix socket and waits for another program to connect.
15945 @type cb: callable
15946 @param cb: Callback to send socket name to client
15947 @type errcls: class
15948 @param errcls: Exception class to use for errors
15950 """
15951 # Using a temporary directory as there's no easy way to create temporary
15952 # sockets without writing a custom loop around tempfile.mktemp and
15953 # socket.bind
15954 tmpdir = tempfile.mkdtemp()
15955 try:
15956 tmpsock = utils.PathJoin(tmpdir, "sock")
15958 logging.debug("Creating temporary socket at %s", tmpsock)
15959 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15960 try:
15961 sock.bind(tmpsock)
15962 sock.listen(1)
15964 # Send details to client
15965 cb(tmpsock)
15967 # Wait for client to connect before continuing
15968 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15969 try:
15970 (conn, _) = sock.accept()
15971 except socket.error, err:
15972 raise errcls("Client didn't connect in time (%s)" % err)
15973 finally:
15974 sock.close()
15975 finally:
15976 # Remove as soon as client is connected
15977 shutil.rmtree(tmpdir)
15979 # Wait for client to close
15980 try:
15981 try:
15982 # pylint: disable=E1101
15983 # Instance of '_socketobject' has no ... member
15984 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15985 conn.recv(1)
15986 except socket.error, err:
15987 raise errcls("Client failed to confirm notification (%s)" % err)
15988 finally:
15989 conn.close()
15991 def _SendNotification(self, test, arg, sockname):
15992 """Sends a notification to the client.
15994 @type test: string
15995 @param test: Test name
15996 @param arg: Test argument (depends on test)
15997 @type sockname: string
15998 @param sockname: Socket path
16000 """
16001 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16003 def _Notify(self, prereq, test, arg):
16004 """Notifies the client of a test.
16006 @type prereq: bool
16007 @param prereq: Whether this is a prereq-phase test
16008 @type test: string
16009 @param test: Test name
16010 @param arg: Test argument (depends on test)
16012 """
16013 if prereq:
16014 errcls = errors.OpPrereqError
16015 else:
16016 errcls = errors.OpExecError
16018 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16019 test, arg),
16020 errcls)
16022 def CheckArguments(self):
16023 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16024 self.expandnames_calls = 0
16026 def ExpandNames(self):
16027 checkargs_calls = getattr(self, "checkargs_calls", 0)
16028 if checkargs_calls < 1:
16029 raise errors.ProgrammerError("CheckArguments was not called")
16031 self.expandnames_calls += 1
16033 if self.op.notify_waitlock:
16034 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16036 self.LogInfo("Expanding names")
16038 # Get lock on master node (just to get a lock, not for a particular reason)
16039 self.needed_locks = {
16040 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16041 }
16043 def Exec(self, feedback_fn):
16044 if self.expandnames_calls < 1:
16045 raise errors.ProgrammerError("ExpandNames was not called")
16047 if self.op.notify_exec:
16048 self._Notify(False, constants.JQT_EXEC, None)
16050 self.LogInfo("Executing")
16052 if self.op.log_messages:
16053 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16054 for idx, msg in enumerate(self.op.log_messages):
16055 self.LogInfo("Sending log message %s", idx + 1)
16056 feedback_fn(constants.JQT_MSGPREFIX + msg)
16057 # Report how many test messages have been sent
16058 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16061 raise errors.OpExecError("Opcode failure was requested")
16066 class LUTestAllocator(NoHooksLU):
16067 """Run allocator tests.
16069 This LU runs the allocator tests.
16071 """
16072 def CheckPrereq(self):
16073 """Check prerequisites.
16075 This checks the opcode parameters depending on the director and mode test.
16078 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16079 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16080 for attr in ["memory", "disks", "disk_template",
16081 "os", "tags", "nics", "vcpus"]:
16082 if not hasattr(self.op, attr):
16083 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16084 attr, errors.ECODE_INVAL)
16085 iname = self.cfg.ExpandInstanceName(self.op.name)
16086 if iname is not None:
16087 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16088 iname, errors.ECODE_EXISTS)
16089 if not isinstance(self.op.nics, list):
16090 raise errors.OpPrereqError("Invalid parameter 'nics'",
16091 errors.ECODE_INVAL)
16092 if not isinstance(self.op.disks, list):
16093 raise errors.OpPrereqError("Invalid parameter 'disks'",
16094 errors.ECODE_INVAL)
16095 for row in self.op.disks:
16096 if (not isinstance(row, dict) or
16097 constants.IDISK_SIZE not in row or
16098 not isinstance(row[constants.IDISK_SIZE], int) or
16099 constants.IDISK_MODE not in row or
16100 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16101 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16102 " parameter", errors.ECODE_INVAL)
16103 if self.op.hypervisor is None:
16104 self.op.hypervisor = self.cfg.GetHypervisorType()
16105 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16106 fname = _ExpandInstanceName(self.cfg, self.op.name)
16107 self.op.name = fname
16108 self.relocate_from = \
16109 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16110 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16111 constants.IALLOCATOR_MODE_NODE_EVAC):
16112 if not self.op.instances:
16113 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16114 self.op.instances = _GetWantedInstances(self, self.op.instances)
16116 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16117 self.op.mode, errors.ECODE_INVAL)
16119 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16120 if self.op.iallocator is None:
16121 raise errors.OpPrereqError("Missing allocator name",
16122 errors.ECODE_INVAL)
16123 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16124 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16125 self.op.direction, errors.ECODE_INVAL)
16127 def Exec(self, feedback_fn):
16128 """Run the allocator test.
16131 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16132 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16133 memory=self.op.memory,
16134 disks=self.op.disks,
16135 disk_template=self.op.disk_template,
16136 os=self.op.os,
16137 tags=self.op.tags,
16138 nics=self.op.nics,
16139 vcpus=self.op.vcpus,
16140 spindle_use=self.op.spindle_use,
16141 hypervisor=self.op.hypervisor)
16142 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16143 req = iallocator.IAReqRelocate(name=self.op.name,
16144 relocate_from=list(self.relocate_from))
16145 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16146 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16147 target_groups=self.op.target_groups)
16148 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16149 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16150 evac_mode=self.op.evac_mode)
16151 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16152 disk_template = self.op.disk_template
16153 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16154 memory=self.op.memory,
16155 disks=self.op.disks,
16156 disk_template=disk_template,
16157 os=self.op.os,
16158 tags=self.op.tags,
16159 nics=self.op.nics,
16160 vcpus=self.op.vcpus,
16161 spindle_use=self.op.spindle_use,
16162 hypervisor=self.op.hypervisor)
16163 for idx in range(self.op.count)]
16164 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16166 raise errors.ProgrammerError("Uncatched mode %s in"
16167 " LUTestAllocator.Exec", self.op.mode)
16169 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16170 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16171 result = ial.in_text
16172 else:
16173 ial.Run(self.op.iallocator, validate=False)
16174 result = ial.out_text
16176 return result
16178 class LUNetworkAdd(LogicalUnit):
16179 """Logical unit for creating networks.
16182 HPATH = "network-add"
16183 HTYPE = constants.HTYPE_NETWORK
16186 def BuildHooksNodes(self):
16187 """Build hooks nodes.
16190 mn = self.cfg.GetMasterNode()
16191 return ([mn], [mn])
16193 def CheckArguments(self):
16194 if self.op.mac_prefix:
16195 self.op.mac_prefix = \
16196 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16198 def ExpandNames(self):
16199 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
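# As in LUGroupAdd, the UUID is generated up front so the corresponding
# network lock can be acquired via add_locks; Exec later passes
# check_uuid=False to cfg.AddNetwork for the same reason.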
16201 if self.op.conflicts_check:
16202 self.share_locks[locking.LEVEL_NODE] = 1
16203 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16204 self.needed_locks = {
16205 locking.LEVEL_NODE: locking.ALL_SET,
16206 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16207 }
16208 else:
16209 self.needed_locks = {}
16211 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16213 def CheckPrereq(self):
16214 if self.op.network is None:
16215 raise errors.OpPrereqError("Network must be given",
16216 errors.ECODE_INVAL)
16218 uuid = self.cfg.LookupNetwork(self.op.network_name)
16221 raise errors.OpPrereqError(("Network with name '%s' already exists" %
16222 self.op.network_name), errors.ECODE_EXISTS)
16224 # Check tag validity
16225 for tag in self.op.tags:
16226 objects.TaggableObject.ValidateTag(tag)
16228 def BuildHooksEnv(self):
16229 """Build hooks env.
16233 "name": self.op.network_name,
16234 "subnet": self.op.network,
16235 "gateway": self.op.gateway,
16236 "network6": self.op.network6,
16237 "gateway6": self.op.gateway6,
16238 "mac_prefix": self.op.mac_prefix,
16239 "network_type": self.op.network_type,
16240 "tags": self.op.tags,
16242 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16244 def Exec(self, feedback_fn):
16245 """Add the ip pool to the cluster.
16248 nobj = objects.Network(name=self.op.network_name,
16249 network=self.op.network,
16250 gateway=self.op.gateway,
16251 network6=self.op.network6,
16252 gateway6=self.op.gateway6,
16253 mac_prefix=self.op.mac_prefix,
16254 network_type=self.op.network_type,
16255 uuid=self.network_uuid,
16256 family=constants.IP4_VERSION)
16257 # Initialize the associated address pool
16258 try:
16259 pool = network.AddressPool.InitializeNetwork(nobj)
16260 except errors.AddressPoolError, e:
16261 raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
16263 # Check if we need to reserve the nodes and the cluster master IP
16264 # These may not be allocated to any instances in routed mode, as
16265 # they wouldn't function anyway.
16266 if self.op.conflicts_check:
16267 for node in self.cfg.GetAllNodesInfo().values():
16268 for ip in [node.primary_ip, node.secondary_ip]:
16269 try:
16270 if pool.Contains(ip):
16271 pool.Reserve(ip)
16272 self.LogInfo("Reserved IP address of node '%s' (%s)",
16273 node.name, ip)
16274 except errors.AddressPoolError:
16275 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
16276 node.name, ip)
16278 master_ip = self.cfg.GetClusterInfo().master_ip
16279 try:
16280 if pool.Contains(master_ip):
16281 pool.Reserve(master_ip)
16282 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16283 except errors.AddressPoolError:
16284 self.LogWarning("Cannot reserve cluster master IP address (%s)",
16287 if self.op.add_reserved_ips:
16288 for ip in self.op.add_reserved_ips:
16289 try:
16290 pool.Reserve(ip, external=True)
16291 except errors.AddressPoolError, e:
16292 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
16294 if self.op.tags:
16295 for tag in self.op.tags:
16296 nobj.AddTag(tag)
16298 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16299 del self.remove_locks[locking.LEVEL_NETWORK]
16302 class LUNetworkRemove(LogicalUnit):
16303 HPATH = "network-remove"
16304 HTYPE = constants.HTYPE_NETWORK
16305 REQ_BGL = False
16307 def ExpandNames(self):
16308 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16310 if not self.network_uuid:
16311 raise errors.OpPrereqError(("Network '%s' not found" %
16312 self.op.network_name), errors.ECODE_NOENT)
16314 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16315 self.needed_locks = {
16316 locking.LEVEL_NETWORK: [self.network_uuid],
16317 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16318 }
16320 def CheckPrereq(self):
16321 """Check prerequisites.
16323 This checks that the given network name exists as a network and that it
16324 is not connected to any node group.
16326 """
16328 # Verify that the network is not connected.
16329 node_groups = [group.name
16330 for group in self.cfg.GetAllNodeGroupsInfo().values()
16331 if self.network_uuid in group.networks]
16334 self.LogWarning("Network '%s' is connected to the following"
16335 " node groups: %s" %
16336 (self.op.network_name,
16337 utils.CommaJoin(utils.NiceSort(node_groups))))
16338 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16340 def BuildHooksEnv(self):
16341 """Build hooks env.
16345 "NETWORK_NAME": self.op.network_name,
16348 def BuildHooksNodes(self):
16349 """Build hooks nodes.
16352 mn = self.cfg.GetMasterNode()
16353 return ([mn], [mn])
16355 def Exec(self, feedback_fn):
16356 """Remove the network.
16360 self.cfg.RemoveNetwork(self.network_uuid)
16361 except errors.ConfigurationError:
16362 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16363 (self.op.network_name, self.network_uuid))
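
# Sketch (field name assumed from the LU above; the authoritative definition
# lives in opcodes.py): removing a still-connected network fails in
# CheckPrereq with ECODE_STATE, so disconnect it from all node groups first.
#
#   op = opcodes.OpNetworkRemove(network_name="net1")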


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved IPs"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError(("Network '%s' not found" %
                                  self.op.network_name), errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.network_type = self.network.network_type
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.network_type:
      if self.op.network_type == constants.VALUE_NONE:
        self.network_type = None
      else:
        self.network_type = self.op.network_type

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6
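
  # Each parameter above follows the same pattern: passing the magic value
  # constants.VALUE_NONE (the literal string "none" in current versions)
  # clears the field, any other value replaces it. For example (hypothetical
  # values):
  #
  #   opcodes.OpNetworkSetParams(network_name="net1", gateway="none")
  #
  # would clear the gateway, while gateway="192.0.2.254" would move it.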

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "network_type": self.network_type,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    #      extend cfg.ReserveIp/ReleaseIp with the external flag
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve the gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    if self.op.network_type:
      self.network.network_type = self.network_type

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)
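
# Sketch of a typical modification (values hypothetical; note that
# CheckArguments above forbids combining a gateway change with reservation
# changes in a single opcode):
#
#   opcodes.OpNetworkSetParams(network_name="net1",
#                              add_reserved_ips=["192.0.2.20"],
#                              remove_reserved_ips=["192.0.2.10"])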


class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking

    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])

            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network:
            net_uuid = name_to_uuid[nic.network]
            if net_uuid in network_uuids:
              network_to_instances[net_uuid].append(instance.name)
              break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
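
# The "map" statistic above is the textual bitmap produced by
# network.AddressPool.GetMap(): one character per address in the subnet,
# with "." for free and "X" for reserved in current versions, e.g.
# "XX........X....X" for a /28 with gateway and a few reservations.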


class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
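
# This query pipeline is what backs network listings; a hypothetical
# client-side equivalent (field list assumed) would be:
#
#   opcodes.OpNetworkQuery(names=["net1"],
#                          output_fields=["name", "network", "gateway"],
#                          use_locking=False)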


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup.

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically; this needs to be verified once the
      # group lock has been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True
      return

    if self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to")

  def Exec(self, feedback_fn):
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)
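
# Connection sketch (values hypothetical): mapping a network to a node group
# also fixes the NIC mode/link that instances in that group will use, e.g.
#
#   opcodes.OpNetworkConnect(network_name="net1", group_name="default",
#                            network_mode=constants.NIC_MODE_BRIDGED,
#                            network_link="br0", conflicts_check=True)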


def _NetworkConflictCheck(lu, check_fn, action):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  # Check if locked instances are still correct
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)

  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found;"
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)
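
# For example, an instance "inst1" whose first two NICs clash would be
# rendered by _FmtNetworkConflict as "nic0/192.0.2.5, nic1/192.0.2.6"
# (addresses hypothetical), and _NetworkConflictCheck would then abort the
# operation with ECODE_STATE after logging all such instances.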


class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup.

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically; this needs to be verified once the
      # group lock has been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False
      return

    _NetworkConflictCheck(self, lambda nic: nic.network == self.network_name,
                          "disconnect from")

  def Exec(self, feedback_fn):
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)
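
# Disconnect sketch (values hypothetical); CheckPrereq above refuses to
# disconnect while any instance NIC in the group still uses the network:
#
#   opcodes.OpNetworkDisconnect(network_name="net1", group_name="default")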


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
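
# E.g. _GetQueryImplementation(constants.QR_NETWORK) returns the
# _NetworkQuery class defined above; unknown resource names surface as an
# OpPrereqError with ECODE_INVAL rather than a bare KeyError.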


def _CheckForConflictingIp(lu, ip, node):
  """Raises an error in case of a conflicting IP address.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)
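
# Note: on success this helper returns the (None, None) pair produced above;
# callers use it purely for its side effect of raising OpPrereqError when
# lu.cfg.CheckIPInNodeGroup reports that "ip" belongs to a network already
# connected to the node's group.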