# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

22 """Module implementing the master-side code."""
# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import copy
import logging

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611

INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))


82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner

    # Dictionaries used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)

    self.remove_locks = {}

    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument (W0613) and
    # "method could be a function" (R0201) warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check whether we really have been called with the instance locks
    # held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLU")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _IsExclusiveStorageEnabledNode(cfg, node):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @rtype: bool
  @return: The effective value of exclusive_storage

  """
  return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]


def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type nodename: string
  @param nodename: The node
  @rtype: bool
  @return: The effective value of exclusive_storage
  @raise errors.OpPrereqError: if no node exists with the given name

  """
  ni = cfg.GetNodeInfo(nodename)
  if ni is None:
    raise errors.OpPrereqError("Invalid node name %s" % nodename,
                               errors.ECODE_NOENT)
  return _IsExclusiveStorageEnabledNode(cfg, ni)


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy

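
# Illustrative sketch of _GetUpdatedParams semantics (hypothetical values,
# not part of the original module):
#
#   old_params = {"mem": 128, "vcpus": 2}
#   update_dict = {"mem": constants.VALUE_DEFAULT, "disk": 10}
#   _GetUpdatedParams(old_params, update_dict)
#   => {"vcpus": 2, "disk": 10}
#
# "mem" is removed (reset to its default) because use_default is True, while
# "disk" is simply added; the input dicts are never modified in place.
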
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret

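
# Illustrative sketch (hypothetical keys and values): each sub-dict in
# C{updates} is merged over the matching sub-dict in C{base} via
# _GetUpdatedParams and then type-checked, so
#
#   base = {"a": {"x": 1}}
#   updates = {"a": {"x": 2}, "b": {"y": 3}}
#   _UpdateAndVerifySubDict(base, updates, type_check)
#   => {"a": {"x": 2}, "b": {"y": 3}}
#
# provided all values pass utils.ForceDictType against C{type_check}.
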
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"

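
# Illustrative sketch (hypothetical lock names): if an LU owns the node locks
# "node1", "node2" and "node3", then
#
#   _ReleaseLocks(lu, locking.LEVEL_NODE, keep=["node1"])   # frees node2, node3
#   _ReleaseLocks(lu, locking.LEVEL_NODE, names=["node2"])  # frees only node2
#   _ReleaseLocks(lu, locking.LEVEL_NODE)                   # frees all three
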
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)

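
# Illustrative sketch (hypothetical names): for an instance "inst1" whose LVs
# map to {"node1": ["xenvg/disk0"], "node2": ["xenvg/disk0"]}, the result is
#   {("node1", "xenvg/disk0"): "inst1", ("node2", "xenvg/disk0"): "inst1"}
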
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",
                  node_name, err)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _CheckNodePVs(nresult, exclusive_storage):
  """Check node PVs.

  """
  pvlist_dict = nresult.get(constants.NV_PVLIST, None)
  if pvlist_dict is None:
    return (["Can't get PV list from node"], None)
  pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
  errlist = []
  # check that ':' is not present in PV names, since it's a
  # special character for lvcreate (denotes the range of PEs to
  # allocate on)
  for pv in pvlist:
    if ":" in pv.name:
      errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
                     (pv.name, pv.vg_name))
  es_pvinfo = None
  if exclusive_storage:
    (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
    errlist.extend(errmsgs)
    shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
    if shared_pvs:
      for (pvname, lvlist) in shared_pvs:
        # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
        errlist.append("PV %s is shared among unrelated LVs (%s)" %
                       (pvname, utils.CommaJoin(lvlist)))
  return (errlist, es_pvinfo)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None

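
# Illustrative sketch (hypothetical ipolicy): with a min of 256 and a max of
# 1024 for a parameter, a value of 512 yields None (in range), while 2048
# yields a message of the form "<name> value 2048 is not in range [256, 1024]";
# None and constants.VALUE_AUTO are always accepted without a range check.
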
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))

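
# Illustrative sketch: each (name, qualifier, value) triple above is checked
# with _ComputeMinMaxSpec; only the disk sizes carry a qualifier (their
# index), so a too-large second disk would hypothetically be reported with a
# "/1" suffix on the parameter name, and a fully compliant spec yields an
# empty list.
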
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(
    ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         network_type, mac_prefix, tags):
  """Builds network related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type network_type: string
  @param network_type: the type of the network
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @type tags: list
  @param tags: the tags of the network

  """
  env = {}
  if name:
    env["NETWORK_NAME"] = name
  if subnet:
    env["NETWORK_SUBNET"] = subnet
  if gateway:
    env["NETWORK_GATEWAY"] = gateway
  if network6:
    env["NETWORK_SUBNET6"] = network6
  if gateway6:
    env["NETWORK_GATEWAY6"] = gateway6
  if mac_prefix:
    env["NETWORK_MAC_PREFIX"] = mac_prefix
  if network_type:
    env["NETWORK_TYPE"] = network_type
  if tags:
    env["NETWORK_TAGS"] = " ".join(tags)

  return env

def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link, network) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if net:
        env["INSTANCE_NIC%d_NETWORK" % idx] = net
      if netinfo:
        nobj = objects.Network.FromDict(netinfo)
        if nobj.network:
          env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
        if nobj.gateway:
          env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
        if nobj.network6:
          env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
        if nobj.gateway6:
          env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
        if nobj.mac_prefix:
          env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
        if nobj.network_type:
          env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
        if nobj.tags:
          env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  ip = nic.ip
  mac = nic.mac
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  net = nic.network
  netinfo = None
  if net:
    net_uuid = lu.cfg.LookupNetwork(net)
    if net_uuid:
      nobj = lu.cfg.GetNetwork(net_uuid)
      netinfo = objects.Network.ToDict(nobj)
  return (ip, mac, mode, link, net, netinfo)

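
# Illustrative sketch (hypothetical values): for a bridged NIC without an
# associated network, the resulting tuple looks like
#   ("192.0.2.10", "aa:bb:cc:dd:ee:ff", constants.NIC_MODE_BRIDGED,
#    "xen-br0", None, None)
# i.e. (ip, mac, mode, link, net, netinfo), with netinfo filled only when the
# NIC's network can be resolved in the configuration.
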
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(_NICToTuple(lu, nic))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)

def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname

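
# Illustrative sketch (hypothetical names): a given name "inst1" resolving to
# "inst1.example.com" is accepted (and the resolution is logged), while a
# resolution to "other.example.com" raises OpPrereqError, since the given
# name must match a prefix component of the resolved hostname.
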
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


1942 def _VerifyCertificate(filename):
1943 """Verifies a certificate for L{LUClusterVerifyConfig}.
1945 @type filename: string
1946 @param filename: Path to PEM file
1950 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1951 utils.ReadFile(filename))
1952 except Exception, err: # pylint: disable=W0703
1953 return (LUClusterVerifyConfig.ETYPE_ERROR,
1954 "Failed to load X509 certificate %s: %s" % (filename, err))
1956 (errcode, msg) = \
1957 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1958 constants.SSL_CERT_EXPIRATION_ERROR)
1960 if msg:
1961 fnamemsg = "While verifying %s: %s" % (filename, msg)
1962 else:
1963 fnamemsg = None
1965 if errcode is None:
1966 return (None, fnamemsg)
1967 elif errcode == utils.CERT_WARNING:
1968 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1969 elif errcode == utils.CERT_ERROR:
1970 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1972 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
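# Summary of the possible return values (paths/messages illustrative only):
# a healthy certificate yields (None, None); one expiring within
# SSL_CERT_EXPIRATION_WARN yields (ETYPE_WARNING, "While verifying
# /path/to/cert.pem: ..."); an expired or unloadable one yields
# (ETYPE_ERROR, ...). Callers feed this tuple straight into _ErrorIf.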
1975 def _GetAllHypervisorParameters(cluster, instances):
1976 """Compute the set of all hypervisor parameters.
1978 @type cluster: L{objects.Cluster}
1979 @param cluster: the cluster object
1980 @type instances: list of L{objects.Instance}
1981 @param instances: additional instances from which to obtain parameters
1982 @rtype: list of (origin, hypervisor, parameters)
1983 @return: a list with all parameters found, indicating the hypervisor they
1984 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1988 hvp_data = []
1989 for hv_name in cluster.enabled_hypervisors:
1990 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1992 for os_name, os_hvp in cluster.os_hvp.items():
1993 for hv_name, hv_params in os_hvp.items():
1995 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1996 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1998 # TODO: collapse identical parameter values in a single one
1999 for instance in instances:
2000 if instance.hvparams:
2001 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2002 cluster.FillHV(instance)))
2004 return hvp_data
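# Shape of the returned list, with invented names and parameters; the three
# origins match the docstring above:
#
#   [("cluster", "xen-pvm", {"kernel_path": "/boot/vmlinuz-xenU"}),
#    ("os debian-edgy", "xen-pvm", {"kernel_path": "/boot/vmlinuz-os"}),
#    ("instance inst1.example.com", "kvm", {"acpi": True})]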
2007 class _VerifyErrors(object):
2008 """Mix-in for cluster/group verify LUs.
2010 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2011 self.op and self._feedback_fn to be available.)
2015 ETYPE_FIELD = "code"
2016 ETYPE_ERROR = "ERROR"
2017 ETYPE_WARNING = "WARNING"
2019 def _Error(self, ecode, item, msg, *args, **kwargs):
2020 """Format an error message.
2022 Based on the opcode's error_codes parameter, either format a
2023 parseable error code, or a simpler error string.
2025 This must be called only from Exec and functions called from Exec.
2028 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2029 itype, etxt, _ = ecode
2030 # first complete the msg
2031 if args:
2032 msg = msg % args
2033 # then format the whole message
2034 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2035 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2036 else:
2037 if item:
2038 item = " [%s]" % (item,)
2039 else:
2040 item = ""
2041 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2042 # and finally report it via the feedback_fn
2043 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
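# Example of the two formats built above, with made-up values. With the
# opcode's error_codes flag set the message is machine-parseable, otherwise
# it stays human-readable:
#
#   >>> "%s:%s:%s:%s:%s" % ("ERROR", "ENODESSH", "node", "node2", "down")
#   'ERROR:ENODESSH:node:node2:down'
#   >>> "%s: %s%s: %s" % ("ERROR", "node", " [node2]", "down")
#   'ERROR: node [node2]: down'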
2045 def _ErrorIf(self, cond, ecode, *args, **kwargs):
2046 """Log an error message if the passed condition is True.
2048 """
2049 cond = (bool(cond)
2050 or self.op.debug_simulate_errors) # pylint: disable=E1101
2052 # If the error code is in the list of ignored errors, demote the error to
2053 # a warning
2054 (_, etxt, _) = ecode
2055 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2056 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2058 if cond:
2059 self._Error(ecode, *args, **kwargs)
2061 # do not mark the operation as failed for WARN cases only
2062 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2063 self.bad = self.bad or cond
2066 class LUClusterVerify(NoHooksLU):
2067 """Submits all jobs necessary to verify the cluster.
2072 def ExpandNames(self):
2073 self.needed_locks = {}
2075 def Exec(self, feedback_fn):
2076 jobs = []
2078 if self.op.group_name:
2079 groups = [self.op.group_name]
2080 depends_fn = lambda: None
2081 else:
2082 groups = self.cfg.GetNodeGroupList()
2084 # Verify global configuration
2085 jobs.append([
2086 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2087 ])
2089 # Always depend on global verification
2090 depends_fn = lambda: [(-len(jobs), [])]
2092 jobs.extend(
2093 [opcodes.OpClusterVerifyGroup(group_name=group,
2094 ignore_errors=self.op.ignore_errors,
2095 depends=depends_fn())]
2096 for group in groups)
2098 # Fix up all parameters
2099 for op in itertools.chain(*jobs): # pylint: disable=W0142
2100 op.debug_simulate_errors = self.op.debug_simulate_errors
2101 op.verbose = self.op.verbose
2102 op.error_codes = self.op.error_codes
2103 try:
2104 op.skip_checks = self.op.skip_checks
2105 except AttributeError:
2106 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2108 return ResultWithJobs(jobs)
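# Resulting job layout when no group name was given (sketch, group names
# invented). Each per-group job carries a relative dependency on the
# config-verification job submitted -len(jobs) positions earlier; the empty
# list is assumed here to mean the default set of accepted final statuses:
#
#   jobs = [[OpClusterVerifyConfig(...)],
#           [OpClusterVerifyGroup(group_name="default", depends=[(-1, [])])],
#           [OpClusterVerifyGroup(group_name="rack2", depends=[(-2, [])])]]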
2111 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2112 """Verifies the cluster config.
2117 def _VerifyHVP(self, hvp_data):
2118 """Verifies locally the syntax of the hypervisor parameters.
2121 for item, hv_name, hv_params in hvp_data:
2122 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2123 (hv_name, item))
2124 try:
2125 hv_class = hypervisor.GetHypervisorClass(hv_name)
2126 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2127 hv_class.CheckParameterSyntax(hv_params)
2128 except errors.GenericError, err:
2129 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2131 def ExpandNames(self):
2132 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2133 self.share_locks = _ShareAll()
2135 def CheckPrereq(self):
2136 """Check prerequisites.
2139 # Retrieve all information
2140 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2141 self.all_node_info = self.cfg.GetAllNodesInfo()
2142 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2144 def Exec(self, feedback_fn):
2145 """Verify integrity of cluster, performing various test on nodes.
2149 self._feedback_fn = feedback_fn
2151 feedback_fn("* Verifying cluster config")
2153 for msg in self.cfg.VerifyConfig():
2154 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2156 feedback_fn("* Verifying cluster certificate files")
2158 for cert_filename in pathutils.ALL_CERT_FILES:
2159 (errcode, msg) = _VerifyCertificate(cert_filename)
2160 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2162 feedback_fn("* Verifying hypervisor parameters")
2164 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2165 self.all_inst_info.values()))
2167 feedback_fn("* Verifying all nodes belong to an existing group")
2169 # We do this verification here because, should this bogus circumstance
2170 # occur, it would never be caught by VerifyGroup, which only acts on
2171 # nodes/instances reachable from existing node groups.
2173 dangling_nodes = set(node.name for node in self.all_node_info.values()
2174 if node.group not in self.all_group_info)
2176 dangling_instances = {}
2177 no_node_instances = []
2179 for inst in self.all_inst_info.values():
2180 if inst.primary_node in dangling_nodes:
2181 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2182 elif inst.primary_node not in self.all_node_info:
2183 no_node_instances.append(inst.name)
2185 pretty_dangling = [
2186 "%s (%s)" %
2187 (node.name,
2188 utils.CommaJoin(dangling_instances.get(node.name,
2189 ["no instances"])))
2190 for node in dangling_nodes]
2192 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2194 "the following nodes (and their instances) belong to a non"
2195 " existing group: %s", utils.CommaJoin(pretty_dangling))
2197 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2199 "the following instances have a non-existing primary-node:"
2200 " %s", utils.CommaJoin(no_node_instances))
2205 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2206 """Verifies the status of a node group.
2209 HPATH = "cluster-verify"
2210 HTYPE = constants.HTYPE_CLUSTER
2213 _HOOKS_INDENT_RE = re.compile("^", re.M)
2215 class NodeImage(object):
2216 """A class representing the logical and physical status of a node.
2219 @ivar name: the node name to which this object refers
2220 @ivar volumes: a structure as returned from
2221 L{ganeti.backend.GetVolumeList} (runtime)
2222 @ivar instances: a list of running instances (runtime)
2223 @ivar pinst: list of configured primary instances (config)
2224 @ivar sinst: list of configured secondary instances (config)
2225 @ivar sbp: dictionary of {primary-node: list of instances} for all
2226 instances for which this node is secondary (config)
2227 @ivar mfree: free memory, as reported by hypervisor (runtime)
2228 @ivar dfree: free disk, as reported by the node (runtime)
2229 @ivar offline: the offline status (config)
2230 @type rpc_fail: boolean
2231 @ivar rpc_fail: whether the RPC verify call failed (overall,
2232 not whether the individual keys were correct) (runtime)
2233 @type lvm_fail: boolean
2234 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2235 @type hyp_fail: boolean
2236 @ivar hyp_fail: whether the RPC call didn't return the instance list
2237 @type ghost: boolean
2238 @ivar ghost: whether this is a known node or not (config)
2239 @type os_fail: boolean
2240 @ivar os_fail: whether the RPC call didn't return valid OS data
2242 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2243 @type vm_capable: boolean
2244 @ivar vm_capable: whether the node can host instances
2246 @ivar pv_min: size in MiB of the smallest PVs
2248 @ivar pv_max: size in MiB of the biggest PVs
2251 def __init__(self, offline=False, name=None, vm_capable=True):
2252 self.name = name
2253 self.volumes = {}
2254 self.instances = []
2255 self.pinst = []
2256 self.sinst = []
2257 self.sbp = {}
2258 self.mfree = 0
2259 self.dfree = 0
2260 self.offline = offline
2261 self.vm_capable = vm_capable
2262 self.rpc_fail = False
2263 self.lvm_fail = False
2264 self.hyp_fail = False
2265 self.ghost = False
2266 self.os_fail = False
2267 self.oslist = {}
2268 self.pv_min = None
2269 self.pv_max = None
2271 def ExpandNames(self):
2272 # This raises errors.OpPrereqError on its own:
2273 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2275 # Get instances in node group; this is unsafe and needs verification later
2276 inst_names = \
2277 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2279 self.needed_locks = {
2280 locking.LEVEL_INSTANCE: inst_names,
2281 locking.LEVEL_NODEGROUP: [self.group_uuid],
2282 locking.LEVEL_NODE: [],
2284 # This opcode is run by watcher every five minutes and acquires all nodes
2285 # for a group. It doesn't run for a long time, so it's better to acquire
2286 # the node allocation lock as well.
2287 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2288 }
2290 self.share_locks = _ShareAll()
2292 def DeclareLocks(self, level):
2293 if level == locking.LEVEL_NODE:
2294 # Get members of node group; this is unsafe and needs verification later
2295 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2297 all_inst_info = self.cfg.GetAllInstancesInfo()
2299 # In Exec(), we warn about mirrored instances that have primary and
2300 # secondary living in separate node groups. To fully verify that
2301 # volumes for these instances are healthy, we will need to do an
2302 # extra call to their secondaries. We ensure here those nodes will
2303 # be locked.
2304 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2305 # Important: access only the instances whose lock is owned
2306 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2307 nodes.update(all_inst_info[inst].secondary_nodes)
2309 self.needed_locks[locking.LEVEL_NODE] = nodes
2311 def CheckPrereq(self):
2312 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2313 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2315 group_nodes = set(self.group_info.members)
2316 group_instances = \
2317 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2319 unlocked_nodes = \
2320 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2322 unlocked_instances = \
2323 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2326 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2327 utils.CommaJoin(unlocked_nodes),
2330 if unlocked_instances:
2331 raise errors.OpPrereqError("Missing lock for instances: %s" %
2332 utils.CommaJoin(unlocked_instances),
2335 self.all_node_info = self.cfg.GetAllNodesInfo()
2336 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2338 self.my_node_names = utils.NiceSort(group_nodes)
2339 self.my_inst_names = utils.NiceSort(group_instances)
2341 self.my_node_info = dict((name, self.all_node_info[name])
2342 for name in self.my_node_names)
2344 self.my_inst_info = dict((name, self.all_inst_info[name])
2345 for name in self.my_inst_names)
2347 # We detect here the nodes that will need the extra RPC calls for verifying
2348 # split LV volumes; they should be locked.
2349 extra_lv_nodes = set()
2351 for inst in self.my_inst_info.values():
2352 if inst.disk_template in constants.DTS_INT_MIRROR:
2353 for nname in inst.all_nodes:
2354 if self.all_node_info[nname].group != self.group_uuid:
2355 extra_lv_nodes.add(nname)
2357 unlocked_lv_nodes = \
2358 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2360 if unlocked_lv_nodes:
2361 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2362 utils.CommaJoin(unlocked_lv_nodes),
2363 errors.ECODE_STATE)
2364 self.extra_lv_nodes = list(extra_lv_nodes)
2366 def _VerifyNode(self, ninfo, nresult):
2367 """Perform some basic validation on data returned from a node.
2369 - check the result data structure is well formed and has all the
2370 mandatory fields
2371 - check ganeti version
2373 @type ninfo: L{objects.Node}
2374 @param ninfo: the node to check
2375 @param nresult: the results from the node
2377 @return: whether overall this call was successful (and we can expect
2378 reasonable values in the response)
2381 node = ninfo.name
2382 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2384 # main result, nresult should be a non-empty dict
2385 test = not nresult or not isinstance(nresult, dict)
2386 _ErrorIf(test, constants.CV_ENODERPC, node,
2387 "unable to verify node: no data returned")
2391 # compares ganeti version
2392 local_version = constants.PROTOCOL_VERSION
2393 remote_version = nresult.get("version", None)
2394 test = not (remote_version and
2395 isinstance(remote_version, (list, tuple)) and
2396 len(remote_version) == 2)
2397 _ErrorIf(test, constants.CV_ENODERPC, node,
2398 "connection to node returned invalid data")
2402 test = local_version != remote_version[0]
2403 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2404 "incompatible protocol versions: master %s,"
2405 " node %s", local_version, remote_version[0])
2409 # node seems compatible, we can actually try to look into its results
2411 # full package version
2412 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2413 constants.CV_ENODEVERSION, node,
2414 "software version mismatch: master %s, node %s",
2415 constants.RELEASE_VERSION, remote_version[1],
2416 code=self.ETYPE_WARNING)
2418 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2419 if ninfo.vm_capable and isinstance(hyp_result, dict):
2420 for hv_name, hv_result in hyp_result.iteritems():
2421 test = hv_result is not None
2422 _ErrorIf(test, constants.CV_ENODEHV, node,
2423 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2425 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2426 if ninfo.vm_capable and isinstance(hvp_result, list):
2427 for item, hv_name, hv_result in hvp_result:
2428 _ErrorIf(True, constants.CV_ENODEHV, node,
2429 "hypervisor %s parameter verify failure (source %s): %s",
2430 hv_name, item, hv_result)
2432 test = nresult.get(constants.NV_NODESETUP,
2433 ["Missing NODESETUP results"])
2434 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2435 "; ".join(test))
2437 return True
2439 def _VerifyNodeTime(self, ninfo, nresult,
2440 nvinfo_starttime, nvinfo_endtime):
2441 """Check the node time.
2443 @type ninfo: L{objects.Node}
2444 @param ninfo: the node to check
2445 @param nresult: the remote results for the node
2446 @param nvinfo_starttime: the start time of the RPC call
2447 @param nvinfo_endtime: the end time of the RPC call
2450 node = ninfo.name
2451 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2453 ntime = nresult.get(constants.NV_TIME, None)
2454 try:
2455 ntime_merged = utils.MergeTime(ntime)
2456 except (ValueError, TypeError):
2457 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2458 return
2460 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2461 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2462 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2463 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2464 else:
2465 ntime_diff = None
2467 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2468 "Node time diverges by at least %s from master node time",
2471 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2472 """Check the node LVM results and update info for cross-node checks.
2474 @type ninfo: L{objects.Node}
2475 @param ninfo: the node to check
2476 @param nresult: the remote results for the node
2477 @param vg_name: the configured VG name
2478 @type nimg: L{NodeImage}
2479 @param nimg: node image
2485 node = ninfo.name
2486 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2488 # checks vg existence and size > 20G
2489 vglist = nresult.get(constants.NV_VGLIST, None)
2490 test = not vglist
2491 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2492 if not test:
2493 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2494 constants.MIN_VG_SIZE)
2495 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2498 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2499 for em in errmsgs:
2500 self._Error(constants.CV_ENODELVM, node, em)
2501 if pvminmax is not None:
2502 (nimg.pv_min, nimg.pv_max) = pvminmax
2504 def _VerifyGroupLVM(self, node_image, vg_name):
2505 """Check cross-node consistency in LVM.
2507 @type node_image: dict
2508 @param node_image: info about nodes, mapping from node to names to
2509 L{NodeImage} objects
2510 @param vg_name: the configured VG name
2516 # Only exclusive storage needs this kind of check
2517 if not self._exclusive_storage:
2518 return
2520 # exclusive_storage wants all PVs to have the same size (approximately),
2521 # if the smallest and the biggest ones are okay, everything is fine.
2522 # pv_min is None iff pv_max is None
2523 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2524 if not vals:
2525 return
2526 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2527 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2528 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2529 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2530 "PV sizes differ too much in the group; smallest (%s MB) is"
2531 " on %s, biggest (%s MB) is on %s",
2532 pvmin, minnode, pvmax, maxnode)
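# The (size, name) tuples above exploit element-wise tuple comparison:
# min()/max() order primarily by PV size and use the node name only as a
# tie-breaker. Invented values:
#
#   >>> vals = [(512, "node2"), (4096, "node3"), (512, "node1")]
#   >>> min(vals), max(vals)
#   ((512, 'node1'), (4096, 'node3'))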
2534 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2535 """Check the node bridges.
2537 @type ninfo: L{objects.Node}
2538 @param ninfo: the node to check
2539 @param nresult: the remote results for the node
2540 @param bridges: the expected list of bridges
2543 if not bridges:
2544 return
2546 node = ninfo.name
2547 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2549 missing = nresult.get(constants.NV_BRIDGES, None)
2550 test = not isinstance(missing, list)
2551 _ErrorIf(test, constants.CV_ENODENET, node,
2552 "did not return valid bridge information")
2553 if not test:
2554 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2555 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2557 def _VerifyNodeUserScripts(self, ninfo, nresult):
2558 """Check the results of user scripts presence and executability on the node
2560 @type ninfo: L{objects.Node}
2561 @param ninfo: the node to check
2562 @param nresult: the remote results for the node
2566 node = ninfo.name
2567 test = not constants.NV_USERSCRIPTS in nresult
2568 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2569 "did not return user scripts information")
2571 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2572 if broken_scripts:
2573 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2574 "user scripts not present or not executable: %s" %
2575 utils.CommaJoin(sorted(broken_scripts)))
2577 def _VerifyNodeNetwork(self, ninfo, nresult):
2578 """Check the node network connectivity results.
2580 @type ninfo: L{objects.Node}
2581 @param ninfo: the node to check
2582 @param nresult: the remote results for the node
2585 node = ninfo.name
2586 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2588 test = constants.NV_NODELIST not in nresult
2589 _ErrorIf(test, constants.CV_ENODESSH, node,
2590 "node hasn't returned node ssh connectivity data")
2591 if not test:
2592 if nresult[constants.NV_NODELIST]:
2593 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2594 _ErrorIf(True, constants.CV_ENODESSH, node,
2595 "ssh communication with node '%s': %s", a_node, a_msg)
2597 test = constants.NV_NODENETTEST not in nresult
2598 _ErrorIf(test, constants.CV_ENODENET, node,
2599 "node hasn't returned node tcp connectivity data")
2600 if not test:
2601 if nresult[constants.NV_NODENETTEST]:
2602 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2603 for anode in nlist:
2604 _ErrorIf(True, constants.CV_ENODENET, node,
2605 "tcp communication with node '%s': %s",
2606 anode, nresult[constants.NV_NODENETTEST][anode])
2608 test = constants.NV_MASTERIP not in nresult
2609 _ErrorIf(test, constants.CV_ENODENET, node,
2610 "node hasn't returned node master IP reachability data")
2611 if not test:
2612 if not nresult[constants.NV_MASTERIP]:
2613 if node == self.master_node:
2614 msg = "the master node cannot reach the master IP (not configured?)"
2616 msg = "cannot reach the master IP"
2617 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2619 def _VerifyInstance(self, instance, inst_config, node_image,
2620 diskstatus):
2621 """Verify an instance.
2623 This function checks to see if the required block devices are
2624 available on the instance's node, and that the nodes are in the correct
2625 state.
2628 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2629 pnode = inst_config.primary_node
2630 pnode_img = node_image[pnode]
2631 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2633 node_vol_should = {}
2634 inst_config.MapLVsByNode(node_vol_should)
2636 cluster = self.cfg.GetClusterInfo()
2637 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2638 self.group_info)
2639 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2640 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2641 code=self.ETYPE_WARNING)
2643 for node in node_vol_should:
2644 n_img = node_image[node]
2645 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2646 # ignore missing volumes on offline or broken nodes
2647 continue
2648 for volume in node_vol_should[node]:
2649 test = volume not in n_img.volumes
2650 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2651 "volume %s missing on node %s", volume, node)
2653 if inst_config.admin_state == constants.ADMINST_UP:
2654 test = instance not in pnode_img.instances and not pnode_img.offline
2655 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2656 "instance not running on its primary node %s",
2658 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2659 "instance is marked as running and lives on offline node %s",
2662 diskdata = [(nname, success, status, idx)
2663 for (nname, disks) in diskstatus.items()
2664 for idx, (success, status) in enumerate(disks)]
2666 for nname, success, bdev_status, idx in diskdata:
2667 # the 'ghost node' construction in Exec() ensures that we have a
2668 # node here
2669 snode = node_image[nname]
2670 bad_snode = snode.ghost or snode.offline
2671 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2672 not success and not bad_snode,
2673 constants.CV_EINSTANCEFAULTYDISK, instance,
2674 "couldn't retrieve status for disk/%s on %s: %s",
2675 idx, nname, bdev_status)
2676 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2677 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2678 constants.CV_EINSTANCEFAULTYDISK, instance,
2679 "disk/%s on %s is faulty", idx, nname)
2681 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2682 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2683 " primary node failed", instance)
2685 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2686 constants.CV_EINSTANCELAYOUT,
2687 instance, "instance has multiple secondary nodes: %s",
2688 utils.CommaJoin(inst_config.secondary_nodes),
2689 code=self.ETYPE_WARNING)
2691 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2692 # Disk template not compatible with exclusive_storage: no instance
2693 # node should have the flag set
2694 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2695 inst_config.all_nodes)
2696 es_nodes = [n for (n, es) in es_flags.items()
2697 if es]
2698 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2699 "instance has template %s, which is not supported on nodes"
2700 " that have exclusive storage set: %s",
2701 inst_config.disk_template, utils.CommaJoin(es_nodes))
2703 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2704 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2705 instance_groups = {}
2707 for node in instance_nodes:
2708 instance_groups.setdefault(self.all_node_info[node].group,
2709 []).append(node)
2711 pretty_list = [
2712 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2713 # Sort so that we always list the primary node first.
2714 for group, nodes in sorted(instance_groups.items(),
2715 key=lambda (_, nodes): pnode in nodes,
2716 reverse=True)]
2718 self._ErrorIf(len(instance_groups) > 1,
2719 constants.CV_EINSTANCESPLITGROUPS,
2720 instance, "instance has primary and secondary nodes in"
2721 " different groups: %s", utils.CommaJoin(pretty_list),
2722 code=self.ETYPE_WARNING)
2724 inst_nodes_offline = []
2725 for snode in inst_config.secondary_nodes:
2726 s_img = node_image[snode]
2727 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2728 snode, "instance %s, connection to secondary node failed",
2729 instance)
2731 if s_img.offline:
2732 inst_nodes_offline.append(snode)
2734 # warn that the instance lives on offline nodes
2735 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2736 "instance has offline secondary node(s) %s",
2737 utils.CommaJoin(inst_nodes_offline))
2738 # ... or ghost/non-vm_capable nodes
2739 for node in inst_config.all_nodes:
2740 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2741 instance, "instance lives on ghost node %s", node)
2742 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2743 instance, "instance lives on non-vm_capable node %s", node)
2745 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2746 """Verify if there are any unknown volumes in the cluster.
2748 The .os, .swap and backup volumes are ignored. All other volumes are
2749 reported as unknown.
2751 @type reserved: L{ganeti.utils.FieldSet}
2752 @param reserved: a FieldSet of reserved volume names
2755 for node, n_img in node_image.items():
2756 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2757 self.all_node_info[node].group != self.group_uuid):
2758 # skip non-healthy nodes
2759 continue
2760 for volume in n_img.volumes:
2761 test = ((node not in node_vol_should or
2762 volume not in node_vol_should[node]) and
2763 not reserved.Matches(volume))
2764 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2765 "volume %s is unknown", volume)
2767 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2768 """Verify N+1 Memory Resilience.
2770 Check that if one single node dies we can still start all the
2771 instances it was primary for.
2774 cluster_info = self.cfg.GetClusterInfo()
2775 for node, n_img in node_image.items():
2776 # This code checks that every node which is now listed as
2777 # secondary has enough memory to host all instances it is
2778 # supposed to should a single other node in the cluster fail.
2779 # FIXME: not ready for failover to an arbitrary node
2780 # FIXME: does not support file-backed instances
2781 # WARNING: we currently take into account down instances as well
2782 # as up ones, considering that even if they're down someone
2783 # might want to start them even in the event of a node failure.
2784 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2785 # we're skipping nodes marked offline and nodes in other groups from
2786 # the N+1 warning, since most likely we don't have good memory
2787 # information from them; we already list instances living on such
2788 # nodes, and that's enough warning
2789 continue
2790 #TODO(dynmem): also consider ballooning out other instances
2791 for prinode, instances in n_img.sbp.items():
2792 needed_mem = 0
2793 for instance in instances:
2794 bep = cluster_info.FillBE(instance_cfg[instance])
2795 if bep[constants.BE_AUTO_BALANCE]:
2796 needed_mem += bep[constants.BE_MINMEM]
2797 test = n_img.mfree < needed_mem
2798 self._ErrorIf(test, constants.CV_ENODEN1, node,
2799 "not enough memory to accomodate instance failovers"
2800 " should node %s fail (%dMiB needed, %dMiB available)",
2801 prinode, needed_mem, n_img.mfree)
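# Worked example (numbers invented): if this node is secondary for two
# auto-balanced instances whose primary is node1, each with BE_MINMEM of
# 1024 MiB, then needed_mem is 2048 for prinode node1; a reported
# mfree of 1500 would raise CV_ENODEN1 for a hypothetical node1 failover.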
2803 @classmethod
2804 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2805 (files_all, files_opt, files_mc, files_vm)):
2806 """Verifies file checksums collected from all nodes.
2808 @param errorif: Callback for reporting errors
2809 @param nodeinfo: List of L{objects.Node} objects
2810 @param master_node: Name of master node
2811 @param all_nvinfo: RPC results
2814 # Define functions determining which nodes to consider for a file
2815 files2nodefn = [
2816 (files_all, None),
2817 (files_mc, lambda node: (node.master_candidate or
2818 node.name == master_node)),
2819 (files_vm, lambda node: node.vm_capable),
2820 ]
2822 # Build mapping from filename to list of nodes which should have the file
2823 nodefiles = {}
2824 for (files, fn) in files2nodefn:
2825 if fn is None:
2826 filenodes = nodeinfo
2827 else:
2828 filenodes = filter(fn, nodeinfo)
2829 nodefiles.update((filename,
2830 frozenset(map(operator.attrgetter("name"), filenodes)))
2831 for filename in files)
2833 assert set(nodefiles) == (files_all | files_mc | files_vm)
2835 fileinfo = dict((filename, {}) for filename in nodefiles)
2836 ignore_nodes = set()
2838 for node in nodeinfo:
2839 if node.offline:
2840 ignore_nodes.add(node.name)
2841 continue
2843 nresult = all_nvinfo[node.name]
2845 if nresult.fail_msg or not nresult.payload:
2846 node_files = None
2847 else:
2848 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2849 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2850 for (key, value) in fingerprints.items())
2853 test = not (node_files and isinstance(node_files, dict))
2854 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2855 "Node did not return file checksum data")
2856 if test:
2857 ignore_nodes.add(node.name)
2858 continue
2860 # Build per-checksum mapping from filename to nodes having it
2861 for (filename, checksum) in node_files.items():
2862 assert filename in nodefiles
2863 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2865 for (filename, checksums) in fileinfo.items():
2866 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2868 # Nodes having the file
2869 with_file = frozenset(node_name
2870 for nodes in fileinfo[filename].values()
2871 for node_name in nodes) - ignore_nodes
2873 expected_nodes = nodefiles[filename] - ignore_nodes
2875 # Nodes missing file
2876 missing_file = expected_nodes - with_file
2878 if filename in files_opt:
2880 errorif(missing_file and missing_file != expected_nodes,
2881 constants.CV_ECLUSTERFILECHECK, None,
2882 "File %s is optional, but it must exist on all or no"
2883 " nodes (not found on %s)",
2884 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2885 else:
2886 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2887 "File %s is missing from node(s) %s", filename,
2888 utils.CommaJoin(utils.NiceSort(missing_file)))
2890 # Warn if a node has a file it shouldn't
2891 unexpected = with_file - expected_nodes
2892 errorif(unexpected,
2893 constants.CV_ECLUSTERFILECHECK, None,
2894 "File %s should not exist on node(s) %s",
2895 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2897 # See if there are multiple versions of the file
2898 test = len(checksums) > 1
2899 if test:
2900 variants = ["variant %s on %s" %
2901 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2902 for (idx, (checksum, nodes)) in
2903 enumerate(sorted(checksums.items()))]
2904 else:
2905 variants = []
2907 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2908 "File %s found with %s different checksums (%s)",
2909 filename, len(checksums), "; ".join(variants))
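# Sketch of the intermediate structures, with invented file names and
# shortened checksums; two checksum keys under one filename is exactly
# what the "different checksums" error above reports:
#
#   nodefiles = {"/var/lib/ganeti/config.data": frozenset(["node1", "node2"])}
#   fileinfo = {"/var/lib/ganeti/config.data":
#                 {"3f786850e38...": set(["node1"]),
#                  "89e6c98d92...": set(["node2"])}}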
2911 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2912 drbd_map):
2913 """Verifies the node DRBD status.
2915 @type ninfo: L{objects.Node}
2916 @param ninfo: the node to check
2917 @param nresult: the remote results for the node
2918 @param instanceinfo: the dict of instances
2919 @param drbd_helper: the configured DRBD usermode helper
2920 @param drbd_map: the DRBD map as returned by
2921 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2924 node = ninfo.name
2925 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2927 if drbd_helper:
2928 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2929 test = (helper_result is None)
2930 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2931 "no drbd usermode helper returned")
2932 if helper_result:
2933 status, payload = helper_result
2934 test = not status
2935 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2936 "drbd usermode helper check unsuccessful: %s", payload)
2937 test = status and (payload != drbd_helper)
2938 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2939 "wrong drbd usermode helper: %s", payload)
2941 # compute the DRBD minors
2942 node_drbd = {}
2943 for minor, instance in drbd_map[node].items():
2944 test = instance not in instanceinfo
2945 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2946 "ghost instance '%s' in temporary DRBD map", instance)
2947 # ghost instance should not be running, but otherwise we
2948 # don't give double warnings (both ghost instance and
2949 # unallocated minor in use)
2950 if test:
2951 node_drbd[minor] = (instance, False)
2952 else:
2953 instance = instanceinfo[instance]
2954 node_drbd[minor] = (instance.name,
2955 instance.admin_state == constants.ADMINST_UP)
2957 # and now check them
2958 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2959 test = not isinstance(used_minors, (tuple, list))
2960 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2961 "cannot parse drbd status file: %s", str(used_minors))
2962 if test:
2963 # we cannot check drbd status
2964 return
2966 for minor, (iname, must_exist) in node_drbd.items():
2967 test = minor not in used_minors and must_exist
2968 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2969 "drbd minor %d of instance %s is not active", minor, iname)
2970 for minor in used_minors:
2971 test = minor not in node_drbd
2972 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2973 "unallocated drbd minor %d is in use", minor)
2975 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2976 """Builds the node OS structures.
2978 @type ninfo: L{objects.Node}
2979 @param ninfo: the node to check
2980 @param nresult: the remote results for the node
2981 @param nimg: the node image object
2984 node = ninfo.name
2985 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2987 remote_os = nresult.get(constants.NV_OSLIST, None)
2988 test = (not isinstance(remote_os, list) or
2989 not compat.all(isinstance(v, list) and len(v) == 7
2990 for v in remote_os))
2992 _ErrorIf(test, constants.CV_ENODEOS, node,
2993 "node hasn't returned valid OS data")
3002 for (name, os_path, status, diagnose,
3003 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
3005 if name not in os_dict:
3006 os_dict[name] = []
3008 # parameters is a list of lists instead of list of tuples due to
3009 # JSON lacking a real tuple type, fix it:
3010 parameters = [tuple(v) for v in parameters]
3011 os_dict[name].append((os_path, status, diagnose,
3012 set(variants), set(parameters), set(api_ver)))
3014 nimg.oslist = os_dict
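# Resulting structure (sketch, names invented): nimg.oslist maps each OS
# name to the occurrences found in the node's OS search path, e.g.
#
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set([("dhcp", "use DHCP")]),
#                     set([20]))]}
#
# A list longer than one means shadowed duplicate definitions, which
# _VerifyNodeOS reports.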
3016 def _VerifyNodeOS(self, ninfo, nimg, base):
3017 """Verifies the node OS list.
3019 @type ninfo: L{objects.Node}
3020 @param ninfo: the node to check
3021 @param nimg: the node image object
3022 @param base: the 'template' node we match against (e.g. from the master)
3025 node = ninfo.name
3026 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3028 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3030 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3031 for os_name, os_data in nimg.oslist.items():
3032 assert os_data, "Empty OS status for OS %s?!" % os_name
3033 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3034 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3035 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3036 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3037 "OS '%s' has multiple entries (first one shadows the rest): %s",
3038 os_name, utils.CommaJoin([v[0] for v in os_data]))
3039 # comparisons with the 'base' image
3040 test = os_name not in base.oslist
3041 _ErrorIf(test, constants.CV_ENODEOS, node,
3042 "Extra OS %s not present on reference node (%s)",
3046 assert base.oslist[os_name], "Base node has empty OS status?"
3047 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3048 if not b_status:
3049 # base OS is invalid, skipping
3050 continue
3051 for kind, a, b in [("API version", f_api, b_api),
3052 ("variants list", f_var, b_var),
3053 ("parameters", beautify_params(f_param),
3054 beautify_params(b_param))]:
3055 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3056 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3057 kind, os_name, base.name,
3058 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3060 # check any missing OSes
3061 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3062 _ErrorIf(missing, constants.CV_ENODEOS, node,
3063 "OSes present on reference node %s but missing on this node: %s",
3064 base.name, utils.CommaJoin(missing))
3066 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3067 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3069 @type ninfo: L{objects.Node}
3070 @param ninfo: the node to check
3071 @param nresult: the remote results for the node
3072 @type is_master: bool
3073 @param is_master: Whether node is the master node
3076 node = ninfo.name
3078 if (is_master and
3079 (constants.ENABLE_FILE_STORAGE or
3080 constants.ENABLE_SHARED_FILE_STORAGE)):
3081 try:
3082 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3083 except KeyError:
3084 # This should never happen
3085 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3086 "Node did not return forbidden file storage paths")
3087 else:
3088 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3089 "Found forbidden file storage paths: %s",
3090 utils.CommaJoin(fspaths))
3091 else:
3092 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3093 constants.CV_ENODEFILESTORAGEPATHS, node,
3094 "Node should not have returned forbidden file storage"
3097 def _VerifyOob(self, ninfo, nresult):
3098 """Verifies out of band functionality of a node.
3100 @type ninfo: L{objects.Node}
3101 @param ninfo: the node to check
3102 @param nresult: the remote results for the node
3105 node = ninfo.name
3106 # We just have to verify the paths on master and/or master candidates
3107 # as the oob helper is invoked on the master
3108 if ((ninfo.master_candidate or ninfo.master_capable) and
3109 constants.NV_OOB_PATHS in nresult):
3110 for path_result in nresult[constants.NV_OOB_PATHS]:
3111 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3113 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3114 """Verifies and updates the node volume data.
3116 This function will update a L{NodeImage}'s internal structures
3117 with data from the remote call.
3119 @type ninfo: L{objects.Node}
3120 @param ninfo: the node to check
3121 @param nresult: the remote results for the node
3122 @param nimg: the node image object
3123 @param vg_name: the configured VG name
3126 node = ninfo.name
3127 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3129 nimg.lvm_fail = True
3130 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3131 if vg_name is None:
3132 pass
3133 elif isinstance(lvdata, basestring):
3134 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3135 utils.SafeEncode(lvdata))
3136 elif not isinstance(lvdata, dict):
3137 _ErrorIf(True, constants.CV_ENODELVM, node,
3138 "rpc call to node failed (lvlist)")
3139 else:
3140 nimg.volumes = lvdata
3141 nimg.lvm_fail = False
3143 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3144 """Verifies and updates the node instance list.
3146 If the listing was successful, then updates this node's instance
3147 list. Otherwise, it marks the RPC call as failed for the instance
3148 list key.
3150 @type ninfo: L{objects.Node}
3151 @param ninfo: the node to check
3152 @param nresult: the remote results for the node
3153 @param nimg: the node image object
3156 idata = nresult.get(constants.NV_INSTANCELIST, None)
3157 test = not isinstance(idata, list)
3158 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3159 "rpc call to node failed (instancelist): %s",
3160 utils.SafeEncode(str(idata)))
3161 if test:
3162 nimg.hyp_fail = True
3163 else:
3164 nimg.instances = idata
3166 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3167 """Verifies and computes a node information map
3169 @type ninfo: L{objects.Node}
3170 @param ninfo: the node to check
3171 @param nresult: the remote results for the node
3172 @param nimg: the node image object
3173 @param vg_name: the configured VG name
3176 node = ninfo.name
3177 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3179 # try to read free memory (from the hypervisor)
3180 hv_info = nresult.get(constants.NV_HVINFO, None)
3181 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3182 _ErrorIf(test, constants.CV_ENODEHV, node,
3183 "rpc call to node failed (hvinfo)")
3184 if not test:
3185 try:
3186 nimg.mfree = int(hv_info["memory_free"])
3187 except (ValueError, TypeError):
3188 _ErrorIf(True, constants.CV_ENODERPC, node,
3189 "node returned invalid nodeinfo, check hypervisor")
3191 # FIXME: devise a free space model for file based instances as well
3192 if vg_name is not None:
3193 test = (constants.NV_VGLIST not in nresult or
3194 vg_name not in nresult[constants.NV_VGLIST])
3195 _ErrorIf(test, constants.CV_ENODELVM, node,
3196 "node didn't return data for the volume group '%s'"
3197 " - it is either missing or broken", vg_name)
3198 if not test:
3199 try:
3200 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3201 except (ValueError, TypeError):
3202 _ErrorIf(True, constants.CV_ENODERPC, node,
3203 "node returned invalid LVM info, check LVM status")
3205 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3206 """Gets per-disk status information for all instances.
3208 @type nodelist: list of strings
3209 @param nodelist: Node names
3210 @type node_image: dict of (name, L{objects.Node})
3211 @param node_image: Node objects
3212 @type instanceinfo: dict of (name, L{objects.Instance})
3213 @param instanceinfo: Instance objects
3214 @rtype: {instance: {node: [(success, payload)]}}
3215 @return: a dictionary of per-instance dictionaries with nodes as
3216 keys and disk information as values; the disk information is a
3217 list of tuples (success, payload)
3220 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3222 node_disks = {}
3223 node_disks_devonly = {}
3224 diskless_instances = set()
3225 diskless = constants.DT_DISKLESS
3227 for nname in nodelist:
3228 node_instances = list(itertools.chain(node_image[nname].pinst,
3229 node_image[nname].sinst))
3230 diskless_instances.update(inst for inst in node_instances
3231 if instanceinfo[inst].disk_template == diskless)
3232 disks = [(inst, disk)
3233 for inst in node_instances
3234 for disk in instanceinfo[inst].disks]
3236 if not disks:
3237 # No need to collect data
3238 continue
3240 node_disks[nname] = disks
3242 # _AnnotateDiskParams already makes copies of the disks
3243 devonly = []
3244 for (inst, dev) in disks:
3245 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3246 self.cfg.SetDiskID(anno_disk, nname)
3247 devonly.append(anno_disk)
3249 node_disks_devonly[nname] = devonly
3251 assert len(node_disks) == len(node_disks_devonly)
3253 # Collect data from all nodes with disks
3254 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3255 node_disks_devonly)
3257 assert len(result) == len(node_disks)
3259 instdisk = {}
3261 for (nname, nres) in result.items():
3262 disks = node_disks[nname]
3264 if nres.offline:
3265 # No data from this node
3266 data = len(disks) * [(False, "node offline")]
3267 else:
3268 msg = nres.fail_msg
3269 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3270 "while getting disk information: %s", msg)
3271 if msg:
3272 # No data from this node
3273 data = len(disks) * [(False, msg)]
3274 else:
3275 data = []
3276 for idx, i in enumerate(nres.payload):
3277 if isinstance(i, (tuple, list)) and len(i) == 2:
3278 data.append(i)
3279 else:
3280 logging.warning("Invalid result from node %s, entry %d: %s",
3281 nname, idx, i)
3282 data.append((False, "Invalid result from the remote node"))
3284 for ((inst, _), status) in zip(disks, data):
3285 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3287 # Add empty entries for diskless instances.
3288 for inst in diskless_instances:
3289 assert inst not in instdisk
3290 instdisk[inst] = {}
3292 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3293 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3294 compat.all(isinstance(s, (tuple, list)) and
3295 len(s) == 2 for s in statuses)
3296 for inst, nnames in instdisk.items()
3297 for nname, statuses in nnames.items())
3299 instdisk_keys = set(instdisk)
3300 instanceinfo_keys = set(instanceinfo)
3301 assert instdisk_keys == instanceinfo_keys, \
3302 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3303 (instdisk_keys, instanceinfo_keys))
3305 return instdisk
3307 @staticmethod
3308 def _SshNodeSelector(group_uuid, all_nodes):
3309 """Create endless iterators for all potential SSH check hosts.
3312 nodes = [node for node in all_nodes
3313 if (node.group != group_uuid and
3314 not node.offline)]
3315 keyfunc = operator.attrgetter("group")
3317 return map(itertools.cycle,
3318 [sorted(map(operator.attrgetter("name"), names))
3319 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3320 keyfunc)])
3322 @classmethod
3323 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3324 """Choose which nodes should talk to which other nodes.
3326 We will make nodes contact all nodes in their group, and one node from
3327 every other group.
3329 @warning: This algorithm has a known issue if one node group is much
3330 smaller than others (e.g. just one node). In such a case all other
3331 nodes will talk to the single node.
3334 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3335 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3337 return (online_nodes,
3338 dict((name, sorted([i.next() for i in sel]))
3339 for name in online_nodes))
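# Hedged example with invented groups: for group A = {node1, node2} and
# another group B = {node3, node4}, a possible result is
#
#   (["node1", "node2"],
#    {"node1": ["node3"], "node2": ["node4"]})
#
# i.e. every online node of A additionally contacts one round-robin member
# of every other group, while in-group connectivity is covered by the first
# element listing all online group members.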
3341 def BuildHooksEnv(self):
3344 Cluster-Verify hooks just ran in the post phase and their failure makes
3345 the output be logged in the verify output and the verification to fail.
3349 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3352 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3353 for node in self.my_node_info.values())
3355 return env
3357 def BuildHooksNodes(self):
3358 """Build hooks nodes.
3361 return ([], self.my_node_names)
3363 def Exec(self, feedback_fn):
3364 """Verify integrity of the node group, performing various test on nodes.
3367 # This method has too many local variables. pylint: disable=R0914
3368 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3370 if not self.my_node_names:
3372 feedback_fn("* Empty node group, skipping verification")
3375 self.bad = False
3376 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3377 verbose = self.op.verbose
3378 self._feedback_fn = feedback_fn
3380 vg_name = self.cfg.GetVGName()
3381 drbd_helper = self.cfg.GetDRBDHelper()
3382 cluster = self.cfg.GetClusterInfo()
3383 hypervisors = cluster.enabled_hypervisors
3384 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3386 i_non_redundant = [] # Non redundant instances
3387 i_non_a_balanced = [] # Non auto-balanced instances
3388 i_offline = 0 # Count of offline instances
3389 n_offline = 0 # Count of offline nodes
3390 n_drained = 0 # Count of nodes being drained
3391 node_vol_should = {}
3393 # FIXME: verify OS list
3396 filemap = _ComputeAncillaryFiles(cluster, False)
3398 # do local checksums
3399 master_node = self.master_node = self.cfg.GetMasterNode()
3400 master_ip = self.cfg.GetMasterIP()
3402 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3404 user_scripts = []
3405 if self.cfg.GetUseExternalMipScript():
3406 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3408 node_verify_param = {
3409 constants.NV_FILELIST:
3410 map(vcluster.MakeVirtualPath,
3411 utils.UniqueSequence(filename
3412 for files in filemap
3413 for filename in files)),
3414 constants.NV_NODELIST:
3415 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3416 self.all_node_info.values()),
3417 constants.NV_HYPERVISOR: hypervisors,
3418 constants.NV_HVPARAMS:
3419 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3420 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3421 for node in node_data_list
3422 if not node.offline],
3423 constants.NV_INSTANCELIST: hypervisors,
3424 constants.NV_VERSION: None,
3425 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3426 constants.NV_NODESETUP: None,
3427 constants.NV_TIME: None,
3428 constants.NV_MASTERIP: (master_node, master_ip),
3429 constants.NV_OSLIST: None,
3430 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3431 constants.NV_USERSCRIPTS: user_scripts,
3432 }
3434 if vg_name is not None:
3435 node_verify_param[constants.NV_VGLIST] = None
3436 node_verify_param[constants.NV_LVLIST] = vg_name
3437 node_verify_param[constants.NV_PVLIST] = [vg_name]
3439 if drbd_helper:
3440 node_verify_param[constants.NV_DRBDLIST] = None
3441 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3443 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3444 # Load file storage paths only from master node
3445 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3448 # FIXME: this needs to be changed per node-group, not cluster-wide
3449 bridges = set()
3450 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3451 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3452 bridges.add(default_nicpp[constants.NIC_LINK])
3453 for instance in self.my_inst_info.values():
3454 for nic in instance.nics:
3455 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3456 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3457 bridges.add(full_nic[constants.NIC_LINK])
3459 if bridges:
3460 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3462 # Build our expected cluster state
3463 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3464 name=node.name,
3465 vm_capable=node.vm_capable))
3466 for node in node_data_list)
3468 oob_paths = []
3470 for node in self.all_node_info.values():
3471 path = _SupportsOob(self.cfg, node)
3472 if path and path not in oob_paths:
3473 oob_paths.append(path)
3475 if oob_paths:
3476 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3478 for instance in self.my_inst_names:
3479 inst_config = self.my_inst_info[instance]
3480 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3481 i_offline += 1
3483 for nname in inst_config.all_nodes:
3484 if nname not in node_image:
3485 gnode = self.NodeImage(name=nname)
3486 gnode.ghost = (nname not in self.all_node_info)
3487 node_image[nname] = gnode
3489 inst_config.MapLVsByNode(node_vol_should)
3491 pnode = inst_config.primary_node
3492 node_image[pnode].pinst.append(instance)
3494 for snode in inst_config.secondary_nodes:
3495 nimg = node_image[snode]
3496 nimg.sinst.append(instance)
3497 if pnode not in nimg.sbp:
3498 nimg.sbp[pnode] = []
3499 nimg.sbp[pnode].append(instance)
3501 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3503 # The value of exclusive_storage should be the same across the group, so if
3504 # it's True for at least a node, we act as if it were set for all the nodes
3505 self._exclusive_storage = compat.any(es_flags.values())
3506 if self._exclusive_storage:
3507 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3508 es_unset_nodes = [n for (n, es) in es_flags.items()
3509 if not es]
3511 if es_unset_nodes:
3512 self._Error(constants.CV_EGROUPMIXEDESFLAG, self.group_info.name,
3513 "The exclusive_storage flag should be uniform in a group,"
3514 " but these nodes have it unset: %s",
3515 utils.CommaJoin(utils.NiceSort(es_unset_nodes)))
3516 self.LogWarning("Some checks required by exclusive storage will be"
3517 " performed also on nodes with the flag unset")
3519 # At this point, we have the in-memory data structures complete,
3520 # except for the runtime information, which we'll gather next
3522 # Due to the way our RPC system works, exact response times cannot be
3523 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3524 # time before and after executing the request, we can at least have a time
3525 # window.
3526 nvinfo_starttime = time.time()
3527 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3528 node_verify_param,
3529 self.cfg.GetClusterName())
3530 nvinfo_endtime = time.time()
3532 if self.extra_lv_nodes and vg_name is not None:
3533 extra_lv_nvinfo = \
3534 self.rpc.call_node_verify(self.extra_lv_nodes,
3535 {constants.NV_LVLIST: vg_name},
3536 self.cfg.GetClusterName())
3537 else:
3538 extra_lv_nvinfo = {}
3540 all_drbd_map = self.cfg.ComputeDRBDMap()
3542 feedback_fn("* Gathering disk information (%s nodes)" %
3543 len(self.my_node_names))
3544 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3545 self.my_inst_info)
3547 feedback_fn("* Verifying configuration file consistency")
3549 # If not all nodes are being checked, we need to make sure the master node
3550 # and a non-checked vm_capable node are in the list.
3551 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3552 if absent_nodes:
3553 vf_nvinfo = all_nvinfo.copy()
3554 vf_node_info = list(self.my_node_info.values())
3555 additional_nodes = []
3556 if master_node not in self.my_node_info:
3557 additional_nodes.append(master_node)
3558 vf_node_info.append(self.all_node_info[master_node])
3559 # Add the first vm_capable node we find which is not included,
3560 # excluding the master node (which we already have)
3561 for node in absent_nodes:
3562 nodeinfo = self.all_node_info[node]
3563 if (nodeinfo.vm_capable and not nodeinfo.offline and
3564 node != master_node):
3565 additional_nodes.append(node)
3566 vf_node_info.append(self.all_node_info[node])
3567 break
3568 key = constants.NV_FILELIST
3569 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3570 {key: node_verify_param[key]},
3571 self.cfg.GetClusterName()))
3572 else:
3573 vf_nvinfo = all_nvinfo
3574 vf_node_info = self.my_node_info.values()
3576 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3578 feedback_fn("* Verifying node status")
3580 refos_img = None
3582 for node_i in node_data_list:
3583 node = node_i.name
3584 nimg = node_image[node]
3586 if node_i.offline:
3587 if verbose:
3588 feedback_fn("* Skipping offline node %s" % (node,))
3589 n_offline += 1
3590 continue
3592 if node == master_node:
3593 ntype = "master"
3594 elif node_i.master_candidate:
3595 ntype = "master candidate"
3596 elif node_i.drained:
3597 ntype = "drained"
3598 n_drained += 1
3599 else:
3600 ntype = "regular"
3601 if verbose:
3602 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3604 msg = all_nvinfo[node].fail_msg
3605 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3606 msg)
3607 if msg:
3608 nimg.rpc_fail = True
3609 continue
3611 nresult = all_nvinfo[node].payload
3613 nimg.call_ok = self._VerifyNode(node_i, nresult)
3614 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3615 self._VerifyNodeNetwork(node_i, nresult)
3616 self._VerifyNodeUserScripts(node_i, nresult)
3617 self._VerifyOob(node_i, nresult)
3618 self._VerifyFileStoragePaths(node_i, nresult,
3619 node == master_node)
3621 if nimg.vm_capable:
3622 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3623 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3624 all_drbd_map)
3626 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3627 self._UpdateNodeInstances(node_i, nresult, nimg)
3628 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3629 self._UpdateNodeOS(node_i, nresult, nimg)
3631 if not nimg.os_fail:
3632 if refos_img is None:
3633 refos_img = nimg
3634 self._VerifyNodeOS(node_i, nimg, refos_img)
3635 self._VerifyNodeBridges(node_i, nresult, bridges)
3637 # Check whether all running instances are primary for the node. (This
3638 # can no longer be done from _VerifyInstance below, since some of the
3639 # wrong instances could be from other node groups.)
3640 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3642 for inst in non_primary_inst:
3643 test = inst in self.all_inst_info
3644 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3645 "instance should not run on node %s", node_i.name)
3646 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3647 "node is running unknown instance %s", inst)
3649 self._VerifyGroupLVM(node_image, vg_name)
3651 for node, result in extra_lv_nvinfo.items():
3652 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3653 node_image[node], vg_name)
3655 feedback_fn("* Verifying instance status")
3656 for instance in self.my_inst_names:
3658 feedback_fn("* Verifying instance %s" % instance)
3659 inst_config = self.my_inst_info[instance]
3660 self._VerifyInstance(instance, inst_config, node_image,
3661 instdisk[instance])
3663 # If the instance is non-redundant we cannot survive losing its primary
3664 # node, so we are not N+1 compliant.
3665 if inst_config.disk_template not in constants.DTS_MIRRORED:
3666 i_non_redundant.append(instance)
3668 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3669 i_non_a_balanced.append(instance)
3671 feedback_fn("* Verifying orphan volumes")
3672 reserved = utils.FieldSet(*cluster.reserved_lvs)
3674 # We will get spurious "unknown volume" warnings if any node of this group
3675 # is secondary for an instance whose primary is in another group. To avoid
3676 # them, we find these instances and add their volumes to node_vol_should.
3677 for inst in self.all_inst_info.values():
3678 for secondary in inst.secondary_nodes:
3679 if (secondary in self.my_node_info
3680 and inst.name not in self.my_inst_info):
3681 inst.MapLVsByNode(node_vol_should)
3682 break
3684 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3686 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3687 feedback_fn("* Verifying N+1 Memory redundancy")
3688 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3690 feedback_fn("* Other Notes")
3692 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3693 % len(i_non_redundant))
3695 if i_non_a_balanced:
3696 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3697 % len(i_non_a_balanced))
3700 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3703 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3706 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3710 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3711 """Analyze the post-hooks' result
3713 This method analyses the hook result, handles it, and sends some
3714 nicely-formatted feedback back to the user.
3716 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3717 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3718 @param hooks_results: the results of the multi-node hooks rpc call
3719 @param feedback_fn: function used send feedback back to the caller
3720 @param lu_result: previous Exec result
3721 @return: the new Exec result, based on the previous result
3725 # We only really run POST phase hooks, only for non-empty groups,
3726 # and are only interested in their results
3727 if not self.my_node_names:
3730 elif phase == constants.HOOKS_PHASE_POST:
3731 # Used to change hooks' output to proper indentation
3732 feedback_fn("* Hooks Results")
3733 assert hooks_results, "invalid result from hooks"
3735 for node_name in hooks_results:
3736 res = hooks_results[node_name]
3738 test = msg and not res.offline
3739 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3740 "Communication failure in hooks execution: %s", msg)
3741 if res.offline or msg:
3742 # No need to investigate payload if node is offline or gave
3745 for script, hkr, output in res.payload:
3746 test = hkr == constants.HKR_FAIL
3747 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3748 "Script %s failed, output:", script)
3750 output = self._HOOKS_INDENT_RE.sub(" ", output)
3751 feedback_fn("%s" % output)
3757 class LUClusterVerifyDisks(NoHooksLU):
3758 """Verifies the cluster disks status.
3763 def ExpandNames(self):
3764 self.share_locks = _ShareAll()
3765 self.needed_locks = {
3766 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3769 def Exec(self, feedback_fn):
3770 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3772 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3773 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3774 for group in group_names])
3777 class LUGroupVerifyDisks(NoHooksLU):
3778 """Verifies the status of all disks in a node group.
3783 def ExpandNames(self):
3784 # Raises errors.OpPrereqError on its own if group can't be found
3785 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3787 self.share_locks = _ShareAll()
3788 self.needed_locks = {
3789 locking.LEVEL_INSTANCE: [],
3790 locking.LEVEL_NODEGROUP: [],
3791 locking.LEVEL_NODE: [],
3793 # This opcode is acquires all node locks in a group. LUClusterVerifyDisks
3794 # starts one instance of this opcode for every group, which means all
3795 # nodes will be locked for a short amount of time, so it's better to
3796 # acquire the node allocation lock as well.
3797 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3800 def DeclareLocks(self, level):
3801 if level == locking.LEVEL_INSTANCE:
3802 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3804 # Lock instances optimistically, needs verification once node and group
3805 # locks have been acquired
3806 self.needed_locks[locking.LEVEL_INSTANCE] = \
3807 self.cfg.GetNodeGroupInstances(self.group_uuid)
3809 elif level == locking.LEVEL_NODEGROUP:
3810 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3812 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3813 set([self.group_uuid] +
3814 # Lock all groups used by instances optimistically; this requires
3815 # going via the node before it's locked, requiring verification
3818 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3819 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3821 elif level == locking.LEVEL_NODE:
3822 # This will only lock the nodes in the group to be verified which contain
3824 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3825 self._LockInstancesNodes()
3827 # Lock all nodes in group to be verified
3828 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3829 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3830 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3832 def CheckPrereq(self):
3833 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3834 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3835 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3837 assert self.group_uuid in owned_groups
3839 # Check if locked instances are still correct
3840 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3842 # Get instance information
3843 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3845 # Check if node groups for locked instances are still correct
3846 _CheckInstancesNodeGroups(self.cfg, self.instances,
3847 owned_groups, owned_nodes, self.group_uuid)
3849 def Exec(self, feedback_fn):
3850 """Verify integrity of cluster disks.
3852 @rtype: tuple of three items
3853 @return: a tuple of (dict of node-to-node_error, list of instances
3854 which need activate-disks, dict of instance: (node, volume) for
3859 res_instances = set()
3862 nv_dict = _MapInstanceDisksToNodes(
3863 [inst for inst in self.instances.values()
3864 if inst.admin_state == constants.ADMINST_UP])
3867 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3868 set(self.cfg.GetVmCapableNodeList()))
3870 node_lvs = self.rpc.call_lv_list(nodes, [])
3872 for (node, node_res) in node_lvs.items():
3873 if node_res.offline:
3876 msg = node_res.fail_msg
3878 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3879 res_nodes[node] = msg
3882 for lv_name, (_, _, lv_online) in node_res.payload.items():
3883 inst = nv_dict.pop((node, lv_name), None)
3884 if not (lv_online or inst is None):
3885 res_instances.add(inst)
3887 # any leftover items in nv_dict are missing LVs, let's arrange the data
3889 for key, inst in nv_dict.iteritems():
3890 res_missing.setdefault(inst, []).append(list(key))
3892 return (res_nodes, list(res_instances), res_missing)
3895 class LUClusterRepairDiskSizes(NoHooksLU):
3896 """Verifies the cluster disks sizes.
3901 def ExpandNames(self):
3902 if self.op.instances:
3903 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3904 # Not getting the node allocation lock as only a specific set of
3905 # instances (and their nodes) is going to be acquired
3906 self.needed_locks = {
3907 locking.LEVEL_NODE_RES: [],
3908 locking.LEVEL_INSTANCE: self.wanted_names,
3910 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3912 self.wanted_names = None
3913 self.needed_locks = {
3914 locking.LEVEL_NODE_RES: locking.ALL_SET,
3915 locking.LEVEL_INSTANCE: locking.ALL_SET,
3917 # This opcode is acquires the node locks for all instances
3918 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3921 self.share_locks = {
3922 locking.LEVEL_NODE_RES: 1,
3923 locking.LEVEL_INSTANCE: 0,
3924 locking.LEVEL_NODE_ALLOC: 1,
3927 def DeclareLocks(self, level):
3928 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3929 self._LockInstancesNodes(primary_only=True, level=level)
3931 def CheckPrereq(self):
3932 """Check prerequisites.
3934 This only checks the optional instance list against the existing names.
3937 if self.wanted_names is None:
3938 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3940 self.wanted_instances = \
3941 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3943 def _EnsureChildSizes(self, disk):
3944 """Ensure children of the disk have the needed disk size.
3946 This is valid mainly for DRBD8 and fixes an issue where the
3947 children have smaller disk size.
3949 @param disk: an L{ganeti.objects.Disk} object
3952 if disk.dev_type == constants.LD_DRBD8:
3953 assert disk.children, "Empty children for DRBD8?"
3954 fchild = disk.children[0]
3955 mismatch = fchild.size < disk.size
3957 self.LogInfo("Child disk has size %d, parent %d, fixing",
3958 fchild.size, disk.size)
3959 fchild.size = disk.size
3961 # and we recurse on this child only, not on the metadev
3962 return self._EnsureChildSizes(fchild) or mismatch
3966 def Exec(self, feedback_fn):
3967 """Verify the size of cluster disks.
3970 # TODO: check child disks too
3971 # TODO: check differences in size between primary/secondary nodes
3973 for instance in self.wanted_instances:
3974 pnode = instance.primary_node
3975 if pnode not in per_node_disks:
3976 per_node_disks[pnode] = []
3977 for idx, disk in enumerate(instance.disks):
3978 per_node_disks[pnode].append((instance, idx, disk))
3980 assert not (frozenset(per_node_disks.keys()) -
3981 self.owned_locks(locking.LEVEL_NODE_RES)), \
3982 "Not owning correct locks"
3983 assert not self.owned_locks(locking.LEVEL_NODE)
3986 for node, dskl in per_node_disks.items():
3987 newl = [v[2].Copy() for v in dskl]
3989 self.cfg.SetDiskID(dsk, node)
3990 result = self.rpc.call_blockdev_getsize(node, newl)
3992 self.LogWarning("Failure in blockdev_getsize call to node"
3993 " %s, ignoring", node)
3995 if len(result.payload) != len(dskl):
3996 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3997 " result.payload=%s", node, len(dskl), result.payload)
3998 self.LogWarning("Invalid result from node %s, ignoring node results",
4001 for ((instance, idx, disk), size) in zip(dskl, result.payload):
4003 self.LogWarning("Disk %d of instance %s did not return size"
4004 " information, ignoring", idx, instance.name)
4006 if not isinstance(size, (int, long)):
4007 self.LogWarning("Disk %d of instance %s did not return valid"
4008 " size information, ignoring", idx, instance.name)
4011 if size != disk.size:
4012 self.LogInfo("Disk %d of instance %s has mismatched size,"
4013 " correcting: recorded %d, actual %d", idx,
4014 instance.name, disk.size, size)
4016 self.cfg.Update(instance, feedback_fn)
4017 changed.append((instance.name, idx, size))
4018 if self._EnsureChildSizes(disk):
4019 self.cfg.Update(instance, feedback_fn)
4020 changed.append((instance.name, idx, disk.size))
4024 class LUClusterRename(LogicalUnit):
4025 """Rename the cluster.
4028 HPATH = "cluster-rename"
4029 HTYPE = constants.HTYPE_CLUSTER
4031 def BuildHooksEnv(self):
4036 "OP_TARGET": self.cfg.GetClusterName(),
4037 "NEW_NAME": self.op.name,
4040 def BuildHooksNodes(self):
4041 """Build hooks nodes.
4044 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4046 def CheckPrereq(self):
4047 """Verify that the passed name is a valid one.
4050 hostname = netutils.GetHostname(name=self.op.name,
4051 family=self.cfg.GetPrimaryIPFamily())
4053 new_name = hostname.name
4054 self.ip = new_ip = hostname.ip
4055 old_name = self.cfg.GetClusterName()
4056 old_ip = self.cfg.GetMasterIP()
4057 if new_name == old_name and new_ip == old_ip:
4058 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4059 " cluster has changed",
4061 if new_ip != old_ip:
4062 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4063 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4064 " reachable on the network" %
4065 new_ip, errors.ECODE_NOTUNIQUE)
4067 self.op.name = new_name
4069 def Exec(self, feedback_fn):
4070 """Rename the cluster.
4073 clustername = self.op.name
4076 # shutdown the master IP
4077 master_params = self.cfg.GetMasterNetworkParameters()
4078 ems = self.cfg.GetUseExternalMipScript()
4079 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4081 result.Raise("Could not disable the master role")
4084 cluster = self.cfg.GetClusterInfo()
4085 cluster.cluster_name = clustername
4086 cluster.master_ip = new_ip
4087 self.cfg.Update(cluster, feedback_fn)
4089 # update the known hosts file
4090 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4091 node_list = self.cfg.GetOnlineNodeList()
4093 node_list.remove(master_params.name)
4096 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4098 master_params.ip = new_ip
4099 result = self.rpc.call_node_activate_master_ip(master_params.name,
4101 msg = result.fail_msg
4103 self.LogWarning("Could not re-enable the master role on"
4104 " the master, please restart manually: %s", msg)
4109 def _ValidateNetmask(cfg, netmask):
4110 """Checks if a netmask is valid.
4112 @type cfg: L{config.ConfigWriter}
4113 @param cfg: The cluster configuration
4115 @param netmask: the netmask to be verified
4116 @raise errors.OpPrereqError: if the validation fails
4119 ip_family = cfg.GetPrimaryIPFamily()
4121 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4122 except errors.ProgrammerError:
4123 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4124 ip_family, errors.ECODE_INVAL)
4125 if not ipcls.ValidateNetmask(netmask):
4126 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4127 (netmask), errors.ECODE_INVAL)
4130 class LUClusterSetParams(LogicalUnit):
4131 """Change the parameters of the cluster.
4134 HPATH = "cluster-modify"
4135 HTYPE = constants.HTYPE_CLUSTER
4138 def CheckArguments(self):
4142 if self.op.uid_pool:
4143 uidpool.CheckUidPool(self.op.uid_pool)
4145 if self.op.add_uids:
4146 uidpool.CheckUidPool(self.op.add_uids)
4148 if self.op.remove_uids:
4149 uidpool.CheckUidPool(self.op.remove_uids)
4151 if self.op.master_netmask is not None:
4152 _ValidateNetmask(self.cfg, self.op.master_netmask)
4154 if self.op.diskparams:
4155 for dt_params in self.op.diskparams.values():
4156 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4158 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4159 except errors.OpPrereqError, err:
4160 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4163 def ExpandNames(self):
4164 # FIXME: in the future maybe other cluster params won't require checking on
4165 # all nodes to be modified.
4166 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4167 # resource locks the right thing, shouldn't it be the BGL instead?
4168 self.needed_locks = {
4169 locking.LEVEL_NODE: locking.ALL_SET,
4170 locking.LEVEL_INSTANCE: locking.ALL_SET,
4171 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4172 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4174 self.share_locks = _ShareAll()
4176 def BuildHooksEnv(self):
4181 "OP_TARGET": self.cfg.GetClusterName(),
4182 "NEW_VG_NAME": self.op.vg_name,
4185 def BuildHooksNodes(self):
4186 """Build hooks nodes.
4189 mn = self.cfg.GetMasterNode()
4192 def CheckPrereq(self):
4193 """Check prerequisites.
4195 This checks whether the given params don't conflict and
4196 if the given volume group is valid.
4199 if self.op.vg_name is not None and not self.op.vg_name:
4200 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4201 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4202 " instances exist", errors.ECODE_INVAL)
4204 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4205 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4206 raise errors.OpPrereqError("Cannot disable drbd helper while"
4207 " drbd-based instances exist",
4210 node_list = self.owned_locks(locking.LEVEL_NODE)
4212 # if vg_name not None, checks given volume group on all nodes
4214 vglist = self.rpc.call_vg_list(node_list)
4215 for node in node_list:
4216 msg = vglist[node].fail_msg
4218 # ignoring down node
4219 self.LogWarning("Error while gathering data on node %s"
4220 " (ignoring node): %s", node, msg)
4222 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4224 constants.MIN_VG_SIZE)
4226 raise errors.OpPrereqError("Error on node '%s': %s" %
4227 (node, vgstatus), errors.ECODE_ENVIRON)
4229 if self.op.drbd_helper:
4230 # checks given drbd helper on all nodes
4231 helpers = self.rpc.call_drbd_helper(node_list)
4232 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4234 self.LogInfo("Not checking drbd helper on offline node %s", node)
4236 msg = helpers[node].fail_msg
4238 raise errors.OpPrereqError("Error checking drbd helper on node"
4239 " '%s': %s" % (node, msg),
4240 errors.ECODE_ENVIRON)
4241 node_helper = helpers[node].payload
4242 if node_helper != self.op.drbd_helper:
4243 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4244 (node, node_helper), errors.ECODE_ENVIRON)
4246 self.cluster = cluster = self.cfg.GetClusterInfo()
4247 # validate params changes
4248 if self.op.beparams:
4249 objects.UpgradeBeParams(self.op.beparams)
4250 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4251 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4253 if self.op.ndparams:
4254 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4255 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4257 # TODO: we need a more general way to handle resetting
4258 # cluster-level parameters to default values
4259 if self.new_ndparams["oob_program"] == "":
4260 self.new_ndparams["oob_program"] = \
4261 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4263 if self.op.hv_state:
4264 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4265 self.cluster.hv_state_static)
4266 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4267 for hv, values in new_hv_state.items())
4269 if self.op.disk_state:
4270 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4271 self.cluster.disk_state_static)
4272 self.new_disk_state = \
4273 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4274 for name, values in svalues.items()))
4275 for storage, svalues in new_disk_state.items())
4278 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4281 all_instances = self.cfg.GetAllInstancesInfo().values()
4283 for group in self.cfg.GetAllNodeGroupsInfo().values():
4284 instances = frozenset([inst for inst in all_instances
4285 if compat.any(node in group.members
4286 for node in inst.all_nodes)])
4287 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4288 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4289 new = _ComputeNewInstanceViolations(ipol,
4290 new_ipolicy, instances)
4292 violations.update(new)
4295 self.LogWarning("After the ipolicy change the following instances"
4296 " violate them: %s",
4297 utils.CommaJoin(utils.NiceSort(violations)))
4299 if self.op.nicparams:
4300 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4301 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4302 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4305 # check all instances for consistency
4306 for instance in self.cfg.GetAllInstancesInfo().values():
4307 for nic_idx, nic in enumerate(instance.nics):
4308 params_copy = copy.deepcopy(nic.nicparams)
4309 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4311 # check parameter syntax
4313 objects.NIC.CheckParameterSyntax(params_filled)
4314 except errors.ConfigurationError, err:
4315 nic_errors.append("Instance %s, nic/%d: %s" %
4316 (instance.name, nic_idx, err))
4318 # if we're moving instances to routed, check that they have an ip
4319 target_mode = params_filled[constants.NIC_MODE]
4320 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4321 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4322 " address" % (instance.name, nic_idx))
4324 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4325 "\n".join(nic_errors), errors.ECODE_INVAL)
4327 # hypervisor list/parameters
4328 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4329 if self.op.hvparams:
4330 for hv_name, hv_dict in self.op.hvparams.items():
4331 if hv_name not in self.new_hvparams:
4332 self.new_hvparams[hv_name] = hv_dict
4334 self.new_hvparams[hv_name].update(hv_dict)
4336 # disk template parameters
4337 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4338 if self.op.diskparams:
4339 for dt_name, dt_params in self.op.diskparams.items():
4340 if dt_name not in self.op.diskparams:
4341 self.new_diskparams[dt_name] = dt_params
4343 self.new_diskparams[dt_name].update(dt_params)
4345 # os hypervisor parameters
4346 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4348 for os_name, hvs in self.op.os_hvp.items():
4349 if os_name not in self.new_os_hvp:
4350 self.new_os_hvp[os_name] = hvs
4352 for hv_name, hv_dict in hvs.items():
4354 # Delete if it exists
4355 self.new_os_hvp[os_name].pop(hv_name, None)
4356 elif hv_name not in self.new_os_hvp[os_name]:
4357 self.new_os_hvp[os_name][hv_name] = hv_dict
4359 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4362 self.new_osp = objects.FillDict(cluster.osparams, {})
4363 if self.op.osparams:
4364 for os_name, osp in self.op.osparams.items():
4365 if os_name not in self.new_osp:
4366 self.new_osp[os_name] = {}
4368 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4371 if not self.new_osp[os_name]:
4372 # we removed all parameters
4373 del self.new_osp[os_name]
4375 # check the parameter validity (remote check)
4376 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4377 os_name, self.new_osp[os_name])
4379 # changes to the hypervisor list
4380 if self.op.enabled_hypervisors is not None:
4381 self.hv_list = self.op.enabled_hypervisors
4382 for hv in self.hv_list:
4383 # if the hypervisor doesn't already exist in the cluster
4384 # hvparams, we initialize it to empty, and then (in both
4385 # cases) we make sure to fill the defaults, as we might not
4386 # have a complete defaults list if the hypervisor wasn't
4388 if hv not in new_hvp:
4390 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4391 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4393 self.hv_list = cluster.enabled_hypervisors
4395 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4396 # either the enabled list has changed, or the parameters have, validate
4397 for hv_name, hv_params in self.new_hvparams.items():
4398 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4399 (self.op.enabled_hypervisors and
4400 hv_name in self.op.enabled_hypervisors)):
4401 # either this is a new hypervisor, or its parameters have changed
4402 hv_class = hypervisor.GetHypervisorClass(hv_name)
4403 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4404 hv_class.CheckParameterSyntax(hv_params)
4405 _CheckHVParams(self, node_list, hv_name, hv_params)
4408 # no need to check any newly-enabled hypervisors, since the
4409 # defaults have already been checked in the above code-block
4410 for os_name, os_hvp in self.new_os_hvp.items():
4411 for hv_name, hv_params in os_hvp.items():
4412 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4413 # we need to fill in the new os_hvp on top of the actual hv_p
4414 cluster_defaults = self.new_hvparams.get(hv_name, {})
4415 new_osp = objects.FillDict(cluster_defaults, hv_params)
4416 hv_class = hypervisor.GetHypervisorClass(hv_name)
4417 hv_class.CheckParameterSyntax(new_osp)
4418 _CheckHVParams(self, node_list, hv_name, new_osp)
4420 if self.op.default_iallocator:
4421 alloc_script = utils.FindFile(self.op.default_iallocator,
4422 constants.IALLOCATOR_SEARCH_PATH,
4424 if alloc_script is None:
4425 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4426 " specified" % self.op.default_iallocator,
4429 def Exec(self, feedback_fn):
4430 """Change the parameters of the cluster.
4433 if self.op.vg_name is not None:
4434 new_volume = self.op.vg_name
4437 if new_volume != self.cfg.GetVGName():
4438 self.cfg.SetVGName(new_volume)
4440 feedback_fn("Cluster LVM configuration already in desired"
4441 " state, not changing")
4442 if self.op.drbd_helper is not None:
4443 new_helper = self.op.drbd_helper
4446 if new_helper != self.cfg.GetDRBDHelper():
4447 self.cfg.SetDRBDHelper(new_helper)
4449 feedback_fn("Cluster DRBD helper already in desired state,"
4451 if self.op.hvparams:
4452 self.cluster.hvparams = self.new_hvparams
4454 self.cluster.os_hvp = self.new_os_hvp
4455 if self.op.enabled_hypervisors is not None:
4456 self.cluster.hvparams = self.new_hvparams
4457 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4458 if self.op.beparams:
4459 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4460 if self.op.nicparams:
4461 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4463 self.cluster.ipolicy = self.new_ipolicy
4464 if self.op.osparams:
4465 self.cluster.osparams = self.new_osp
4466 if self.op.ndparams:
4467 self.cluster.ndparams = self.new_ndparams
4468 if self.op.diskparams:
4469 self.cluster.diskparams = self.new_diskparams
4470 if self.op.hv_state:
4471 self.cluster.hv_state_static = self.new_hv_state
4472 if self.op.disk_state:
4473 self.cluster.disk_state_static = self.new_disk_state
4475 if self.op.candidate_pool_size is not None:
4476 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4477 # we need to update the pool size here, otherwise the save will fail
4478 _AdjustCandidatePool(self, [])
4480 if self.op.maintain_node_health is not None:
4481 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4482 feedback_fn("Note: CONFD was disabled at build time, node health"
4483 " maintenance is not useful (still enabling it)")
4484 self.cluster.maintain_node_health = self.op.maintain_node_health
4486 if self.op.prealloc_wipe_disks is not None:
4487 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4489 if self.op.add_uids is not None:
4490 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4492 if self.op.remove_uids is not None:
4493 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4495 if self.op.uid_pool is not None:
4496 self.cluster.uid_pool = self.op.uid_pool
4498 if self.op.default_iallocator is not None:
4499 self.cluster.default_iallocator = self.op.default_iallocator
4501 if self.op.reserved_lvs is not None:
4502 self.cluster.reserved_lvs = self.op.reserved_lvs
4504 if self.op.use_external_mip_script is not None:
4505 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4507 def helper_os(aname, mods, desc):
4509 lst = getattr(self.cluster, aname)
4510 for key, val in mods:
4511 if key == constants.DDM_ADD:
4513 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4516 elif key == constants.DDM_REMOVE:
4520 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4522 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4524 if self.op.hidden_os:
4525 helper_os("hidden_os", self.op.hidden_os, "hidden")
4527 if self.op.blacklisted_os:
4528 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4530 if self.op.master_netdev:
4531 master_params = self.cfg.GetMasterNetworkParameters()
4532 ems = self.cfg.GetUseExternalMipScript()
4533 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4534 self.cluster.master_netdev)
4535 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4537 result.Raise("Could not disable the master ip")
4538 feedback_fn("Changing master_netdev from %s to %s" %
4539 (master_params.netdev, self.op.master_netdev))
4540 self.cluster.master_netdev = self.op.master_netdev
4542 if self.op.master_netmask:
4543 master_params = self.cfg.GetMasterNetworkParameters()
4544 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4545 result = self.rpc.call_node_change_master_netmask(master_params.name,
4546 master_params.netmask,
4547 self.op.master_netmask,
4549 master_params.netdev)
4551 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4554 self.cluster.master_netmask = self.op.master_netmask
4556 self.cfg.Update(self.cluster, feedback_fn)
4558 if self.op.master_netdev:
4559 master_params = self.cfg.GetMasterNetworkParameters()
4560 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4561 self.op.master_netdev)
4562 ems = self.cfg.GetUseExternalMipScript()
4563 result = self.rpc.call_node_activate_master_ip(master_params.name,
4566 self.LogWarning("Could not re-enable the master ip on"
4567 " the master, please restart manually: %s",
4571 def _UploadHelper(lu, nodes, fname):
4572 """Helper for uploading a file and showing warnings.
4575 if os.path.exists(fname):
4576 result = lu.rpc.call_upload_file(nodes, fname)
4577 for to_node, to_result in result.items():
4578 msg = to_result.fail_msg
4580 msg = ("Copy of file %s to node %s failed: %s" %
4581 (fname, to_node, msg))
4585 def _ComputeAncillaryFiles(cluster, redist):
4586 """Compute files external to Ganeti which need to be consistent.
4588 @type redist: boolean
4589 @param redist: Whether to include files which need to be redistributed
4592 # Compute files for all nodes
4594 pathutils.SSH_KNOWN_HOSTS_FILE,
4595 pathutils.CONFD_HMAC_KEY,
4596 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4597 pathutils.SPICE_CERT_FILE,
4598 pathutils.SPICE_CACERT_FILE,
4599 pathutils.RAPI_USERS_FILE,
4603 # we need to ship at least the RAPI certificate
4604 files_all.add(pathutils.RAPI_CERT_FILE)
4606 files_all.update(pathutils.ALL_CERT_FILES)
4607 files_all.update(ssconf.SimpleStore().GetFileList())
4609 if cluster.modify_etc_hosts:
4610 files_all.add(pathutils.ETC_HOSTS)
4612 if cluster.use_external_mip_script:
4613 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4615 # Files which are optional, these must:
4616 # - be present in one other category as well
4617 # - either exist or not exist on all nodes of that category (mc, vm all)
4619 pathutils.RAPI_USERS_FILE,
4622 # Files which should only be on master candidates
4626 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4630 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4631 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4632 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4634 # Files which should only be on VM-capable nodes
4637 for hv_name in cluster.enabled_hypervisors
4639 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4643 for hv_name in cluster.enabled_hypervisors
4645 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4647 # Filenames in each category must be unique
4648 all_files_set = files_all | files_mc | files_vm
4649 assert (len(all_files_set) ==
4650 sum(map(len, [files_all, files_mc, files_vm]))), \
4651 "Found file listed in more than one file list"
4653 # Optional files must be present in one other category
4654 assert all_files_set.issuperset(files_opt), \
4655 "Optional file not in a different required list"
4657 # This one file should never ever be re-distributed via RPC
4658 assert not (redist and
4659 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4661 return (files_all, files_opt, files_mc, files_vm)
4664 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4665 """Distribute additional files which are part of the cluster configuration.
4667 ConfigWriter takes care of distributing the config and ssconf files, but
4668 there are more files which should be distributed to all nodes. This function
4669 makes sure those are copied.
4671 @param lu: calling logical unit
4672 @param additional_nodes: list of nodes not in the config to distribute to
4673 @type additional_vm: boolean
4674 @param additional_vm: whether the additional nodes are vm-capable or not
4677 # Gather target nodes
4678 cluster = lu.cfg.GetClusterInfo()
4679 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4681 online_nodes = lu.cfg.GetOnlineNodeList()
4682 online_set = frozenset(online_nodes)
4683 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4685 if additional_nodes is not None:
4686 online_nodes.extend(additional_nodes)
4688 vm_nodes.extend(additional_nodes)
4690 # Never distribute to master node
4691 for nodelist in [online_nodes, vm_nodes]:
4692 if master_info.name in nodelist:
4693 nodelist.remove(master_info.name)
4696 (files_all, _, files_mc, files_vm) = \
4697 _ComputeAncillaryFiles(cluster, True)
4699 # Never re-distribute configuration file from here
4700 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4701 pathutils.CLUSTER_CONF_FILE in files_vm)
4702 assert not files_mc, "Master candidates not handled in this function"
4705 (online_nodes, files_all),
4706 (vm_nodes, files_vm),
4710 for (node_list, files) in filemap:
4712 _UploadHelper(lu, node_list, fname)
4715 class LUClusterRedistConf(NoHooksLU):
4716 """Force the redistribution of cluster configuration.
4718 This is a very simple LU.
4723 def ExpandNames(self):
4724 self.needed_locks = {
4725 locking.LEVEL_NODE: locking.ALL_SET,
4726 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4728 self.share_locks = _ShareAll()
4730 def Exec(self, feedback_fn):
4731 """Redistribute the configuration.
4734 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4735 _RedistributeAncillaryFiles(self)
4738 class LUClusterActivateMasterIp(NoHooksLU):
4739 """Activate the master IP on the master node.
4742 def Exec(self, feedback_fn):
4743 """Activate the master IP.
4746 master_params = self.cfg.GetMasterNetworkParameters()
4747 ems = self.cfg.GetUseExternalMipScript()
4748 result = self.rpc.call_node_activate_master_ip(master_params.name,
4750 result.Raise("Could not activate the master IP")
4753 class LUClusterDeactivateMasterIp(NoHooksLU):
4754 """Deactivate the master IP on the master node.
4757 def Exec(self, feedback_fn):
4758 """Deactivate the master IP.
4761 master_params = self.cfg.GetMasterNetworkParameters()
4762 ems = self.cfg.GetUseExternalMipScript()
4763 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4765 result.Raise("Could not deactivate the master IP")
4768 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4769 """Sleep and poll for an instance's disk to sync.
4772 if not instance.disks or disks is not None and not disks:
4775 disks = _ExpandCheckDisks(instance, disks)
4778 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4780 node = instance.primary_node
4783 lu.cfg.SetDiskID(dev, node)
4785 # TODO: Convert to utils.Retry
4788 degr_retries = 10 # in seconds, as we sleep 1 second each time
4792 cumul_degraded = False
4793 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4794 msg = rstats.fail_msg
4796 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4799 raise errors.RemoteError("Can't contact node %s for mirror data,"
4800 " aborting." % node)
4803 rstats = rstats.payload
4805 for i, mstat in enumerate(rstats):
4807 lu.LogWarning("Can't compute data for node %s/%s",
4808 node, disks[i].iv_name)
4811 cumul_degraded = (cumul_degraded or
4812 (mstat.is_degraded and mstat.sync_percent is None))
4813 if mstat.sync_percent is not None:
4815 if mstat.estimated_time is not None:
4816 rem_time = ("%s remaining (estimated)" %
4817 utils.FormatSeconds(mstat.estimated_time))
4818 max_time = mstat.estimated_time
4820 rem_time = "no time estimate"
4821 lu.LogInfo("- device %s: %5.2f%% done, %s",
4822 disks[i].iv_name, mstat.sync_percent, rem_time)
4824 # if we're done but degraded, let's do a few small retries, to
4825 # make sure we see a stable and not transient situation; therefore
4826 # we force restart of the loop
4827 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4828 logging.info("Degraded disks found, %d retries left", degr_retries)
4836 time.sleep(min(60, max_time))
4839 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4841 return not cumul_degraded
4844 def _BlockdevFind(lu, node, dev, instance):
4845 """Wrapper around call_blockdev_find to annotate diskparams.
4847 @param lu: A reference to the lu object
4848 @param node: The node to call out
4849 @param dev: The device to find
4850 @param instance: The instance object the device belongs to
4851 @returns The result of the rpc call
4854 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4855 return lu.rpc.call_blockdev_find(node, disk)
4858 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4859 """Wrapper around L{_CheckDiskConsistencyInner}.
4862 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4863 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4867 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4869 """Check that mirrors are not degraded.
4871 @attention: The device has to be annotated already.
4873 The ldisk parameter, if True, will change the test from the
4874 is_degraded attribute (which represents overall non-ok status for
4875 the device(s)) to the ldisk (representing the local storage status).
4878 lu.cfg.SetDiskID(dev, node)
4882 if on_primary or dev.AssembleOnSecondary():
4883 rstats = lu.rpc.call_blockdev_find(node, dev)
4884 msg = rstats.fail_msg
4886 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4888 elif not rstats.payload:
4889 lu.LogWarning("Can't find disk on node %s", node)
4893 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4895 result = result and not rstats.payload.is_degraded
4898 for child in dev.children:
4899 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4905 class LUOobCommand(NoHooksLU):
4906 """Logical unit for OOB handling.
4910 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4912 def ExpandNames(self):
4913 """Gather locks we need.
4916 if self.op.node_names:
4917 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4918 lock_names = self.op.node_names
4920 lock_names = locking.ALL_SET
4922 self.needed_locks = {
4923 locking.LEVEL_NODE: lock_names,
4926 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4928 if not self.op.node_names:
4929 # Acquire node allocation lock only if all nodes are affected
4930 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4932 def CheckPrereq(self):
4933 """Check prerequisites.
4936 - the node exists in the configuration
4939 Any errors are signaled by raising errors.OpPrereqError.
4943 self.master_node = self.cfg.GetMasterNode()
4945 assert self.op.power_delay >= 0.0
4947 if self.op.node_names:
4948 if (self.op.command in self._SKIP_MASTER and
4949 self.master_node in self.op.node_names):
4950 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4951 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4953 if master_oob_handler:
4954 additional_text = ("run '%s %s %s' if you want to operate on the"
4955 " master regardless") % (master_oob_handler,
4959 additional_text = "it does not support out-of-band operations"
4961 raise errors.OpPrereqError(("Operating on the master node %s is not"
4962 " allowed for %s; %s") %
4963 (self.master_node, self.op.command,
4964 additional_text), errors.ECODE_INVAL)
4966 self.op.node_names = self.cfg.GetNodeList()
4967 if self.op.command in self._SKIP_MASTER:
4968 self.op.node_names.remove(self.master_node)
4970 if self.op.command in self._SKIP_MASTER:
4971 assert self.master_node not in self.op.node_names
4973 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4975 raise errors.OpPrereqError("Node %s not found" % node_name,
4978 self.nodes.append(node)
4980 if (not self.op.ignore_status and
4981 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4982 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4983 " not marked offline") % node_name,
4986 def Exec(self, feedback_fn):
4987 """Execute OOB and return result if we expect any.
4990 master_node = self.master_node
4993 for idx, node in enumerate(utils.NiceSort(self.nodes,
4994 key=lambda node: node.name)):
4995 node_entry = [(constants.RS_NORMAL, node.name)]
4996 ret.append(node_entry)
4998 oob_program = _SupportsOob(self.cfg, node)
5001 node_entry.append((constants.RS_UNAVAIL, None))
5004 logging.info("Executing out-of-band command '%s' using '%s' on %s",
5005 self.op.command, oob_program, node.name)
5006 result = self.rpc.call_run_oob(master_node, oob_program,
5007 self.op.command, node.name,
5011 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
5012 node.name, result.fail_msg)
5013 node_entry.append((constants.RS_NODATA, None))
5016 self._CheckPayload(result)
5017 except errors.OpExecError, err:
5018 self.LogWarning("Payload returned by node '%s' is not valid: %s",
5020 node_entry.append((constants.RS_NODATA, None))
5022 if self.op.command == constants.OOB_HEALTH:
5023 # For health we should log important events
5024 for item, status in result.payload:
5025 if status in [constants.OOB_STATUS_WARNING,
5026 constants.OOB_STATUS_CRITICAL]:
5027 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5028 item, node.name, status)
5030 if self.op.command == constants.OOB_POWER_ON:
5032 elif self.op.command == constants.OOB_POWER_OFF:
5033 node.powered = False
5034 elif self.op.command == constants.OOB_POWER_STATUS:
5035 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5036 if powered != node.powered:
5037 logging.warning(("Recorded power state (%s) of node '%s' does not"
5038 " match actual power state (%s)"), node.powered,
5041 # For configuration changing commands we should update the node
5042 if self.op.command in (constants.OOB_POWER_ON,
5043 constants.OOB_POWER_OFF):
5044 self.cfg.Update(node, feedback_fn)
5046 node_entry.append((constants.RS_NORMAL, result.payload))
5048 if (self.op.command == constants.OOB_POWER_ON and
5049 idx < len(self.nodes) - 1):
5050 time.sleep(self.op.power_delay)
5054 def _CheckPayload(self, result):
5055 """Checks if the payload is valid.
5057 @param result: RPC result
5058 @raises errors.OpExecError: If payload is not valid
5062 if self.op.command == constants.OOB_HEALTH:
5063 if not isinstance(result.payload, list):
5064 errs.append("command 'health' is expected to return a list but got %s" %
5065 type(result.payload))
5067 for item, status in result.payload:
5068 if status not in constants.OOB_STATUSES:
5069 errs.append("health item '%s' has invalid status '%s'" %
5072 if self.op.command == constants.OOB_POWER_STATUS:
5073 if not isinstance(result.payload, dict):
5074 errs.append("power-status is expected to return a dict but got %s" %
5075 type(result.payload))
5077 if self.op.command in [
5078 constants.OOB_POWER_ON,
5079 constants.OOB_POWER_OFF,
5080 constants.OOB_POWER_CYCLE,
5082 if result.payload is not None:
5083 errs.append("%s is expected to not return payload but got '%s'" %
5084 (self.op.command, result.payload))
5087 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5088 utils.CommaJoin(errs))
5091 class _OsQuery(_QueryBase):
5092 FIELDS = query.OS_FIELDS
5094 def ExpandNames(self, lu):
5095 # Lock all nodes in shared mode
5096 # Temporary removal of locks, should be reverted later
5097 # TODO: reintroduce locks when they are lighter-weight
5098 lu.needed_locks = {}
5099 #self.share_locks[locking.LEVEL_NODE] = 1
5100 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5102 # The following variables interact with _QueryBase._GetNames
5104 self.wanted = self.names
5106 self.wanted = locking.ALL_SET
5108 self.do_locking = self.use_locking
5110 def DeclareLocks(self, lu, level):
5114 def _DiagnoseByOS(rlist):
5115 """Remaps a per-node return list into an a per-os per-node dictionary
5117 @param rlist: a map with node names as keys and OS objects as values
5120 @return: a dictionary with osnames as keys and as value another
5121 map, with nodes as keys and tuples of (path, status, diagnose,
5122 variants, parameters, api_versions) as values, eg::
5124 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5125 (/srv/..., False, "invalid api")],
5126 "node2": [(/srv/..., True, "", [], [])]}
5131 # we build here the list of nodes that didn't fail the RPC (at RPC
5132 # level), so that nodes with a non-responding node daemon don't
5133 # make all OSes invalid
5134 good_nodes = [node_name for node_name in rlist
5135 if not rlist[node_name].fail_msg]
5136 for node_name, nr in rlist.items():
5137 if nr.fail_msg or not nr.payload:
5139 for (name, path, status, diagnose, variants,
5140 params, api_versions) in nr.payload:
5141 if name not in all_os:
5142 # build a list of nodes for this os containing empty lists
5143 # for each node in node_list
5145 for nname in good_nodes:
5146 all_os[name][nname] = []
5147 # convert params from [name, help] to (name, help)
5148 params = [tuple(v) for v in params]
5149 all_os[name][node_name].append((path, status, diagnose,
5150 variants, params, api_versions))
5153 def _GetQueryData(self, lu):
5154 """Computes the list of nodes and their attributes.
5157 # Locking is not used
5158 assert not (compat.any(lu.glm.is_owned(level)
5159 for level in locking.LEVELS
5160 if level != locking.LEVEL_CLUSTER) or
5161 self.do_locking or self.use_locking)
5163 valid_nodes = [node.name
5164 for node in lu.cfg.GetAllNodesInfo().values()
5165 if not node.offline and node.vm_capable]
5166 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5167 cluster = lu.cfg.GetClusterInfo()
5171 for (os_name, os_data) in pol.items():
5172 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5173 hidden=(os_name in cluster.hidden_os),
5174 blacklisted=(os_name in cluster.blacklisted_os))
5178 api_versions = set()
5180 for idx, osl in enumerate(os_data.values()):
5181 info.valid = bool(info.valid and osl and osl[0][1])
5185 (node_variants, node_params, node_api) = osl[0][3:6]
5188 variants.update(node_variants)
5189 parameters.update(node_params)
5190 api_versions.update(node_api)
5192 # Filter out inconsistent values
5193 variants.intersection_update(node_variants)
5194 parameters.intersection_update(node_params)
5195 api_versions.intersection_update(node_api)
5197 info.variants = list(variants)
5198 info.parameters = list(parameters)
5199 info.api_versions = list(api_versions)
5201 data[os_name] = info
5203 # Prepare data in requested order
5204 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5208 class LUOsDiagnose(NoHooksLU):
5209 """Logical unit for OS diagnose/query.
5215 def _BuildFilter(fields, names):
5216 """Builds a filter for querying OSes.
5219 name_filter = qlang.MakeSimpleFilter("name", names)
5221 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5222 # respective field is not requested
5223 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5224 for fname in ["hidden", "blacklisted"]
5225 if fname not in fields]
5226 if "valid" not in fields:
5227 status_filter.append([qlang.OP_TRUE, "valid"])
5230 status_filter.insert(0, qlang.OP_AND)
5232 status_filter = None
5234 if name_filter and status_filter:
5235 return [qlang.OP_AND, name_filter, status_filter]
5239 return status_filter
5241 def CheckArguments(self):
5242 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5243 self.op.output_fields, False)
5245 def ExpandNames(self):
5246 self.oq.ExpandNames(self)
5248 def Exec(self, feedback_fn):
5249 return self.oq.OldStyleQuery(self)
5252 class _ExtStorageQuery(_QueryBase):
5253 FIELDS = query.EXTSTORAGE_FIELDS
5255 def ExpandNames(self, lu):
5256 # Lock all nodes in shared mode
5257 # Temporary removal of locks, should be reverted later
5258 # TODO: reintroduce locks when they are lighter-weight
5259 lu.needed_locks = {}
5260 #self.share_locks[locking.LEVEL_NODE] = 1
5261 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5263 # The following variables interact with _QueryBase._GetNames
5265 self.wanted = self.names
5267 self.wanted = locking.ALL_SET
5269 self.do_locking = self.use_locking
5271 def DeclareLocks(self, lu, level):
5275 def _DiagnoseByProvider(rlist):
5276 """Remaps a per-node return list into an a per-provider per-node dictionary
5278 @param rlist: a map with node names as keys and ExtStorage objects as values
5281 @return: a dictionary with extstorage providers as keys and as
5282 value another map, with nodes as keys and tuples of
5283 (path, status, diagnose, parameters) as values, eg::
5285 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5286 "node2": [(/srv/..., False, "missing file")]
5287 "node3": [(/srv/..., True, "", [])]
5292 # we build here the list of nodes that didn't fail the RPC (at RPC
5293 # level), so that nodes with a non-responding node daemon don't
5294 # make all OSes invalid
5295 good_nodes = [node_name for node_name in rlist
5296 if not rlist[node_name].fail_msg]
5297 for node_name, nr in rlist.items():
5298 if nr.fail_msg or not nr.payload:
5300 for (name, path, status, diagnose, params) in nr.payload:
5301 if name not in all_es:
5302 # build a list of nodes for this os containing empty lists
5303 # for each node in node_list
5305 for nname in good_nodes:
5306 all_es[name][nname] = []
5307 # convert params from [name, help] to (name, help)
5308 params = [tuple(v) for v in params]
5309 all_es[name][node_name].append((path, status, diagnose, params))
5312 def _GetQueryData(self, lu):
5313 """Computes the list of nodes and their attributes.
5316 # Locking is not used
5317 assert not (compat.any(lu.glm.is_owned(level)
5318 for level in locking.LEVELS
5319 if level != locking.LEVEL_CLUSTER) or
5320 self.do_locking or self.use_locking)
5322 valid_nodes = [node.name
5323 for node in lu.cfg.GetAllNodesInfo().values()
5324 if not node.offline and node.vm_capable]
5325 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5329 nodegroup_list = lu.cfg.GetNodeGroupList()
5331 for (es_name, es_data) in pol.items():
5332 # For every provider compute the nodegroup validity.
5333 # To do this we need to check the validity of each node in es_data
5334 # and then construct the corresponding nodegroup dict:
5335 # { nodegroup1: status
5336 # nodegroup2: status
5339 for nodegroup in nodegroup_list:
5340 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5342 nodegroup_nodes = ndgrp.members
5343 nodegroup_name = ndgrp.name
5346 for node in nodegroup_nodes:
5347 if node in valid_nodes:
5348 if es_data[node] != []:
5349 node_status = es_data[node][0][1]
5350 node_statuses.append(node_status)
5352 node_statuses.append(False)
5354 if False in node_statuses:
5355 ndgrp_data[nodegroup_name] = False
5357 ndgrp_data[nodegroup_name] = True
5359 # Compute the provider's parameters
5361 for idx, esl in enumerate(es_data.values()):
5362 valid = bool(esl and esl[0][1])
5366 node_params = esl[0][3]
5369 parameters.update(node_params)
5371 # Filter out inconsistent values
5372 parameters.intersection_update(node_params)
5374 params = list(parameters)
5376 # Now fill all the info for this provider
5377 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5378 nodegroup_status=ndgrp_data,
5381 data[es_name] = info
5383 # Prepare data in requested order
5384 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5388 class LUExtStorageDiagnose(NoHooksLU):
5389 """Logical unit for ExtStorage diagnose/query.
5394 def CheckArguments(self):
5395 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5396 self.op.output_fields, False)
5398 def ExpandNames(self):
5399 self.eq.ExpandNames(self)
5401 def Exec(self, feedback_fn):
5402 return self.eq.OldStyleQuery(self)
5405 class LUNodeRemove(LogicalUnit):
5406 """Logical unit for removing a node.
5409 HPATH = "node-remove"
5410 HTYPE = constants.HTYPE_NODE
5412 def BuildHooksEnv(self):
5417 "OP_TARGET": self.op.node_name,
5418 "NODE_NAME": self.op.node_name,
5421 def BuildHooksNodes(self):
5422 """Build hooks nodes.
5424 This doesn't run on the target node in the pre phase as a failed
5425 node would then be impossible to remove.
5428 all_nodes = self.cfg.GetNodeList()
5430 all_nodes.remove(self.op.node_name)
5433 return (all_nodes, all_nodes)
5435 def CheckPrereq(self):
5436 """Check prerequisites.
5439 - the node exists in the configuration
5440 - it does not have primary or secondary instances
5441 - it's not the master
5443 Any errors are signaled by raising errors.OpPrereqError.
5446 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5447 node = self.cfg.GetNodeInfo(self.op.node_name)
5448 assert node is not None
5450 masternode = self.cfg.GetMasterNode()
5451 if node.name == masternode:
5452 raise errors.OpPrereqError("Node is the master node, failover to another"
5453 " node is required", errors.ECODE_INVAL)
5455 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5456 if node.name in instance.all_nodes:
5457 raise errors.OpPrereqError("Instance %s is still running on the node,"
5458 " please remove first" % instance_name,
5460 self.op.node_name = node.name
5463 def Exec(self, feedback_fn):
5464 """Removes the node from the cluster.
5468 logging.info("Stopping the node daemon and removing configs from node %s",
5471 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5473 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5476 # Promote nodes to master candidate as needed
5477 _AdjustCandidatePool(self, exceptions=[node.name])
5478 self.context.RemoveNode(node.name)
5480 # Run post hooks on the node before it's removed
5481 _RunPostHook(self, node.name)
5483 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5484 msg = result.fail_msg
5486 self.LogWarning("Errors encountered on the remote node while leaving"
5487 " the cluster: %s", msg)
5489 # Remove node from our /etc/hosts
5490 if self.cfg.GetClusterInfo().modify_etc_hosts:
5491 master_node = self.cfg.GetMasterNode()
5492 result = self.rpc.call_etc_hosts_modify(master_node,
5493 constants.ETC_HOSTS_REMOVE,
5495 result.Raise("Can't update hosts file with new host data")
5496 _RedistributeAncillaryFiles(self)
5499 class _NodeQuery(_QueryBase):
5500 FIELDS = query.NODE_FIELDS
5502 def ExpandNames(self, lu):
5503 lu.needed_locks = {}
5504 lu.share_locks = _ShareAll()
5507 self.wanted = _GetWantedNodes(lu, self.names)
5509 self.wanted = locking.ALL_SET
5511 self.do_locking = (self.use_locking and
5512 query.NQ_LIVE in self.requested_data)
5515 # If any non-static field is requested we need to lock the nodes
5516 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5517 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5519 def DeclareLocks(self, lu, level):
5522 def _GetQueryData(self, lu):
5523 """Computes the list of nodes and their attributes.
5526 all_info = lu.cfg.GetAllNodesInfo()
5528 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5530 # Gather data as requested
5531 if query.NQ_LIVE in self.requested_data:
5532 # filter out non-vm_capable nodes
5533 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5535 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5536 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5537 [lu.cfg.GetHypervisorType()], es_flags)
5538 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5539 for (name, nresult) in node_data.items()
5540 if not nresult.fail_msg and nresult.payload)
5544 if query.NQ_INST in self.requested_data:
5545 node_to_primary = dict([(name, set()) for name in nodenames])
5546 node_to_secondary = dict([(name, set()) for name in nodenames])
5548 inst_data = lu.cfg.GetAllInstancesInfo()
5550 for inst in inst_data.values():
5551 if inst.primary_node in node_to_primary:
5552 node_to_primary[inst.primary_node].add(inst.name)
5553 for secnode in inst.secondary_nodes:
5554 if secnode in node_to_secondary:
5555 node_to_secondary[secnode].add(inst.name)
5557 node_to_primary = None
5558 node_to_secondary = None
5560 if query.NQ_OOB in self.requested_data:
5561 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5562 for name, node in all_info.iteritems())
5566 if query.NQ_GROUP in self.requested_data:
5567 groups = lu.cfg.GetAllNodeGroupsInfo()
5571 return query.NodeQueryData([all_info[name] for name in nodenames],
5572 live_data, lu.cfg.GetMasterNode(),
5573 node_to_primary, node_to_secondary, groups,
5574 oob_support, lu.cfg.GetClusterInfo())
5577 class LUNodeQuery(NoHooksLU):
5578 """Logical unit for querying nodes.
5581 # pylint: disable=W0142
5584 def CheckArguments(self):
5585 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5586 self.op.output_fields, self.op.use_locking)
5588 def ExpandNames(self):
5589 self.nq.ExpandNames(self)
5591 def DeclareLocks(self, level):
5592 self.nq.DeclareLocks(self, level)
5594 def Exec(self, feedback_fn):
5595 return self.nq.OldStyleQuery(self)
5598 class LUNodeQueryvols(NoHooksLU):
5599 """Logical unit for getting volumes on node(s).
5603 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5604 _FIELDS_STATIC = utils.FieldSet("node")
5606 def CheckArguments(self):
5607 _CheckOutputFields(static=self._FIELDS_STATIC,
5608 dynamic=self._FIELDS_DYNAMIC,
5609 selected=self.op.output_fields)
5611 def ExpandNames(self):
5612 self.share_locks = _ShareAll()
5615 self.needed_locks = {
5616 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5619 self.needed_locks = {
5620 locking.LEVEL_NODE: locking.ALL_SET,
5621 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5622 }
5624 def Exec(self, feedback_fn):
5625 """Computes the list of nodes and their attributes.
5628 nodenames = self.owned_locks(locking.LEVEL_NODE)
5629 volumes = self.rpc.call_node_volumes(nodenames)
5631 ilist = self.cfg.GetAllInstancesInfo()
5632 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5634 output = []
5635 for node in nodenames:
5636 nresult = volumes[node]
5637 if nresult.offline:
5638 continue
5639 msg = nresult.fail_msg
5640 if msg:
5641 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5642 continue
5644 node_vols = sorted(nresult.payload,
5645 key=operator.itemgetter("dev"))
5647 for vol in node_vols:
5648 node_output = []
5649 for field in self.op.output_fields:
5650 if field == "node":
5651 val = node
5652 elif field == "phys":
5653 val = vol["dev"]
5654 elif field == "vg":
5655 val = vol["vg"]
5656 elif field == "name":
5657 val = vol["name"]
5658 elif field == "size":
5659 val = int(float(vol["size"]))
5660 elif field == "instance":
5661 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5662 else:
5663 raise errors.ParameterError(field)
5664 node_output.append(str(val))
5666 output.append(node_output)
5668 return output
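# Illustrative note: each row of "output" follows self.op.output_fields in
# order, with all values stringified above; e.g. for output_fields
# ["node", "name", "size"] a row could look like (values hypothetical):
#
#   ["node1.example.com", "disk0", "10240"]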
5671 class LUNodeQueryStorage(NoHooksLU):
5672 """Logical unit for getting information on storage units on node(s).
5675 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5676 REQ_BGL = False
5678 def CheckArguments(self):
5679 _CheckOutputFields(static=self._FIELDS_STATIC,
5680 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5681 selected=self.op.output_fields)
5683 def ExpandNames(self):
5684 self.share_locks = _ShareAll()
5686 if self.op.nodes:
5687 self.needed_locks = {
5688 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5689 }
5690 else:
5691 self.needed_locks = {
5692 locking.LEVEL_NODE: locking.ALL_SET,
5693 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5694 }
5696 def Exec(self, feedback_fn):
5697 """Computes the list of nodes and their attributes.
5700 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5702 # Always get name to sort by
5703 if constants.SF_NAME in self.op.output_fields:
5704 fields = self.op.output_fields[:]
5705 else:
5706 fields = [constants.SF_NAME] + self.op.output_fields
5708 # Never ask for node or type as it's only known to the LU
5709 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5710 while extra in fields:
5711 fields.remove(extra)
5713 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5714 name_idx = field_idx[constants.SF_NAME]
5716 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5717 data = self.rpc.call_storage_list(self.nodes,
5718 self.op.storage_type, st_args,
5719 self.op.name, fields)
5721 result = []
5723 for node in utils.NiceSort(self.nodes):
5724 nresult = data[node]
5725 if nresult.offline:
5726 continue
5728 msg = nresult.fail_msg
5729 if msg:
5730 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5731 continue
5733 rows = dict([(row[name_idx], row) for row in nresult.payload])
5735 for name in utils.NiceSort(rows.keys()):
5736 row = rows[name]
5738 out = []
5740 for field in self.op.output_fields:
5741 if field == constants.SF_NODE:
5742 val = node
5743 elif field == constants.SF_TYPE:
5744 val = self.op.storage_type
5745 elif field in field_idx:
5746 val = row[field_idx[field]]
5747 else:
5748 raise errors.ParameterError(field)
5750 out.append(str(val))
5752 result.append(out)
5754 return result
5757 class _InstanceQuery(_QueryBase):
5758 FIELDS = query.INSTANCE_FIELDS
5760 def ExpandNames(self, lu):
5761 lu.needed_locks = {}
5762 lu.share_locks = _ShareAll()
5764 if self.names:
5765 self.wanted = _GetWantedInstances(lu, self.names)
5766 else:
5767 self.wanted = locking.ALL_SET
5769 self.do_locking = (self.use_locking and
5770 query.IQ_LIVE in self.requested_data)
5771 if self.do_locking:
5772 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5773 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5774 lu.needed_locks[locking.LEVEL_NODE] = []
5775 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5777 self.do_grouplocks = (self.do_locking and
5778 query.IQ_NODES in self.requested_data)
5780 def DeclareLocks(self, lu, level):
5781 if self.do_locking:
5782 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5783 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5785 # Lock all groups used by instances optimistically; this requires going
5786 # via the node before it's locked, requiring verification later on
5787 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5788 set(group_uuid
5789 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5790 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5791 elif level == locking.LEVEL_NODE:
5792 lu._LockInstancesNodes() # pylint: disable=W0212
5794 @staticmethod
5795 def _CheckGroupLocks(lu):
5796 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5797 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5799 # Check if node groups for locked instances are still correct
5800 for instance_name in owned_instances:
5801 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5803 def _GetQueryData(self, lu):
5804 """Computes the list of instances and their attributes.
5807 if self.do_grouplocks:
5808 self._CheckGroupLocks(lu)
5810 cluster = lu.cfg.GetClusterInfo()
5811 all_info = lu.cfg.GetAllInstancesInfo()
5813 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5815 instance_list = [all_info[name] for name in instance_names]
5816 nodes = frozenset(itertools.chain(*(inst.all_nodes
5817 for inst in instance_list)))
5818 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5819 bad_nodes = []
5820 offline_nodes = []
5821 wrongnode_inst = set()
5823 # Gather data as requested
5824 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5825 live_data = {}
5826 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5827 for name in nodes:
5828 result = node_data[name]
5829 if result.offline:
5830 # offline nodes will be in both lists
5831 assert result.fail_msg
5832 offline_nodes.append(name)
5833 if result.fail_msg:
5834 bad_nodes.append(name)
5835 elif result.payload:
5836 for inst in result.payload:
5837 if inst in all_info:
5838 if all_info[inst].primary_node == name:
5839 live_data.update(result.payload)
5840 else:
5841 wrongnode_inst.add(inst)
5842 else:
5843 # orphan instance; we don't list it here as we don't
5844 # handle this case yet in the output of instance listing
5845 logging.warning("Orphan instance '%s' found on node %s",
5846 inst, name)
5847 # else no instance is alive
5848 else:
5849 live_data = {}
5851 if query.IQ_DISKUSAGE in self.requested_data:
5852 gmi = ganeti.masterd.instance
5853 disk_usage = dict((inst.name,
5854 gmi.ComputeDiskSize(inst.disk_template,
5855 [{constants.IDISK_SIZE: disk.size}
5856 for disk in inst.disks]))
5857 for inst in instance_list)
5858 else:
5859 disk_usage = None
5861 if query.IQ_CONSOLE in self.requested_data:
5862 consinfo = {}
5863 for inst in instance_list:
5864 if inst.name in live_data:
5865 # Instance is running
5866 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5867 else:
5868 consinfo[inst.name] = None
5869 assert set(consinfo.keys()) == set(instance_names)
5870 else:
5871 consinfo = None
5873 if query.IQ_NODES in self.requested_data:
5874 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5875 instance_list)))
5876 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5877 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5878 for uuid in set(map(operator.attrgetter("group"),
5879 nodes.values())))
5880 else:
5881 nodes = None
5882 groups = None
5884 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5885 disk_usage, offline_nodes, bad_nodes,
5886 live_data, wrongnode_inst, consinfo,
5887 nodes, groups)
5890 class LUQuery(NoHooksLU):
5891 """Query for resources/items of a certain kind.
5894 # pylint: disable=W0142
5895 REQ_BGL = False
5897 def CheckArguments(self):
5898 qcls = _GetQueryImplementation(self.op.what)
5900 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5902 def ExpandNames(self):
5903 self.impl.ExpandNames(self)
5905 def DeclareLocks(self, level):
5906 self.impl.DeclareLocks(self, level)
5908 def Exec(self, feedback_fn):
5909 return self.impl.NewStyleQuery(self)
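# Assumed usage (sketch): LUQuery is normally reached through an OpQuery
# opcode; the opcode and QR_* constants are real, the field list is an
# arbitrary example.
#
#   op = opcodes.OpQuery(what=constants.QR_INSTANCE,
#                        fields=["name", "status"],
#                        qfilter=None)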
5912 class LUQueryFields(NoHooksLU):
5913 """Query for resources/items of a certain kind.
5916 # pylint: disable=W0142
5917 REQ_BGL = False
5919 def CheckArguments(self):
5920 self.qcls = _GetQueryImplementation(self.op.what)
5922 def ExpandNames(self):
5923 self.needed_locks = {}
5925 def Exec(self, feedback_fn):
5926 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
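# Assumed usage (sketch): listing the available fields for a resource kind;
# fields=None requests all of them.
#
#   op = opcodes.OpQueryFields(what=constants.QR_NODE, fields=None)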
5929 class LUNodeModifyStorage(NoHooksLU):
5930 """Logical unit for modifying a storage volume on a node.
5935 def CheckArguments(self):
5936 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5938 storage_type = self.op.storage_type
5940 try:
5941 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5942 except KeyError:
5943 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5944 " modified" % storage_type,
5947 diff = set(self.op.changes.keys()) - modifiable
5948 if diff:
5949 raise errors.OpPrereqError("The following fields can not be modified for"
5950 " storage units of type '%s': %r" %
5951 (storage_type, list(diff)),
5952 errors.ECODE_INVAL)
5954 def ExpandNames(self):
5955 self.needed_locks = {
5956 locking.LEVEL_NODE: self.op.node_name,
5957 }
5959 def Exec(self, feedback_fn):
5960 """Computes the list of nodes and their attributes.
5963 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5964 result = self.rpc.call_storage_modify(self.op.node_name,
5965 self.op.storage_type, st_args,
5966 self.op.name, self.op.changes)
5967 result.Raise("Failed to modify storage unit '%s' on %s" %
5968 (self.op.name, self.op.node_name))
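# Assumed usage (sketch): marking an LVM physical volume as no longer
# allocatable; ST_LVM_PV and SF_ALLOCATABLE are real constants, the node
# and volume names are hypothetical.
#
#   op = opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sdb1",
#                                    changes={constants.SF_ALLOCATABLE: False})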
5971 class LUNodeAdd(LogicalUnit):
5972 """Logical unit for adding node to the cluster.
5976 HTYPE = constants.HTYPE_NODE
5977 _NFLAGS = ["master_capable", "vm_capable"]
5979 def CheckArguments(self):
5980 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5981 # validate/normalize the node name
5982 self.hostname = netutils.GetHostname(name=self.op.node_name,
5983 family=self.primary_ip_family)
5984 self.op.node_name = self.hostname.name
5986 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5987 raise errors.OpPrereqError("Cannot readd the master node",
5988 errors.ECODE_STATE)
5990 if self.op.readd and self.op.group:
5991 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5992 " being readded", errors.ECODE_INVAL)
5994 def BuildHooksEnv(self):
5995 """Build hooks env.
5997 This will run on all nodes before, and on all nodes + the new node after.
6000 return {
6001 "OP_TARGET": self.op.node_name,
6002 "NODE_NAME": self.op.node_name,
6003 "NODE_PIP": self.op.primary_ip,
6004 "NODE_SIP": self.op.secondary_ip,
6005 "MASTER_CAPABLE": str(self.op.master_capable),
6006 "VM_CAPABLE": str(self.op.vm_capable),
6009 def BuildHooksNodes(self):
6010 """Build hooks nodes.
6013 # Exclude added node
6014 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6015 post_nodes = pre_nodes + [self.op.node_name, ]
6017 return (pre_nodes, post_nodes)
6019 def CheckPrereq(self):
6020 """Check prerequisites.
6023 - the new node is not already in the config
6024 - it is resolvable
6025 - its parameters (single/dual homed) match the cluster
6027 Any errors are signaled by raising errors.OpPrereqError.
6030 cfg = self.cfg
6031 hostname = self.hostname
6032 node = hostname.name
6033 primary_ip = self.op.primary_ip = hostname.ip
6034 if self.op.secondary_ip is None:
6035 if self.primary_ip_family == netutils.IP6Address.family:
6036 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6037 " IPv4 address must be given as secondary",
6039 self.op.secondary_ip = primary_ip
6041 secondary_ip = self.op.secondary_ip
6042 if not netutils.IP4Address.IsValid(secondary_ip):
6043 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6044 " address" % secondary_ip, errors.ECODE_INVAL)
6046 node_list = cfg.GetNodeList()
6047 if not self.op.readd and node in node_list:
6048 raise errors.OpPrereqError("Node %s is already in the configuration" %
6049 node, errors.ECODE_EXISTS)
6050 elif self.op.readd and node not in node_list:
6051 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6052 errors.ECODE_NOENT)
6054 self.changed_primary_ip = False
6056 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6057 if self.op.readd and node == existing_node_name:
6058 if existing_node.secondary_ip != secondary_ip:
6059 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6060 " address configuration as before",
6062 if existing_node.primary_ip != primary_ip:
6063 self.changed_primary_ip = True
6065 continue
6067 if (existing_node.primary_ip == primary_ip or
6068 existing_node.secondary_ip == primary_ip or
6069 existing_node.primary_ip == secondary_ip or
6070 existing_node.secondary_ip == secondary_ip):
6071 raise errors.OpPrereqError("New node ip address(es) conflict with"
6072 " existing node %s" % existing_node.name,
6073 errors.ECODE_NOTUNIQUE)
6075 # After this 'if' block, None is no longer a valid value for the
6076 # _capable op attributes
6077 if self.op.readd:
6078 old_node = self.cfg.GetNodeInfo(node)
6079 assert old_node is not None, "Can't retrieve locked node %s" % node
6080 for attr in self._NFLAGS:
6081 if getattr(self.op, attr) is None:
6082 setattr(self.op, attr, getattr(old_node, attr))
6083 else:
6084 for attr in self._NFLAGS:
6085 if getattr(self.op, attr) is None:
6086 setattr(self.op, attr, True)
6088 if self.op.readd and not self.op.vm_capable:
6089 pri, sec = cfg.GetNodeInstances(node)
6090 if pri or sec:
6091 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6092 " flag set to false, but it already holds"
6093 " instances" % node,
6096 # check that the type of the node (single versus dual homed) is the
6097 # same as for the master
6098 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6099 master_singlehomed = myself.secondary_ip == myself.primary_ip
6100 newbie_singlehomed = secondary_ip == primary_ip
6101 if master_singlehomed != newbie_singlehomed:
6102 if master_singlehomed:
6103 raise errors.OpPrereqError("The master has no secondary ip but the"
6104 " new node has one",
6107 raise errors.OpPrereqError("The master has a secondary ip but the"
6108 " new node doesn't have one",
6111 # checks reachability
6112 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6113 raise errors.OpPrereqError("Node not reachable by ping",
6114 errors.ECODE_ENVIRON)
6116 if not newbie_singlehomed:
6117 # check reachability from my secondary ip to newbie's secondary ip
6118 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6119 source=myself.secondary_ip):
6120 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6121 " based ping to node daemon port",
6122 errors.ECODE_ENVIRON)
6124 if self.op.readd:
6125 exceptions = [node]
6126 else:
6127 exceptions = []
6129 if self.op.master_capable:
6130 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6132 self.master_candidate = False
6134 if self.op.readd:
6135 self.new_node = old_node
6136 else:
6137 node_group = cfg.LookupNodeGroup(self.op.group)
6138 self.new_node = objects.Node(name=node,
6139 primary_ip=primary_ip,
6140 secondary_ip=secondary_ip,
6141 master_candidate=self.master_candidate,
6142 offline=False, drained=False,
6143 group=node_group, ndparams={})
6145 if self.op.ndparams:
6146 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6148 if self.op.hv_state:
6149 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6151 if self.op.disk_state:
6152 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6154 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6155 # it a property on the base class.
6156 rpcrunner = rpc.DnsOnlyRunner()
6157 result = rpcrunner.call_version([node])[node]
6158 result.Raise("Can't get version information from node %s" % node)
6159 if constants.PROTOCOL_VERSION == result.payload:
6160 logging.info("Communication to node %s fine, sw version %s match",
6161 node, result.payload)
6163 raise errors.OpPrereqError("Version mismatch master version %s,"
6164 " node version %s" %
6165 (constants.PROTOCOL_VERSION, result.payload),
6166 errors.ECODE_ENVIRON)
6168 vg_name = cfg.GetVGName()
6169 if vg_name is not None:
6170 vparams = {constants.NV_PVLIST: [vg_name]}
6171 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6172 if self.op.ndparams:
6173 excl_stor = self.op.ndparams.get(constants.ND_EXCLUSIVE_STORAGE,
6174 excl_stor)
6175 cname = self.cfg.GetClusterName()
6176 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6177 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6178 if errmsgs:
6179 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6180 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6182 def Exec(self, feedback_fn):
6183 """Adds the new node to the cluster.
6186 new_node = self.new_node
6187 node = new_node.name
6189 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6190 "Not owning BGL"
6192 # We are adding a new node, so we assume it's powered
6193 new_node.powered = True
6195 # for re-adds, reset the offline/drained/master-candidate flags;
6196 # we need to reset here, otherwise offline would prevent RPC calls
6197 # later in the procedure; this also means that if the re-add
6198 # fails, we are left with a non-offlined, broken node
6199 if self.op.readd:
6200 new_node.drained = new_node.offline = False # pylint: disable=W0201
6201 self.LogInfo("Readding a node, the offline/drained flags were reset")
6202 # if we demote the node, we do cleanup later in the procedure
6203 new_node.master_candidate = self.master_candidate
6204 if self.changed_primary_ip:
6205 new_node.primary_ip = self.op.primary_ip
6207 # copy the master/vm_capable flags
6208 for attr in self._NFLAGS:
6209 setattr(new_node, attr, getattr(self.op, attr))
6211 # notify the user about any possible mc promotion
6212 if new_node.master_candidate:
6213 self.LogInfo("Node will be a master candidate")
6215 if self.op.ndparams:
6216 new_node.ndparams = self.op.ndparams
6218 new_node.ndparams = {}
6220 if self.op.hv_state:
6221 new_node.hv_state_static = self.new_hv_state
6223 if self.op.disk_state:
6224 new_node.disk_state_static = self.new_disk_state
6226 # Add node to our /etc/hosts, and add key to known_hosts
6227 if self.cfg.GetClusterInfo().modify_etc_hosts:
6228 master_node = self.cfg.GetMasterNode()
6229 result = self.rpc.call_etc_hosts_modify(master_node,
6230 constants.ETC_HOSTS_ADD,
6231 self.hostname.name,
6232 self.hostname.ip)
6233 result.Raise("Can't update hosts file with new host data")
6235 if new_node.secondary_ip != new_node.primary_ip:
6236 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6237 False)
6239 node_verify_list = [self.cfg.GetMasterNode()]
6240 node_verify_param = {
6241 constants.NV_NODELIST: ([node], {}),
6242 # TODO: do a node-net-test as well?
6243 }
6245 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6246 self.cfg.GetClusterName())
6247 for verifier in node_verify_list:
6248 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6249 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6250 if nl_payload:
6251 for failed in nl_payload:
6252 feedback_fn("ssh/hostname verification failed"
6253 " (checking from %s): %s" %
6254 (verifier, nl_payload[failed]))
6255 raise errors.OpExecError("ssh/hostname verification failed")
6257 if self.op.readd:
6258 _RedistributeAncillaryFiles(self)
6259 self.context.ReaddNode(new_node)
6260 # make sure we redistribute the config
6261 self.cfg.Update(new_node, feedback_fn)
6262 # and make sure the new node will not have old files around
6263 if not new_node.master_candidate:
6264 result = self.rpc.call_node_demote_from_mc(new_node.name)
6265 msg = result.fail_msg
6266 if msg:
6267 self.LogWarning("Node failed to demote itself from master"
6268 " candidate status: %s" % msg)
6270 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6271 additional_vm=self.op.vm_capable)
6272 self.context.AddNode(new_node, self.proc.GetECId())
6275 class LUNodeSetParams(LogicalUnit):
6276 """Modifies the parameters of a node.
6278 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6279 to the node role (as _ROLE_*)
6280 @cvar _R2F: a dictionary from node role to tuples of flags
6281 @cvar _FLAGS: a list of attribute names corresponding to the flags
6284 HPATH = "node-modify"
6285 HTYPE = constants.HTYPE_NODE
6286 REQ_BGL = False
6287 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6288 _F2R = {
6289 (True, False, False): _ROLE_CANDIDATE,
6290 (False, True, False): _ROLE_DRAINED,
6291 (False, False, True): _ROLE_OFFLINE,
6292 (False, False, False): _ROLE_REGULAR,
6293 }
6294 _R2F = dict((v, k) for k, v in _F2R.items())
6295 _FLAGS = ["master_candidate", "drained", "offline"]
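# Illustrative mapping check (sketch): at most one flag may be set, and the
# all-False tuple is the regular role, e.g.:
#
#   >>> LUNodeSetParams._F2R[(True, False, False)] == \
#   ...     LUNodeSetParams._ROLE_CANDIDATE
#   True
#   >>> LUNodeSetParams._R2F[LUNodeSetParams._ROLE_REGULAR]
#   (False, False, False)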
6297 def CheckArguments(self):
6298 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6299 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6300 self.op.master_capable, self.op.vm_capable,
6301 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6302 self.op.disk_state]
6303 if all_mods.count(None) == len(all_mods):
6304 raise errors.OpPrereqError("Please pass at least one modification",
6305 errors.ECODE_INVAL)
6306 if all_mods.count(True) > 1:
6307 raise errors.OpPrereqError("Can't set the node into more than one"
6308 " state at the same time",
6311 # Boolean value that tells us whether we might be demoting from MC
6312 self.might_demote = (self.op.master_candidate is False or
6313 self.op.offline is True or
6314 self.op.drained is True or
6315 self.op.master_capable is False)
6317 if self.op.secondary_ip:
6318 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6319 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6320 " address" % self.op.secondary_ip,
6323 self.lock_all = self.op.auto_promote and self.might_demote
6324 self.lock_instances = self.op.secondary_ip is not None
6326 def _InstanceFilter(self, instance):
6327 """Filter for getting affected instances.
6330 return (instance.disk_template in constants.DTS_INT_MIRROR and
6331 self.op.node_name in instance.all_nodes)
6333 def ExpandNames(self):
6334 if self.lock_all:
6335 self.needed_locks = {
6336 locking.LEVEL_NODE: locking.ALL_SET,
6338 # Block allocations when all nodes are locked
6339 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6340 }
6341 else:
6342 self.needed_locks = {
6343 locking.LEVEL_NODE: self.op.node_name,
6344 }
6346 # Since modifying a node can have severe effects on currently running
6347 # operations the resource lock is at least acquired in shared mode
6348 self.needed_locks[locking.LEVEL_NODE_RES] = \
6349 self.needed_locks[locking.LEVEL_NODE]
6351 # Get all locks except nodes in shared mode; they are not used for anything
6352 # but read-only access
6353 self.share_locks = _ShareAll()
6354 self.share_locks[locking.LEVEL_NODE] = 0
6355 self.share_locks[locking.LEVEL_NODE_RES] = 0
6356 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6358 if self.lock_instances:
6359 self.needed_locks[locking.LEVEL_INSTANCE] = \
6360 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6362 def BuildHooksEnv(self):
6363 """Build hooks env.
6365 This runs on the master node.
6368 return {
6369 "OP_TARGET": self.op.node_name,
6370 "MASTER_CANDIDATE": str(self.op.master_candidate),
6371 "OFFLINE": str(self.op.offline),
6372 "DRAINED": str(self.op.drained),
6373 "MASTER_CAPABLE": str(self.op.master_capable),
6374 "VM_CAPABLE": str(self.op.vm_capable),
6377 def BuildHooksNodes(self):
6378 """Build hooks nodes.
6381 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6382 return (nl, nl)
6384 def CheckPrereq(self):
6385 """Check prerequisites.
6387 This only checks the instance list against the existing names.
6390 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6392 if self.lock_instances:
6393 affected_instances = \
6394 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6396 # Verify instance locks
6397 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6398 wanted_instances = frozenset(affected_instances.keys())
6399 if wanted_instances - owned_instances:
6400 raise errors.OpPrereqError("Instances affected by changing node %s's"
6401 " secondary IP address have changed since"
6402 " locks were acquired, wanted '%s', have"
6403 " '%s'; retry the operation" %
6405 utils.CommaJoin(wanted_instances),
6406 utils.CommaJoin(owned_instances)),
6407 errors.ECODE_STATE)
6408 else:
6409 affected_instances = None
6411 if (self.op.master_candidate is not None or
6412 self.op.drained is not None or
6413 self.op.offline is not None):
6414 # we can't change the master's node flags
6415 if self.op.node_name == self.cfg.GetMasterNode():
6416 raise errors.OpPrereqError("The master role can be changed"
6417 " only via master-failover",
6420 if self.op.master_candidate and not node.master_capable:
6421 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6422 " it a master candidate" % node.name,
6425 if self.op.vm_capable is False:
6426 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6427 if ipri or isec:
6428 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6429 " the vm_capable flag" % node.name,
6432 if node.master_candidate and self.might_demote and not self.lock_all:
6433 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6434 # check if after removing the current node, we're missing master
6435 # candidates
6436 (mc_remaining, mc_should, _) = \
6437 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6438 if mc_remaining < mc_should:
6439 raise errors.OpPrereqError("Not enough master candidates, please"
6440 " pass auto promote option to allow"
6441 " promotion (--auto-promote or RAPI"
6442 " auto_promote=True)", errors.ECODE_STATE)
6444 self.old_flags = old_flags = (node.master_candidate,
6445 node.drained, node.offline)
6446 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6447 self.old_role = old_role = self._F2R[old_flags]
6449 # Check for ineffective changes
6450 for attr in self._FLAGS:
6451 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6452 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6453 setattr(self.op, attr, None)
6455 # Past this point, any flag change to False means a transition
6456 # away from the respective state, as only real changes are kept
6458 # TODO: We might query the real power state if it supports OOB
6459 if _SupportsOob(self.cfg, node):
6460 if self.op.offline is False and not (node.powered or
6461 self.op.powered is True):
6462 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6463 " offline status can be reset") %
6464 self.op.node_name, errors.ECODE_STATE)
6465 elif self.op.powered is not None:
6466 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6467 " as it does not support out-of-band"
6468 " handling") % self.op.node_name,
6471 # If we're being deofflined/drained, we'll MC ourself if needed
6472 if (self.op.drained is False or self.op.offline is False or
6473 (self.op.master_capable and not node.master_capable)):
6474 if _DecideSelfPromotion(self):
6475 self.op.master_candidate = True
6476 self.LogInfo("Auto-promoting node to master candidate")
6478 # If we're no longer master capable, we'll demote ourselves from MC
6479 if self.op.master_capable is False and node.master_candidate:
6480 self.LogInfo("Demoting from master candidate")
6481 self.op.master_candidate = False
6483 # Compute new role
6484 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6485 if self.op.master_candidate:
6486 new_role = self._ROLE_CANDIDATE
6487 elif self.op.drained:
6488 new_role = self._ROLE_DRAINED
6489 elif self.op.offline:
6490 new_role = self._ROLE_OFFLINE
6491 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6492 # False is still in new flags, which means we're un-setting (the
6493 # only) True flag
6494 new_role = self._ROLE_REGULAR
6495 else: # no new flags, nothing, keep old role
6496 new_role = old_role
6498 self.new_role = new_role
6500 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6501 # Trying to transition out of offline status
6502 result = self.rpc.call_version([node.name])[node.name]
6503 if result.fail_msg:
6504 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6505 " to report its version: %s" %
6506 (node.name, result.fail_msg),
6507 errors.ECODE_STATE)
6508 else:
6509 self.LogWarning("Transitioning node from offline to online state"
6510 " without using re-add. Please make sure the node"
6513 # When changing the secondary ip, verify if this is a single-homed to
6514 # multi-homed transition or vice versa, and apply the relevant
6515 # restrictions.
6516 if self.op.secondary_ip:
6517 # Ok even without locking, because this can't be changed by any LU
6518 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6519 master_singlehomed = master.secondary_ip == master.primary_ip
6520 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6521 if self.op.force and node.name == master.name:
6522 self.LogWarning("Transitioning from single-homed to multi-homed"
6523 " cluster; all nodes will require a secondary IP"
6526 raise errors.OpPrereqError("Changing the secondary ip on a"
6527 " single-homed cluster requires the"
6528 " --force option to be passed, and the"
6529 " target node to be the master",
6531 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6532 if self.op.force and node.name == master.name:
6533 self.LogWarning("Transitioning from multi-homed to single-homed"
6534 " cluster; secondary IP addresses will have to be"
6537 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6538 " same as the primary IP on a multi-homed"
6539 " cluster, unless the --force option is"
6540 " passed, and the target node is the"
6541 " master", errors.ECODE_INVAL)
6543 assert not (frozenset(affected_instances) -
6544 self.owned_locks(locking.LEVEL_INSTANCE))
6546 if node.offline:
6547 if affected_instances:
6548 msg = ("Cannot change secondary IP address: offline node has"
6549 " instances (%s) configured to use it" %
6550 utils.CommaJoin(affected_instances.keys()))
6551 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6552 else:
6553 # On online nodes, check that no instances are running, and that
6554 # the node has the new ip and we can reach it.
6555 for instance in affected_instances.values():
6556 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6557 msg="cannot change secondary ip")
6559 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6560 if master.name != node.name:
6561 # check reachability from master secondary ip to new secondary ip
6562 if not netutils.TcpPing(self.op.secondary_ip,
6563 constants.DEFAULT_NODED_PORT,
6564 source=master.secondary_ip):
6565 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6566 " based ping to node daemon port",
6567 errors.ECODE_ENVIRON)
6569 if self.op.ndparams:
6570 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6571 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6572 self.new_ndparams = new_ndparams
6574 if self.op.hv_state:
6575 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6576 self.node.hv_state_static)
6578 if self.op.disk_state:
6579 self.new_disk_state = \
6580 _MergeAndVerifyDiskState(self.op.disk_state,
6581 self.node.disk_state_static)
6583 def Exec(self, feedback_fn):
6584 """Modifies a node.
6587 node = self.node
6588 old_role = self.old_role
6589 new_role = self.new_role
6591 result = []
6593 if self.op.ndparams:
6594 node.ndparams = self.new_ndparams
6596 if self.op.powered is not None:
6597 node.powered = self.op.powered
6599 if self.op.hv_state:
6600 node.hv_state_static = self.new_hv_state
6602 if self.op.disk_state:
6603 node.disk_state_static = self.new_disk_state
6605 for attr in ["master_capable", "vm_capable"]:
6606 val = getattr(self.op, attr)
6607 if val is not None:
6608 setattr(node, attr, val)
6609 result.append((attr, str(val)))
6611 if new_role != old_role:
6612 # Tell the node to demote itself, if no longer MC and not offline
6613 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6614 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6615 if msg:
6616 self.LogWarning("Node failed to demote itself: %s", msg)
6618 new_flags = self._R2F[new_role]
6619 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6620 if of != nf:
6621 result.append((desc, str(nf)))
6622 (node.master_candidate, node.drained, node.offline) = new_flags
6624 # we locked all nodes, we adjust the CP before updating this node
6625 if self.lock_all:
6626 _AdjustCandidatePool(self, [node.name])
6628 if self.op.secondary_ip:
6629 node.secondary_ip = self.op.secondary_ip
6630 result.append(("secondary_ip", self.op.secondary_ip))
6632 # this will trigger configuration file update, if needed
6633 self.cfg.Update(node, feedback_fn)
6635 # this will trigger job queue propagation or cleanup if the mc
6636 # flag changed
6637 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6638 self.context.ReaddNode(node)
6640 return result
6643 class LUNodePowercycle(NoHooksLU):
6644 """Powercycles a node.
6649 def CheckArguments(self):
6650 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6651 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6652 raise errors.OpPrereqError("The node is the master and the force"
6653 " parameter was not set",
6656 def ExpandNames(self):
6657 """Locking for PowercycleNode.
6659 This is a last-resort option and shouldn't block on other
6660 jobs. Therefore, we grab no locks.
6663 self.needed_locks = {}
6665 def Exec(self, feedback_fn):
6666 """Reboots a node.
6669 result = self.rpc.call_node_powercycle(self.op.node_name,
6670 self.cfg.GetHypervisorType())
6671 result.Raise("Failed to schedule the reboot")
6672 return result.payload
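# Assumed usage (sketch): powercycling a non-master node; the opcode and its
# parameters are real, the node name is hypothetical. force=True would be
# needed for the master node, as checked in CheckArguments above.
#
#   op = opcodes.OpNodePowercycle(node_name="node2.example.com", force=False)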
6675 class LUClusterQuery(NoHooksLU):
6676 """Query cluster configuration.
6681 def ExpandNames(self):
6682 self.needed_locks = {}
6684 def Exec(self, feedback_fn):
6685 """Return cluster config.
6688 cluster = self.cfg.GetClusterInfo()
6689 os_hvp = {}
6691 # Filter just for enabled hypervisors
6692 for os_name, hv_dict in cluster.os_hvp.items():
6693 os_hvp[os_name] = {}
6694 for hv_name, hv_params in hv_dict.items():
6695 if hv_name in cluster.enabled_hypervisors:
6696 os_hvp[os_name][hv_name] = hv_params
6698 # Convert ip_family to ip_version
6699 primary_ip_version = constants.IP4_VERSION
6700 if cluster.primary_ip_family == netutils.IP6Address.family:
6701 primary_ip_version = constants.IP6_VERSION
6703 result = {
6704 "software_version": constants.RELEASE_VERSION,
6705 "protocol_version": constants.PROTOCOL_VERSION,
6706 "config_version": constants.CONFIG_VERSION,
6707 "os_api_version": max(constants.OS_API_VERSIONS),
6708 "export_version": constants.EXPORT_VERSION,
6709 "architecture": runtime.GetArchInfo(),
6710 "name": cluster.cluster_name,
6711 "master": cluster.master_node,
6712 "default_hypervisor": cluster.primary_hypervisor,
6713 "enabled_hypervisors": cluster.enabled_hypervisors,
6714 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6715 for hypervisor_name in cluster.enabled_hypervisors]),
6716 "os_hvp": os_hvp,
6717 "beparams": cluster.beparams,
6718 "osparams": cluster.osparams,
6719 "ipolicy": cluster.ipolicy,
6720 "nicparams": cluster.nicparams,
6721 "ndparams": cluster.ndparams,
6722 "diskparams": cluster.diskparams,
6723 "candidate_pool_size": cluster.candidate_pool_size,
6724 "master_netdev": cluster.master_netdev,
6725 "master_netmask": cluster.master_netmask,
6726 "use_external_mip_script": cluster.use_external_mip_script,
6727 "volume_group_name": cluster.volume_group_name,
6728 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6729 "file_storage_dir": cluster.file_storage_dir,
6730 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6731 "maintain_node_health": cluster.maintain_node_health,
6732 "ctime": cluster.ctime,
6733 "mtime": cluster.mtime,
6734 "uuid": cluster.uuid,
6735 "tags": list(cluster.GetTags()),
6736 "uid_pool": cluster.uid_pool,
6737 "default_iallocator": cluster.default_iallocator,
6738 "reserved_lvs": cluster.reserved_lvs,
6739 "primary_ip_version": primary_ip_version,
6740 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6741 "hidden_os": cluster.hidden_os,
6742 "blacklisted_os": cluster.blacklisted_os,
6748 class LUClusterConfigQuery(NoHooksLU):
6749 """Return configuration values.
6754 def CheckArguments(self):
6755 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6757 def ExpandNames(self):
6758 self.cq.ExpandNames(self)
6760 def DeclareLocks(self, level):
6761 self.cq.DeclareLocks(self, level)
6763 def Exec(self, feedback_fn):
6764 result = self.cq.OldStyleQuery(self)
6766 assert len(result) == 1
6768 return result[0]
6771 class _ClusterQuery(_QueryBase):
6772 FIELDS = query.CLUSTER_FIELDS
6774 #: Do not sort (there is only one item)
6775 SORT_FIELD = None
6777 def ExpandNames(self, lu):
6778 lu.needed_locks = {}
6780 # The following variables interact with _QueryBase._GetNames
6781 self.wanted = locking.ALL_SET
6782 self.do_locking = self.use_locking
6784 if self.do_locking:
6785 raise errors.OpPrereqError("Can not use locking for cluster queries",
6786 errors.ECODE_INVAL)
6788 def DeclareLocks(self, lu, level):
6789 pass
6791 def _GetQueryData(self, lu):
6792 """Computes the list of nodes and their attributes.
6795 # Locking is not used
6796 assert not (compat.any(lu.glm.is_owned(level)
6797 for level in locking.LEVELS
6798 if level != locking.LEVEL_CLUSTER) or
6799 self.do_locking or self.use_locking)
6801 if query.CQ_CONFIG in self.requested_data:
6802 cluster = lu.cfg.GetClusterInfo()
6803 else:
6804 cluster = NotImplemented
6806 if query.CQ_QUEUE_DRAINED in self.requested_data:
6807 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6808 else:
6809 drain_flag = NotImplemented
6811 if query.CQ_WATCHER_PAUSE in self.requested_data:
6812 master_name = lu.cfg.GetMasterNode()
6814 result = lu.rpc.call_get_watcher_pause(master_name)
6815 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6816 master_name)
6818 watcher_pause = result.payload
6819 else:
6820 watcher_pause = NotImplemented
6822 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6825 class LUInstanceActivateDisks(NoHooksLU):
6826 """Bring up an instance's disks.
6831 def ExpandNames(self):
6832 self._ExpandAndLockInstance()
6833 self.needed_locks[locking.LEVEL_NODE] = []
6834 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6836 def DeclareLocks(self, level):
6837 if level == locking.LEVEL_NODE:
6838 self._LockInstancesNodes()
6840 def CheckPrereq(self):
6841 """Check prerequisites.
6843 This checks that the instance is in the cluster.
6846 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6847 assert self.instance is not None, \
6848 "Cannot retrieve locked instance %s" % self.op.instance_name
6849 _CheckNodeOnline(self, self.instance.primary_node)
6851 def Exec(self, feedback_fn):
6852 """Activate the disks.
6855 disks_ok, disks_info = \
6856 _AssembleInstanceDisks(self, self.instance,
6857 ignore_size=self.op.ignore_size)
6858 if not disks_ok:
6859 raise errors.OpExecError("Cannot activate block devices")
6861 if self.op.wait_for_sync:
6862 if not _WaitForSync(self, self.instance):
6863 raise errors.OpExecError("Some disks of the instance are degraded!")
6865 return disks_info
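# Assumed usage (sketch): the value returned above is the device_info list
# built by _AssembleInstanceDisks below, i.e. triples of
# (node, iv_name, device_path). An opcode driving this LU could look like
# (instance name hypothetical):
#
#   op = opcodes.OpInstanceActivateDisks(instance_name="inst1.example.com",
#                                        ignore_size=False)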
6868 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6869 ignore_size=False):
6870 """Prepare the block devices for an instance.
6872 This sets up the block devices on all nodes.
6874 @type lu: L{LogicalUnit}
6875 @param lu: the logical unit on whose behalf we execute
6876 @type instance: L{objects.Instance}
6877 @param instance: the instance for whose disks we assemble
6878 @type disks: list of L{objects.Disk} or None
6879 @param disks: which disks to assemble (or all, if None)
6880 @type ignore_secondaries: boolean
6881 @param ignore_secondaries: if true, errors on secondary nodes
6882 won't result in an error return from the function
6883 @type ignore_size: boolean
6884 @param ignore_size: if true, the current known size of the disk
6885 will not be used during the disk activation, useful for cases
6886 when the size is wrong
6887 @return: False if the operation failed, otherwise a list of
6888 (host, instance_visible_name, node_visible_name)
6889 with the mapping from node devices to instance devices
6892 device_info = []
6893 disks_ok = True
6894 iname = instance.name
6895 disks = _ExpandCheckDisks(instance, disks)
6897 # With the two passes mechanism we try to reduce the window of
6898 # opportunity for the race condition of switching DRBD to primary
6899 # before handshaking occurred, but we do not eliminate it
6901 # The proper fix would be to wait (with some limits) until the
6902 # connection has been made and drbd transitions from WFConnection
6903 # into any other network-connected state (Connected, SyncTarget,
6904 # SyncSource, etc.)
6906 # 1st pass, assemble on all nodes in secondary mode
6907 for idx, inst_disk in enumerate(disks):
6908 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6909 if ignore_size:
6910 node_disk = node_disk.Copy()
6911 node_disk.UnsetSize()
6912 lu.cfg.SetDiskID(node_disk, node)
6913 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6914 False, idx)
6915 msg = result.fail_msg
6916 if msg:
6917 is_offline_secondary = (node in instance.secondary_nodes and
6918 result.offline)
6919 lu.LogWarning("Could not prepare block device %s on node %s"
6920 " (is_primary=False, pass=1): %s",
6921 inst_disk.iv_name, node, msg)
6922 if not (ignore_secondaries or is_offline_secondary):
6923 disks_ok = False
6925 # FIXME: race condition on drbd migration to primary
6927 # 2nd pass, do only the primary node
6928 for idx, inst_disk in enumerate(disks):
6929 dev_path = None
6931 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6932 if node != instance.primary_node:
6933 continue
6934 if ignore_size:
6935 node_disk = node_disk.Copy()
6936 node_disk.UnsetSize()
6937 lu.cfg.SetDiskID(node_disk, node)
6938 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6939 True, idx)
6940 msg = result.fail_msg
6941 if msg:
6942 lu.LogWarning("Could not prepare block device %s on node %s"
6943 " (is_primary=True, pass=2): %s",
6944 inst_disk.iv_name, node, msg)
6945 disks_ok = False
6946 else:
6947 dev_path = result.payload
6949 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6951 # leave the disks configured for the primary node
6952 # this is a workaround that would be fixed better by
6953 # improving the logical/physical id handling
6954 for disk in disks:
6955 lu.cfg.SetDiskID(disk, instance.primary_node)
6957 return disks_ok, device_info
6960 def _StartInstanceDisks(lu, instance, force):
6961 """Start the disks of an instance.
6964 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6965 ignore_secondaries=force)
6966 if not disks_ok:
6967 _ShutdownInstanceDisks(lu, instance)
6968 if force is not None and not force:
6969 lu.LogWarning("",
6970 hint=("If the message above refers to a secondary node,"
6971 " you can retry the operation using '--force'"))
6972 raise errors.OpExecError("Disk consistency error")
6975 class LUInstanceDeactivateDisks(NoHooksLU):
6976 """Shutdown an instance's disks.
6981 def ExpandNames(self):
6982 self._ExpandAndLockInstance()
6983 self.needed_locks[locking.LEVEL_NODE] = []
6984 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6986 def DeclareLocks(self, level):
6987 if level == locking.LEVEL_NODE:
6988 self._LockInstancesNodes()
6990 def CheckPrereq(self):
6991 """Check prerequisites.
6993 This checks that the instance is in the cluster.
6996 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6997 assert self.instance is not None, \
6998 "Cannot retrieve locked instance %s" % self.op.instance_name
7000 def Exec(self, feedback_fn):
7001 """Deactivate the disks
7004 instance = self.instance
7005 if self.op.force:
7006 _ShutdownInstanceDisks(self, instance)
7007 else:
7008 _SafeShutdownInstanceDisks(self, instance)
7011 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7012 """Shutdown block devices of an instance.
7014 This function checks if an instance is running, before calling
7015 _ShutdownInstanceDisks.
7018 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7019 _ShutdownInstanceDisks(lu, instance, disks=disks)
7022 def _ExpandCheckDisks(instance, disks):
7023 """Return the instance disks selected by the disks list
7025 @type disks: list of L{objects.Disk} or None
7026 @param disks: selected disks
7027 @rtype: list of L{objects.Disk}
7028 @return: selected instance disks to act on
7031 if disks is None:
7032 return instance.disks
7033 else:
7034 if not set(disks).issubset(instance.disks):
7035 raise errors.ProgrammerError("Can only act on disks belonging to the"
7036 " target instance")
7037 return disks
7040 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7041 """Shutdown block devices of an instance.
7043 This does the shutdown on all nodes of the instance.
7045 If ignore_primary is true, errors on the primary node are
7046 ignored.
7049 all_result = True
7050 disks = _ExpandCheckDisks(instance, disks)
7052 for disk in disks:
7053 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7054 lu.cfg.SetDiskID(top_disk, node)
7055 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7056 msg = result.fail_msg
7057 if msg:
7058 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7059 disk.iv_name, node, msg)
7060 if ((node == instance.primary_node and not ignore_primary) or
7061 (node != instance.primary_node and not result.offline)):
7062 all_result = False
7064 return all_result
7066 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7067 """Checks if a node has enough free memory.
7069 This function checks if a given node has the needed amount of free
7070 memory. In case the node has less memory or we cannot get the
7071 information from the node, this function raises an OpPrereqError
7072 exception.
7074 @type lu: C{LogicalUnit}
7075 @param lu: a logical unit from which we get configuration data
7076 @type node: C{str}
7077 @param node: the node to check
7078 @type reason: C{str}
7079 @param reason: string to use in the error message
7080 @type requested: C{int}
7081 @param requested: the amount of memory in MiB to check for
7082 @type hypervisor_name: C{str}
7083 @param hypervisor_name: the hypervisor to ask for memory stats
7085 @return: node current free memory
7086 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7087 we cannot check the node
7090 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7091 nodeinfo[node].Raise("Can't get data from node %s" % node,
7092 prereq=True, ecode=errors.ECODE_ENVIRON)
7093 (_, _, (hv_info, )) = nodeinfo[node].payload
7095 free_mem = hv_info.get("memory_free", None)
7096 if not isinstance(free_mem, int):
7097 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7098 " was '%s'" % (node, free_mem),
7099 errors.ECODE_ENVIRON)
7100 if requested > free_mem:
7101 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7102 " needed %s MiB, available %s MiB" %
7103 (node, reason, requested, free_mem),
7104 errors.ECODE_NORES)
7105 return free_mem
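# Illustrative call (sketch, mirroring the usage in LUInstanceStartup below):
# reserving 4096 MiB on an instance's primary node before starting it; the
# amount is an arbitrary example.
#
#   free_mem = _CheckNodeFreeMemory(self, instance.primary_node,
#                                   "starting instance %s" % instance.name,
#                                   4096, instance.hypervisor)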
7108 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7109 """Checks if nodes have enough free disk space in all the VGs.
7111 This function checks if all given nodes have the needed amount of
7112 free disk. In case any node has less disk or we cannot get the
7113 information from the node, this function raises an OpPrereqError
7114 exception.
7116 @type lu: C{LogicalUnit}
7117 @param lu: a logical unit from which we get configuration data
7118 @type nodenames: C{list}
7119 @param nodenames: the list of node names to check
7120 @type req_sizes: C{dict}
7121 @param req_sizes: the hash of vg and corresponding amount of disk in
7122 MiB to check for
7123 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7124 or we cannot check the node
7127 for vg, req_size in req_sizes.items():
7128 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
7131 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7132 """Checks if nodes have enough free disk space in the specified VG.
7134 This function checks if all given nodes have the needed amount of
7135 free disk. In case any node has less disk or we cannot get the
7136 information from the node, this function raises an OpPrereqError
7137 exception.
7139 @type lu: C{LogicalUnit}
7140 @param lu: a logical unit from which we get configuration data
7141 @type nodenames: C{list}
7142 @param nodenames: the list of node names to check
7143 @type vg: C{str}
7144 @param vg: the volume group to check
7145 @type requested: C{int}
7146 @param requested: the amount of disk in MiB to check for
7147 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7148 or we cannot check the node
7151 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7152 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7153 for node in nodenames:
7154 info = nodeinfo[node]
7155 info.Raise("Cannot get current information from node %s" % node,
7156 prereq=True, ecode=errors.ECODE_ENVIRON)
7157 (_, (vg_info, ), _) = info.payload
7158 vg_free = vg_info.get("vg_free", None)
7159 if not isinstance(vg_free, int):
7160 raise errors.OpPrereqError("Can't compute free disk space on node"
7161 " %s for vg %s, result was '%s'" %
7162 (node, vg, vg_free), errors.ECODE_ENVIRON)
7163 if requested > vg_free:
7164 raise errors.OpPrereqError("Not enough disk space on target node %s"
7165 " vg %s: required %d MiB, available %d MiB" %
7166 (node, vg, requested, vg_free),
7167 errors.ECODE_NORES)
7170 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7171 """Checks if nodes have enough physical CPUs
7173 This function checks if all given nodes have the needed number of
7174 physical CPUs. In case any node has less CPUs or we cannot get the
7175 information from the node, this function raises an OpPrereqError
7176 exception.
7178 @type lu: C{LogicalUnit}
7179 @param lu: a logical unit from which we get configuration data
7180 @type nodenames: C{list}
7181 @param nodenames: the list of node names to check
7182 @type requested: C{int}
7183 @param requested: the minimum acceptable number of physical CPUs
7184 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7185 or we cannot check the node
7188 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7189 for node in nodenames:
7190 info = nodeinfo[node]
7191 info.Raise("Cannot get current information from node %s" % node,
7192 prereq=True, ecode=errors.ECODE_ENVIRON)
7193 (_, _, (hv_info, )) = info.payload
7194 num_cpus = hv_info.get("cpu_total", None)
7195 if not isinstance(num_cpus, int):
7196 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7197 " on node %s, result was '%s'" %
7198 (node, num_cpus), errors.ECODE_ENVIRON)
7199 if requested > num_cpus:
7200 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7201 "required" % (node, num_cpus, requested),
7205 class LUInstanceStartup(LogicalUnit):
7206 """Starts an instance.
7209 HPATH = "instance-start"
7210 HTYPE = constants.HTYPE_INSTANCE
7211 REQ_BGL = False
7213 def CheckArguments(self):
7214 # extra beparams
7215 if self.op.beparams:
7216 # fill the beparams dict
7217 objects.UpgradeBeParams(self.op.beparams)
7218 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7220 def ExpandNames(self):
7221 self._ExpandAndLockInstance()
7222 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7224 def DeclareLocks(self, level):
7225 if level == locking.LEVEL_NODE_RES:
7226 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7228 def BuildHooksEnv(self):
7229 """Build hooks env.
7231 This runs on master, primary and secondary nodes of the instance.
7234 env = {
7235 "FORCE": self.op.force,
7238 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7240 return env
7242 def BuildHooksNodes(self):
7243 """Build hooks nodes.
7246 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7247 return (nl, nl)
7249 def CheckPrereq(self):
7250 """Check prerequisites.
7252 This checks that the instance is in the cluster.
7255 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7256 assert self.instance is not None, \
7257 "Cannot retrieve locked instance %s" % self.op.instance_name
7260 if self.op.hvparams:
7261 # check hypervisor parameter syntax (locally)
7262 cluster = self.cfg.GetClusterInfo()
7263 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7264 filled_hvp = cluster.FillHV(instance)
7265 filled_hvp.update(self.op.hvparams)
7266 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7267 hv_type.CheckParameterSyntax(filled_hvp)
7268 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7270 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7272 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7274 if self.primary_offline and self.op.ignore_offline_nodes:
7275 self.LogWarning("Ignoring offline primary node")
7277 if self.op.hvparams or self.op.beparams:
7278 self.LogWarning("Overridden parameters are ignored")
7279 else:
7280 _CheckNodeOnline(self, instance.primary_node)
7282 bep = self.cfg.GetClusterInfo().FillBE(instance)
7283 bep.update(self.op.beparams)
7285 # check bridges existence
7286 _CheckInstanceBridgesExist(self, instance)
7288 remote_info = self.rpc.call_instance_info(instance.primary_node,
7289 instance.name,
7290 instance.hypervisor)
7291 remote_info.Raise("Error checking node %s" % instance.primary_node,
7292 prereq=True, ecode=errors.ECODE_ENVIRON)
7293 if not remote_info.payload: # not running already
7294 _CheckNodeFreeMemory(self, instance.primary_node,
7295 "starting instance %s" % instance.name,
7296 bep[constants.BE_MINMEM], instance.hypervisor)
7298 def Exec(self, feedback_fn):
7299 """Start the instance.
7302 instance = self.instance
7303 force = self.op.force
7305 if not self.op.no_remember:
7306 self.cfg.MarkInstanceUp(instance.name)
7308 if self.primary_offline:
7309 assert self.op.ignore_offline_nodes
7310 self.LogInfo("Primary node offline, marked instance as started")
7311 else:
7312 node_current = instance.primary_node
7314 _StartInstanceDisks(self, instance, force)
7316 result = \
7317 self.rpc.call_instance_start(node_current,
7318 (instance, self.op.hvparams,
7319 self.op.beparams),
7320 self.op.startup_paused)
7321 msg = result.fail_msg
7322 if msg:
7323 _ShutdownInstanceDisks(self, instance)
7324 raise errors.OpExecError("Could not start instance: %s" % msg)
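# Assumed usage (sketch): the corresponding opcode; the instance name is
# hypothetical and hvparams/beparams overrides are optional.
#
#   op = opcodes.OpInstanceStartup(instance_name="inst1.example.com",
#                                  force=False)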
7327 class LUInstanceReboot(LogicalUnit):
7328 """Reboot an instance.
7331 HPATH = "instance-reboot"
7332 HTYPE = constants.HTYPE_INSTANCE
7333 REQ_BGL = False
7335 def ExpandNames(self):
7336 self._ExpandAndLockInstance()
7338 def BuildHooksEnv(self):
7339 """Build hooks env.
7341 This runs on master, primary and secondary nodes of the instance.
7344 env = {
7345 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7346 "REBOOT_TYPE": self.op.reboot_type,
7347 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7350 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7352 return env
7354 def BuildHooksNodes(self):
7355 """Build hooks nodes.
7358 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7359 return (nl, nl)
7361 def CheckPrereq(self):
7362 """Check prerequisites.
7364 This checks that the instance is in the cluster.
7367 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7368 assert self.instance is not None, \
7369 "Cannot retrieve locked instance %s" % self.op.instance_name
7370 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7371 _CheckNodeOnline(self, instance.primary_node)
7373 # check bridges existence
7374 _CheckInstanceBridgesExist(self, instance)
7376 def Exec(self, feedback_fn):
7377 """Reboot the instance.
7380 instance = self.instance
7381 ignore_secondaries = self.op.ignore_secondaries
7382 reboot_type = self.op.reboot_type
7384 remote_info = self.rpc.call_instance_info(instance.primary_node,
7385 instance.name,
7386 instance.hypervisor)
7387 remote_info.Raise("Error checking node %s" % instance.primary_node)
7388 instance_running = bool(remote_info.payload)
7390 node_current = instance.primary_node
7392 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7393 constants.INSTANCE_REBOOT_HARD]:
7394 for disk in instance.disks:
7395 self.cfg.SetDiskID(disk, node_current)
7396 result = self.rpc.call_instance_reboot(node_current, instance,
7397 reboot_type,
7398 self.op.shutdown_timeout)
7399 result.Raise("Could not reboot instance")
7400 else:
7401 if instance_running:
7402 result = self.rpc.call_instance_shutdown(node_current, instance,
7403 self.op.shutdown_timeout)
7404 result.Raise("Could not shutdown instance for full reboot")
7405 _ShutdownInstanceDisks(self, instance)
7406 else:
7407 self.LogInfo("Instance %s was already stopped, starting now",
7408 instance.name)
7409 _StartInstanceDisks(self, instance, ignore_secondaries)
7410 result = self.rpc.call_instance_start(node_current,
7411 (instance, None, None), False)
7412 msg = result.fail_msg
7413 if msg:
7414 _ShutdownInstanceDisks(self, instance)
7415 raise errors.OpExecError("Could not start instance for"
7416 " full reboot: %s" % msg)
7418 self.cfg.MarkInstanceUp(instance.name)
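# Assumed usage (sketch): a hard reboot via the corresponding opcode; the
# constant and parameters are real, the instance name is hypothetical.
#
#   op = opcodes.OpInstanceReboot(instance_name="inst1.example.com",
#                                 reboot_type=constants.INSTANCE_REBOOT_HARD,
#                                 ignore_secondaries=False)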
7421 class LUInstanceShutdown(LogicalUnit):
7422 """Shutdown an instance.
7425 HPATH = "instance-stop"
7426 HTYPE = constants.HTYPE_INSTANCE
7427 REQ_BGL = False
7429 def ExpandNames(self):
7430 self._ExpandAndLockInstance()
7432 def BuildHooksEnv(self):
7433 """Build hooks env.
7435 This runs on master, primary and secondary nodes of the instance.
7438 env = _BuildInstanceHookEnvByObject(self, self.instance)
7439 env["TIMEOUT"] = self.op.timeout
7440 return env
7442 def BuildHooksNodes(self):
7443 """Build hooks nodes.
7446 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7447 return (nl, nl)
7449 def CheckPrereq(self):
7450 """Check prerequisites.
7452 This checks that the instance is in the cluster.
7455 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7456 assert self.instance is not None, \
7457 "Cannot retrieve locked instance %s" % self.op.instance_name
7459 if not self.op.force:
7460 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7461 else:
7462 self.LogWarning("Ignoring offline instance check")
7464 self.primary_offline = \
7465 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7467 if self.primary_offline and self.op.ignore_offline_nodes:
7468 self.LogWarning("Ignoring offline primary node")
7469 else:
7470 _CheckNodeOnline(self, self.instance.primary_node)
7472 def Exec(self, feedback_fn):
7473 """Shutdown the instance.
7476 instance = self.instance
7477 node_current = instance.primary_node
7478 timeout = self.op.timeout
7480 # If the instance is offline we shouldn't mark it as down, as that
7481 # resets the offline flag.
7482 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7483 self.cfg.MarkInstanceDown(instance.name)
7485 if self.primary_offline:
7486 assert self.op.ignore_offline_nodes
7487 self.LogInfo("Primary node offline, marked instance as stopped")
7488 else:
7489 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7490 msg = result.fail_msg
7492 self.LogWarning("Could not shutdown instance: %s", msg)
7494 _ShutdownInstanceDisks(self, instance)
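# A matching opcode submission could look like this (a sketch with
# hypothetical values; field names assumed from the opcodes module):
#
#   op = opcodes.OpInstanceShutdown(instance_name="inst1.example.com",
#                                   timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
#                                   ignore_offline_nodes=False)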
7497 class LUInstanceReinstall(LogicalUnit):
7498 """Reinstall an instance.
7501 HPATH = "instance-reinstall"
7502 HTYPE = constants.HTYPE_INSTANCE
7505 def ExpandNames(self):
7506 self._ExpandAndLockInstance()
7508 def BuildHooksEnv(self):
7511 This runs on master, primary and secondary nodes of the instance.
7514 return _BuildInstanceHookEnvByObject(self, self.instance)
7516 def BuildHooksNodes(self):
7517 """Build hooks nodes.
7520 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7521 return (nl, nl)
7523 def CheckPrereq(self):
7524 """Check prerequisites.
7526 This checks that the instance is in the cluster and is not running.
7529 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7530 assert instance is not None, \
7531 "Cannot retrieve locked instance %s" % self.op.instance_name
7532 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7533 " offline, cannot reinstall")
7535 if instance.disk_template == constants.DT_DISKLESS:
7536 raise errors.OpPrereqError("Instance '%s' has no disks" %
7537 self.op.instance_name,
7538 errors.ECODE_INVAL)
7539 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7541 if self.op.os_type is not None:
7543 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7544 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7545 instance_os = self.op.os_type
7546 else:
7547 instance_os = instance.os
7549 nodelist = list(instance.all_nodes)
7551 if self.op.osparams:
7552 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7553 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7554 self.os_inst = i_osdict # the new dict (without defaults)
7555 else:
7556 self.os_inst = {}
7558 self.instance = instance
7560 def Exec(self, feedback_fn):
7561 """Reinstall the instance.
7564 inst = self.instance
7566 if self.op.os_type is not None:
7567 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7568 inst.os = self.op.os_type
7569 # Write to configuration
7570 self.cfg.Update(inst, feedback_fn)
7572 _StartInstanceDisks(self, inst, None)
7573 try:
7574 feedback_fn("Running the instance OS create scripts...")
7575 # FIXME: pass debug option from opcode to backend
7576 result = self.rpc.call_instance_os_add(inst.primary_node,
7577 (inst, self.os_inst), True,
7578 self.op.debug_level)
7579 result.Raise("Could not install OS for instance %s on node %s" %
7580 (inst.name, inst.primary_node))
7581 finally:
7582 _ShutdownInstanceDisks(self, inst)
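# Sketch of driving this LU with an OS change (hypothetical values; the OS
# name and variant depend on what is installed on the nodes, and the osparams
# keys depend on the OS definition):
#
#   op = opcodes.OpInstanceReinstall(instance_name="inst1.example.com",
#                                    os_type="debootstrap+default",
#                                    osparams={})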
7585 class LUInstanceRecreateDisks(LogicalUnit):
7586 """Recreate an instance's missing disks.
7589 HPATH = "instance-recreate-disks"
7590 HTYPE = constants.HTYPE_INSTANCE
7593 _MODIFYABLE = compat.UniqueFrozenset([
7594 constants.IDISK_SIZE,
7595 constants.IDISK_MODE,
7596 ])
7598 # New or changed disk parameters may have different semantics
7599 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7600 constants.IDISK_ADOPT,
7602 # TODO: Implement support changing VG while recreating
7603 constants.IDISK_VG,
7604 constants.IDISK_METAVG,
7605 constants.IDISK_PROVIDER,
7606 ]))
7608 def _RunAllocator(self):
7609 """Run the allocator based on input opcode.
7612 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7615 # The allocator should actually run in "relocate" mode, but current
7616 # allocators don't support relocating all the nodes of an instance at
7617 # the same time. As a workaround we use "allocate" mode, but this is
7618 # suboptimal for two reasons:
7619 # - The instance name passed to the allocator is present in the list of
7620 # existing instances, so there could be a conflict within the
7621 # internal structures of the allocator. This doesn't happen with the
7622 # current allocators, but it's a liability.
7623 # - The allocator counts the resources used by the instance twice: once
7624 # because the instance exists already, and once because it tries to
7625 # allocate a new instance.
7626 # The allocator could choose some of the nodes on which the instance is
7627 # running, but that's not a problem. If the instance nodes are broken,
7628 # they should already be marked as drained or offline, and hence
7629 # skipped by the allocator. If instance disks have been lost for other
7630 # reasons, then recreating the disks on the same nodes should be fine.
7631 disk_template = self.instance.disk_template
7632 spindle_use = be_full[constants.BE_SPINDLE_USE]
7633 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7634 disk_template=disk_template,
7635 tags=list(self.instance.GetTags()),
7636 os=self.instance.os,
7637 nics=[{}],
7638 vcpus=be_full[constants.BE_VCPUS],
7639 memory=be_full[constants.BE_MAXMEM],
7640 spindle_use=spindle_use,
7641 disks=[{constants.IDISK_SIZE: d.size,
7642 constants.IDISK_MODE: d.mode}
7643 for d in self.instance.disks],
7644 hypervisor=self.instance.hypervisor,
7645 node_whitelist=None)
7646 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7648 ial.Run(self.op.iallocator)
7650 assert req.RequiredNodes() == len(self.instance.all_nodes)
7652 if not ial.success:
7653 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7654 " %s" % (self.op.iallocator, ial.info),
7655 errors.ECODE_NORES)
7657 self.op.nodes = ial.result
7658 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7659 self.op.instance_name, self.op.iallocator,
7660 utils.CommaJoin(ial.result))
7662 def CheckArguments(self):
7663 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7664 # Normalize and convert deprecated list of disk indices
7665 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7667 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7668 if duplicates:
7669 raise errors.OpPrereqError("Some disks have been specified more than"
7670 " once: %s" % utils.CommaJoin(duplicates),
7671 errors.ECODE_INVAL)
7673 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7674 # when neither iallocator nor nodes are specified
7675 if self.op.iallocator or self.op.nodes:
7676 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7678 for (idx, params) in self.op.disks:
7679 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7680 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7681 if unsupported:
7682 raise errors.OpPrereqError("Parameters for disk %s try to change"
7683 " unmodifiable parameter(s): %s" %
7684 (idx, utils.CommaJoin(unsupported)),
7685 errors.ECODE_INVAL)
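# After CheckArguments, self.op.disks is normalized to (index, params) pairs;
# e.g. the deprecated form [0, 2] becomes [(0, {}), (2, {})], and a size
# override for disk 0 would be passed as (hypothetical value):
#
#   [(0, {constants.IDISK_SIZE: 2048})]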
7687 def ExpandNames(self):
7688 self._ExpandAndLockInstance()
7689 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7691 if self.op.nodes:
7692 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7693 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7694 else:
7695 self.needed_locks[locking.LEVEL_NODE] = []
7696 if self.op.iallocator:
7697 # iallocator will select a new node in the same group
7698 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7699 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7701 self.needed_locks[locking.LEVEL_NODE_RES] = []
7703 def DeclareLocks(self, level):
7704 if level == locking.LEVEL_NODEGROUP:
7705 assert self.op.iallocator is not None
7706 assert not self.op.nodes
7707 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7708 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7709 # Lock the primary group used by the instance optimistically; this
7710 # requires going via the node before it's locked, requiring
7711 # verification later on
7712 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7713 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7715 elif level == locking.LEVEL_NODE:
7716 # If an allocator is used, then we lock all the nodes in the current
7717 # instance group, as we don't know yet which ones will be selected;
7718 # if we replace the nodes without using an allocator, locks are
7719 # already declared in ExpandNames; otherwise, we need to lock all the
7720 # instance nodes for disk re-creation
7721 if self.op.iallocator:
7722 assert not self.op.nodes
7723 assert not self.needed_locks[locking.LEVEL_NODE]
7724 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7726 # Lock member nodes of the group of the primary node
7727 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7728 self.needed_locks[locking.LEVEL_NODE].extend(
7729 self.cfg.GetNodeGroup(group_uuid).members)
7731 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7732 elif not self.op.nodes:
7733 self._LockInstancesNodes(primary_only=False)
7734 elif level == locking.LEVEL_NODE_RES:
7736 self.needed_locks[locking.LEVEL_NODE_RES] = \
7737 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7739 def BuildHooksEnv(self):
7742 This runs on master, primary and secondary nodes of the instance.
7745 return _BuildInstanceHookEnvByObject(self, self.instance)
7747 def BuildHooksNodes(self):
7748 """Build hooks nodes.
7751 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7752 return (nl, nl)
7754 def CheckPrereq(self):
7755 """Check prerequisites.
7757 This checks that the instance is in the cluster and is not running.
7760 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7761 assert instance is not None, \
7762 "Cannot retrieve locked instance %s" % self.op.instance_name
7763 if self.op.nodes:
7764 if len(self.op.nodes) != len(instance.all_nodes):
7765 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7766 " %d replacement nodes were specified" %
7767 (instance.name, len(instance.all_nodes),
7768 len(self.op.nodes)),
7769 errors.ECODE_INVAL)
7770 assert instance.disk_template != constants.DT_DRBD8 or \
7771 len(self.op.nodes) == 2
7772 assert instance.disk_template != constants.DT_PLAIN or \
7773 len(self.op.nodes) == 1
7774 primary_node = self.op.nodes[0]
7775 else:
7776 primary_node = instance.primary_node
7777 if not self.op.iallocator:
7778 _CheckNodeOnline(self, primary_node)
7780 if instance.disk_template == constants.DT_DISKLESS:
7781 raise errors.OpPrereqError("Instance '%s' has no disks" %
7782 self.op.instance_name, errors.ECODE_INVAL)
7784 # Verify if node group locks are still correct
7785 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7786 if owned_groups:
7787 # Node group locks are acquired only for the primary node (and only
7788 # when the allocator is used)
7789 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7790 primary_only=True)
7792 # if we replace nodes *and* the old primary is offline, we don't
7793 # check the instance state
7794 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7795 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7796 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7797 msg="cannot recreate disks")
7799 if self.op.disks:
7800 self.disks = dict(self.op.disks)
7801 else:
7802 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7804 maxidx = max(self.disks.keys())
7805 if maxidx >= len(instance.disks):
7806 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7807 errors.ECODE_INVAL)
7809 if ((self.op.nodes or self.op.iallocator) and
7810 sorted(self.disks.keys()) != range(len(instance.disks))):
7811 raise errors.OpPrereqError("Can't recreate disks partially and"
7812 " change the nodes at the same time",
7813 errors.ECODE_INVAL)
7815 self.instance = instance
7817 if self.op.iallocator:
7818 self._RunAllocator()
7819 # Release unneeded node and node resource locks
7820 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7821 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7822 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7824 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7826 def Exec(self, feedback_fn):
7827 """Recreate the disks.
7830 instance = self.instance
7832 assert (self.owned_locks(locking.LEVEL_NODE) ==
7833 self.owned_locks(locking.LEVEL_NODE_RES))
7835 to_skip = []
7836 mods = [] # keeps track of needed changes
7838 for idx, disk in enumerate(instance.disks):
7839 try:
7840 changes = self.disks[idx]
7841 except KeyError:
7842 # Disk should not be recreated
7843 to_skip.append(idx)
7844 continue
7846 # update secondaries for disks, if needed
7847 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7848 # need to update the nodes and minors
7849 assert len(self.op.nodes) == 2
7850 assert len(disk.logical_id) == 6 # otherwise disk internals
7851 # have changed
7852 (_, _, old_port, _, _, old_secret) = disk.logical_id
7853 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7854 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7855 new_minors[0], new_minors[1], old_secret)
7856 assert len(disk.logical_id) == len(new_id)
7857 else:
7858 new_id = None
7860 mods.append((idx, new_id, changes))
7862 # now that we have passed all asserts above, we can apply the mods
7863 # in a single run (to avoid partial changes)
7864 for idx, new_id, changes in mods:
7865 disk = instance.disks[idx]
7866 if new_id is not None:
7867 assert disk.dev_type == constants.LD_DRBD8
7868 disk.logical_id = new_id
7869 if changes:
7870 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7871 mode=changes.get(constants.IDISK_MODE, None))
7873 # change primary node, if needed
7874 if self.op.nodes:
7875 instance.primary_node = self.op.nodes[0]
7876 self.LogWarning("Changing the instance's nodes, you will have to"
7877 " remove any disks left on the older nodes manually")
7879 if self.op.nodes:
7880 self.cfg.Update(instance, feedback_fn)
7882 # All touched nodes must be locked
7883 mylocks = self.owned_locks(locking.LEVEL_NODE)
7884 assert mylocks.issuperset(frozenset(instance.all_nodes))
7885 _CreateDisks(self, instance, to_skip=to_skip)
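# Typical invocation, recreating all disks on two fresh nodes (hypothetical
# node names; "nodes" and "iallocator" are mutually exclusive, as enforced
# in CheckArguments):
#
#   op = opcodes.OpInstanceRecreateDisks(instance_name="inst1.example.com",
#                                        nodes=["node3.example.com",
#                                               "node4.example.com"])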
7888 class LUInstanceRename(LogicalUnit):
7889 """Rename an instance.
7892 HPATH = "instance-rename"
7893 HTYPE = constants.HTYPE_INSTANCE
7895 def CheckArguments(self):
7899 if self.op.ip_check and not self.op.name_check:
7900 # TODO: make the ip check more flexible and not depend on the name check
7901 raise errors.OpPrereqError("IP address check requires a name check",
7902 errors.ECODE_INVAL)
7904 def BuildHooksEnv(self):
7907 This runs on master, primary and secondary nodes of the instance.
7910 env = _BuildInstanceHookEnvByObject(self, self.instance)
7911 env["INSTANCE_NEW_NAME"] = self.op.new_name
7914 def BuildHooksNodes(self):
7915 """Build hooks nodes.
7918 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7919 return (nl, nl)
7921 def CheckPrereq(self):
7922 """Check prerequisites.
7924 This checks that the instance is in the cluster and is not running.
7927 self.op.instance_name = _ExpandInstanceName(self.cfg,
7928 self.op.instance_name)
7929 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7930 assert instance is not None
7931 _CheckNodeOnline(self, instance.primary_node)
7932 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7933 msg="cannot rename")
7934 self.instance = instance
7936 new_name = self.op.new_name
7937 if self.op.name_check:
7938 hostname = _CheckHostnameSane(self, new_name)
7939 new_name = self.op.new_name = hostname.name
7940 if (self.op.ip_check and
7941 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7942 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7943 (hostname.ip, new_name),
7944 errors.ECODE_NOTUNIQUE)
7946 instance_list = self.cfg.GetInstanceList()
7947 if new_name in instance_list and new_name != instance.name:
7948 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7949 new_name, errors.ECODE_EXISTS)
7951 def Exec(self, feedback_fn):
7952 """Rename the instance.
7955 inst = self.instance
7956 old_name = inst.name
7958 rename_file_storage = False
7959 if (inst.disk_template in constants.DTS_FILEBASED and
7960 self.op.new_name != inst.name):
7961 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7962 rename_file_storage = True
7964 self.cfg.RenameInstance(inst.name, self.op.new_name)
7965 # Change the instance lock. This is definitely safe while we hold the BGL.
7966 # Otherwise the new lock would have to be added in acquired mode.
7968 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7969 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7970 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7972 # re-read the instance from the configuration after rename
7973 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7975 if rename_file_storage:
7976 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7977 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7978 old_file_storage_dir,
7979 new_file_storage_dir)
7980 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7981 " (but the instance has been renamed in Ganeti)" %
7982 (inst.primary_node, old_file_storage_dir,
7983 new_file_storage_dir))
7985 _StartInstanceDisks(self, inst, None)
7986 # update info on disks
7987 info = _GetInstanceInfoText(inst)
7988 for (idx, disk) in enumerate(inst.disks):
7989 for node in inst.all_nodes:
7990 self.cfg.SetDiskID(disk, node)
7991 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7992 if result.fail_msg:
7993 self.LogWarning("Error setting info on node %s for disk %s: %s",
7994 node, idx, result.fail_msg)
7996 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7997 old_name, self.op.debug_level)
7998 msg = result.fail_msg
7999 if msg:
8000 msg = ("Could not run OS rename script for instance %s on node %s"
8001 " (but the instance has been renamed in Ganeti): %s" %
8002 (inst.name, inst.primary_node, msg))
8003 self.LogWarning(msg)
8005 _ShutdownInstanceDisks(self, inst)
8007 return inst.name
8010 class LUInstanceRemove(LogicalUnit):
8011 """Remove an instance.
8014 HPATH = "instance-remove"
8015 HTYPE = constants.HTYPE_INSTANCE
8018 def ExpandNames(self):
8019 self._ExpandAndLockInstance()
8020 self.needed_locks[locking.LEVEL_NODE] = []
8021 self.needed_locks[locking.LEVEL_NODE_RES] = []
8022 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8024 def DeclareLocks(self, level):
8025 if level == locking.LEVEL_NODE:
8026 self._LockInstancesNodes()
8027 elif level == locking.LEVEL_NODE_RES:
8029 self.needed_locks[locking.LEVEL_NODE_RES] = \
8030 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8032 def BuildHooksEnv(self):
8035 This runs on master, primary and secondary nodes of the instance.
8038 env = _BuildInstanceHookEnvByObject(self, self.instance)
8039 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8042 def BuildHooksNodes(self):
8043 """Build hooks nodes.
8046 nl = [self.cfg.GetMasterNode()]
8047 nl_post = list(self.instance.all_nodes) + nl
8048 return (nl, nl_post)
8050 def CheckPrereq(self):
8051 """Check prerequisites.
8053 This checks that the instance is in the cluster.
8056 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8057 assert self.instance is not None, \
8058 "Cannot retrieve locked instance %s" % self.op.instance_name
8060 def Exec(self, feedback_fn):
8061 """Remove the instance.
8064 instance = self.instance
8065 logging.info("Shutting down instance %s on node %s",
8066 instance.name, instance.primary_node)
8068 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8069 self.op.shutdown_timeout)
8070 msg = result.fail_msg
8071 if msg:
8072 if self.op.ignore_failures:
8073 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8074 else:
8075 raise errors.OpExecError("Could not shutdown instance %s on"
8076 " node %s: %s" %
8077 (instance.name, instance.primary_node, msg))
8079 assert (self.owned_locks(locking.LEVEL_NODE) ==
8080 self.owned_locks(locking.LEVEL_NODE_RES))
8081 assert not (set(instance.all_nodes) -
8082 self.owned_locks(locking.LEVEL_NODE)), \
8083 "Not owning correct locks"
8085 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8088 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8089 """Utility function to remove an instance.
8092 logging.info("Removing block devices for instance %s", instance.name)
8094 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8095 if not ignore_failures:
8096 raise errors.OpExecError("Can't remove instance's disks")
8097 feedback_fn("Warning: can't remove instance's disks")
8099 logging.info("Removing instance %s out of cluster config", instance.name)
8101 lu.cfg.RemoveInstance(instance.name)
8103 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8104 "Instance lock removal conflict"
8106 # Remove lock for the instance
8107 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8110 class LUInstanceQuery(NoHooksLU):
8111 """Logical unit for querying instances.
8114 # pylint: disable=W0142
8117 def CheckArguments(self):
8118 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8119 self.op.output_fields, self.op.use_locking)
8121 def ExpandNames(self):
8122 self.iq.ExpandNames(self)
8124 def DeclareLocks(self, level):
8125 self.iq.DeclareLocks(self, level)
8127 def Exec(self, feedback_fn):
8128 return self.iq.OldStyleQuery(self)
8131 def _ExpandNamesForMigration(lu):
8132 """Expands names for use with L{TLMigrateInstance}.
8134 @type lu: L{LogicalUnit}
8137 if lu.op.target_node is not None:
8138 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8140 lu.needed_locks[locking.LEVEL_NODE] = []
8141 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8143 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8144 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8146 # The node allocation lock is actually only needed for replicated instances
8147 # (e.g. DRBD8) and if an iallocator is used.
8148 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8151 def _DeclareLocksForMigration(lu, level):
8152 """Declares locks for L{TLMigrateInstance}.
8154 @type lu: L{LogicalUnit}
8155 @param level: Lock level
8158 if level == locking.LEVEL_NODE_ALLOC:
8159 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8161 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8163 # Node locks are already declared here rather than at LEVEL_NODE as we need
8164 # the instance object anyway to declare the node allocation lock.
8165 if instance.disk_template in constants.DTS_EXT_MIRROR:
8166 if lu.op.target_node is None:
8167 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8168 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8169 else:
8170 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8171 lu.op.target_node]
8172 del lu.recalculate_locks[locking.LEVEL_NODE]
8173 else:
8174 lu._LockInstancesNodes() # pylint: disable=W0212
8176 elif level == locking.LEVEL_NODE:
8177 # Node locks are declared together with the node allocation lock
8178 assert (lu.needed_locks[locking.LEVEL_NODE] or
8179 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8181 elif level == locking.LEVEL_NODE_RES:
8183 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8184 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8187 class LUInstanceFailover(LogicalUnit):
8188 """Failover an instance.
8191 HPATH = "instance-failover"
8192 HTYPE = constants.HTYPE_INSTANCE
8195 def CheckArguments(self):
8196 """Check the arguments.
8199 self.iallocator = getattr(self.op, "iallocator", None)
8200 self.target_node = getattr(self.op, "target_node", None)
8202 def ExpandNames(self):
8203 self._ExpandAndLockInstance()
8204 _ExpandNamesForMigration(self)
8206 self._migrater = \
8207 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8208 self.op.ignore_consistency, True,
8209 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8211 self.tasklets = [self._migrater]
8213 def DeclareLocks(self, level):
8214 _DeclareLocksForMigration(self, level)
8216 def BuildHooksEnv(self):
8219 This runs on master, primary and secondary nodes of the instance.
8222 instance = self._migrater.instance
8223 source_node = instance.primary_node
8224 target_node = self.op.target_node
8225 env = {
8226 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8227 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8228 "OLD_PRIMARY": source_node,
8229 "NEW_PRIMARY": target_node,
8230 }
8232 if instance.disk_template in constants.DTS_INT_MIRROR:
8233 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8234 env["NEW_SECONDARY"] = source_node
8235 else:
8236 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8238 env.update(_BuildInstanceHookEnvByObject(self, instance))
8240 return env
8242 def BuildHooksNodes(self):
8243 """Build hooks nodes.
8246 instance = self._migrater.instance
8247 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8248 return (nl, nl + [instance.primary_node])
8251 class LUInstanceMigrate(LogicalUnit):
8252 """Migrate an instance.
8254 This is migration without shutting down, compared to the failover,
8255 which is done with shutdown.
8258 HPATH = "instance-migrate"
8259 HTYPE = constants.HTYPE_INSTANCE
8262 def ExpandNames(self):
8263 self._ExpandAndLockInstance()
8264 _ExpandNamesForMigration(self)
8266 self._migrater = \
8267 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8268 False, self.op.allow_failover, False,
8269 self.op.allow_runtime_changes,
8270 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8271 self.op.ignore_ipolicy)
8273 self.tasklets = [self._migrater]
8275 def DeclareLocks(self, level):
8276 _DeclareLocksForMigration(self, level)
8278 def BuildHooksEnv(self):
8281 This runs on master, primary and secondary nodes of the instance.
8284 instance = self._migrater.instance
8285 source_node = instance.primary_node
8286 target_node = self.op.target_node
8287 env = _BuildInstanceHookEnvByObject(self, instance)
8288 env.update({
8289 "MIGRATE_LIVE": self._migrater.live,
8290 "MIGRATE_CLEANUP": self.op.cleanup,
8291 "OLD_PRIMARY": source_node,
8292 "NEW_PRIMARY": target_node,
8293 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8294 })
8296 if instance.disk_template in constants.DTS_INT_MIRROR:
8297 env["OLD_SECONDARY"] = target_node
8298 env["NEW_SECONDARY"] = source_node
8299 else:
8300 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8302 return env
8304 def BuildHooksNodes(self):
8305 """Build hooks nodes.
8308 instance = self._migrater.instance
8309 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8310 return (nl, nl + [instance.primary_node])
8313 class LUInstanceMove(LogicalUnit):
8314 """Move an instance by data-copying.
8317 HPATH = "instance-move"
8318 HTYPE = constants.HTYPE_INSTANCE
8321 def ExpandNames(self):
8322 self._ExpandAndLockInstance()
8323 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8324 self.op.target_node = target_node
8325 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8326 self.needed_locks[locking.LEVEL_NODE_RES] = []
8327 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8329 def DeclareLocks(self, level):
8330 if level == locking.LEVEL_NODE:
8331 self._LockInstancesNodes(primary_only=True)
8332 elif level == locking.LEVEL_NODE_RES:
8334 self.needed_locks[locking.LEVEL_NODE_RES] = \
8335 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8337 def BuildHooksEnv(self):
8340 This runs on master, primary and secondary nodes of the instance.
8344 "TARGET_NODE": self.op.target_node,
8345 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8347 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8350 def BuildHooksNodes(self):
8351 """Build hooks nodes.
8354 nl = [
8355 self.cfg.GetMasterNode(),
8356 self.instance.primary_node,
8357 self.op.target_node,
8358 ]
8359 return (nl, nl)
8361 def CheckPrereq(self):
8362 """Check prerequisites.
8364 This checks that the instance is in the cluster.
8367 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8368 assert self.instance is not None, \
8369 "Cannot retrieve locked instance %s" % self.op.instance_name
8371 node = self.cfg.GetNodeInfo(self.op.target_node)
8372 assert node is not None, \
8373 "Cannot retrieve locked node %s" % self.op.target_node
8375 self.target_node = target_node = node.name
8377 if target_node == instance.primary_node:
8378 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8379 (instance.name, target_node),
8382 bep = self.cfg.GetClusterInfo().FillBE(instance)
8384 for idx, dsk in enumerate(instance.disks):
8385 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8386 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8387 " cannot copy" % idx, errors.ECODE_STATE)
8389 _CheckNodeOnline(self, target_node)
8390 _CheckNodeNotDrained(self, target_node)
8391 _CheckNodeVmCapable(self, target_node)
8392 cluster = self.cfg.GetClusterInfo()
8393 group_info = self.cfg.GetNodeGroup(node.group)
8394 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8395 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8396 ignore=self.op.ignore_ipolicy)
8398 if instance.admin_state == constants.ADMINST_UP:
8399 # check memory requirements on the secondary node
8400 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8401 instance.name, bep[constants.BE_MAXMEM],
8402 instance.hypervisor)
8403 else:
8404 self.LogInfo("Not checking memory on the secondary node as"
8405 " instance will not be started")
8407 # check bridge existence
8408 _CheckInstanceBridgesExist(self, instance, node=target_node)
8410 def Exec(self, feedback_fn):
8411 """Move an instance.
8413 The move is done by shutting it down on its present node, copying
8414 the data over (slow) and starting it on the new node.
8417 instance = self.instance
8419 source_node = instance.primary_node
8420 target_node = self.target_node
8422 self.LogInfo("Shutting down instance %s on source node %s",
8423 instance.name, source_node)
8425 assert (self.owned_locks(locking.LEVEL_NODE) ==
8426 self.owned_locks(locking.LEVEL_NODE_RES))
8428 result = self.rpc.call_instance_shutdown(source_node, instance,
8429 self.op.shutdown_timeout)
8430 msg = result.fail_msg
8431 if msg:
8432 if self.op.ignore_consistency:
8433 self.LogWarning("Could not shutdown instance %s on node %s."
8434 " Proceeding anyway. Please make sure node"
8435 " %s is down. Error details: %s",
8436 instance.name, source_node, source_node, msg)
8437 else:
8438 raise errors.OpExecError("Could not shutdown instance %s on"
8439 " node %s: %s" %
8440 (instance.name, source_node, msg))
8442 # create the target disks
8443 try:
8444 _CreateDisks(self, instance, target_node=target_node)
8445 except errors.OpExecError:
8446 self.LogWarning("Device creation failed, reverting...")
8447 try:
8448 _RemoveDisks(self, instance, target_node=target_node)
8449 finally:
8450 self.cfg.ReleaseDRBDMinors(instance.name)
8451 raise
8453 cluster_name = self.cfg.GetClusterInfo().cluster_name
8455 errs = []
8456 # activate, get path, copy the data over
8457 for idx, disk in enumerate(instance.disks):
8458 self.LogInfo("Copying data for disk %d", idx)
8459 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8460 instance.name, True, idx)
8461 if result.fail_msg:
8462 self.LogWarning("Can't assemble newly created disk %d: %s",
8463 idx, result.fail_msg)
8464 errs.append(result.fail_msg)
8465 break
8466 dev_path = result.payload
8467 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8468 target_node, dev_path,
8469 cluster_name)
8470 if result.fail_msg:
8471 self.LogWarning("Can't copy data over for disk %d: %s",
8472 idx, result.fail_msg)
8473 errs.append(result.fail_msg)
8474 break
8476 if errs:
8477 self.LogWarning("Some disks failed to copy, aborting")
8478 try:
8479 _RemoveDisks(self, instance, target_node=target_node)
8480 finally:
8481 self.cfg.ReleaseDRBDMinors(instance.name)
8482 raise errors.OpExecError("Errors during disk copy: %s" %
8483 (",".join(errs),))
8485 instance.primary_node = target_node
8486 self.cfg.Update(instance, feedback_fn)
8488 self.LogInfo("Removing the disks on the original node")
8489 _RemoveDisks(self, instance, target_node=source_node)
8491 # Only start the instance if it's marked as up
8492 if instance.admin_state == constants.ADMINST_UP:
8493 self.LogInfo("Starting instance %s on node %s",
8494 instance.name, target_node)
8496 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8497 ignore_secondaries=True)
8498 if not disks_ok:
8499 _ShutdownInstanceDisks(self, instance)
8500 raise errors.OpExecError("Can't activate the instance's disks")
8502 result = self.rpc.call_instance_start(target_node,
8503 (instance, None, None), False)
8504 msg = result.fail_msg
8505 if msg:
8506 _ShutdownInstanceDisks(self, instance)
8507 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8508 (instance.name, target_node, msg))
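# Corresponding opcode sketch (hypothetical names); as CheckPrereq enforces,
# this only works for instances whose disks are plain LVs or file-based:
#
#   op = opcodes.OpInstanceMove(instance_name="inst1.example.com",
#                               target_node="node2.example.com")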
8511 class LUNodeMigrate(LogicalUnit):
8512 """Migrate all instances from a node.
8515 HPATH = "node-migrate"
8516 HTYPE = constants.HTYPE_NODE
8519 def CheckArguments(self):
8520 pass
8522 def ExpandNames(self):
8523 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8525 self.share_locks = _ShareAll()
8526 self.needed_locks = {
8527 locking.LEVEL_NODE: [self.op.node_name],
8528 }
8530 def BuildHooksEnv(self):
8533 This runs on the master, the primary and all the secondaries.
8537 "NODE_NAME": self.op.node_name,
8538 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8541 def BuildHooksNodes(self):
8542 """Build hooks nodes.
8545 nl = [self.cfg.GetMasterNode()]
8546 return (nl, nl)
8548 def CheckPrereq(self):
8549 pass
8551 def Exec(self, feedback_fn):
8552 # Prepare jobs for migration instances
8553 allow_runtime_changes = self.op.allow_runtime_changes
8554 jobs = [
8555 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8556 mode=self.op.mode,
8557 live=self.op.live,
8558 iallocator=self.op.iallocator,
8559 target_node=self.op.target_node,
8560 allow_runtime_changes=allow_runtime_changes,
8561 ignore_ipolicy=self.op.ignore_ipolicy)]
8562 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8564 # TODO: Run iallocator in this opcode and pass correct placement options to
8565 # OpInstanceMigrate. Since other jobs can modify the cluster between
8566 # running the iallocator and the actual migration, a good consistency model
8567 # will have to be found.
8569 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8570 frozenset([self.op.node_name]))
8572 return ResultWithJobs(jobs)
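# The jobs list built above contains one single-opcode job per primary
# instance, e.g. [[OpInstanceMigrate(...)], [OpInstanceMigrate(...)]], so
# each migration is scheduled, executed and reported as an independent job.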
8575 class TLMigrateInstance(Tasklet):
8576 """Tasklet class for instance migration.
8578 @type live: boolean
8579 @ivar live: whether the migration will be done live or non-live;
8580 this variable is initialized only after CheckPrereq has run
8581 @type cleanup: boolean
8582 @ivar cleanup: Whether we cleanup from a failed migration
8583 @type iallocator: string
8584 @ivar iallocator: The iallocator used to determine target_node
8585 @type target_node: string
8586 @ivar target_node: If given, the target_node to reallocate the instance to
8587 @type failover: boolean
8588 @ivar failover: Whether operation results in failover or migration
8589 @type fallback: boolean
8590 @ivar fallback: Whether fallback to failover is allowed if migration not
8591 possible
8592 @type ignore_consistency: boolean
8593 @ivar ignore_consistency: Whether we should ignore consistency between source
8594 and target node
8595 @type shutdown_timeout: int
8596 @ivar shutdown_timeout: In case of failover, timeout of the shutdown
8597 @type ignore_ipolicy: bool
8598 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8603 _MIGRATION_POLL_INTERVAL = 1 # seconds
8604 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8606 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8607 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8608 ignore_ipolicy):
8609 """Initializes this class.
8612 Tasklet.__init__(self, lu)
8615 self.instance_name = instance_name
8616 self.cleanup = cleanup
8617 self.live = False # will be overridden later
8618 self.failover = failover
8619 self.fallback = fallback
8620 self.ignore_consistency = ignore_consistency
8621 self.shutdown_timeout = shutdown_timeout
8622 self.ignore_ipolicy = ignore_ipolicy
8623 self.allow_runtime_changes = allow_runtime_changes
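# For reference, LUInstanceFailover above constructs this tasklet with
# positional arguments mapping roughly as follows (illustrative only; the
# keyword names are shown for clarity, the constructor is positional):
#
#   TLMigrateInstance(self, self.op.instance_name,
#                     cleanup=False, failover=True, fallback=False,
#                     ignore_consistency=self.op.ignore_consistency,
#                     allow_runtime_changes=True,
#                     shutdown_timeout=self.op.shutdown_timeout,
#                     ignore_ipolicy=self.op.ignore_ipolicy)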
8625 def CheckPrereq(self):
8626 """Check prerequisites.
8628 This checks that the instance is in the cluster.
8631 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8632 instance = self.cfg.GetInstanceInfo(instance_name)
8633 assert instance is not None
8634 self.instance = instance
8635 cluster = self.cfg.GetClusterInfo()
8637 if (not self.cleanup and
8638 not instance.admin_state == constants.ADMINST_UP and
8639 not self.failover and self.fallback):
8640 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8641 " switching to failover")
8642 self.failover = True
8644 if instance.disk_template not in constants.DTS_MIRRORED:
8645 if self.failover:
8646 text = "failovers"
8647 else:
8648 text = "migrations"
8649 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8650 " %s" % (instance.disk_template, text),
8651 errors.ECODE_STATE)
8653 if instance.disk_template in constants.DTS_EXT_MIRROR:
8654 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8656 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8658 if self.lu.op.iallocator:
8659 self._RunAllocator()
8660 else:
8661 # We set self.target_node as it is required by
8662 # BuildHooksEnv
8663 self.target_node = self.lu.op.target_node
8665 # Check that the target node is correct in terms of instance policy
8666 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8667 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8668 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8669 group_info)
8670 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8671 ignore=self.ignore_ipolicy)
8673 # self.target_node is already populated, either directly or by the
8674 # iallocator run
8675 target_node = self.target_node
8676 if self.target_node == instance.primary_node:
8677 raise errors.OpPrereqError("Cannot migrate instance %s"
8678 " to its primary (%s)" %
8679 (instance.name, instance.primary_node),
8680 errors.ECODE_INVAL)
8682 if len(self.lu.tasklets) == 1:
8683 # It is safe to release locks only when we're the only tasklet
8684 # in our LU
8685 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8686 keep=[instance.primary_node, self.target_node])
8687 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8689 else:
8690 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8692 secondary_nodes = instance.secondary_nodes
8693 if not secondary_nodes:
8694 raise errors.ConfigurationError("No secondary node but using"
8695 " %s disk template" %
8696 instance.disk_template)
8697 target_node = secondary_nodes[0]
8698 if self.lu.op.iallocator or (self.lu.op.target_node and
8699 self.lu.op.target_node != target_node):
8700 if self.failover:
8701 text = "failed over"
8702 else:
8703 text = "migrated"
8704 raise errors.OpPrereqError("Instances with disk template %s cannot"
8705 " be %s to arbitrary nodes"
8706 " (neither an iallocator nor a target"
8707 " node can be passed)" %
8708 (instance.disk_template, text),
8709 errors.ECODE_INVAL)
8710 nodeinfo = self.cfg.GetNodeInfo(target_node)
8711 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8712 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8713 group_info)
8714 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8715 ignore=self.ignore_ipolicy)
8717 i_be = cluster.FillBE(instance)
8719 # check memory requirements on the secondary node
8720 if (not self.cleanup and
8721 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8722 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8723 "migrating instance %s" %
8724 instance.name,
8725 i_be[constants.BE_MINMEM],
8726 instance.hypervisor)
8727 else:
8728 self.lu.LogInfo("Not checking memory on the secondary node as"
8729 " instance will not be started")
8731 # check if failover must be forced instead of migration
8732 if (not self.cleanup and not self.failover and
8733 i_be[constants.BE_ALWAYS_FAILOVER]):
8734 self.lu.LogInfo("Instance configured to always failover; fallback"
8735 " to migration not possible")
8736 self.failover = True
8738 # check bridge existence
8739 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8741 if not self.cleanup:
8742 _CheckNodeNotDrained(self.lu, target_node)
8743 if not self.failover:
8744 result = self.rpc.call_instance_migratable(instance.primary_node,
8745 instance)
8746 if result.fail_msg and self.fallback:
8747 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8748 " failover")
8749 self.failover = True
8750 else:
8751 result.Raise("Can't migrate, please use failover",
8752 prereq=True, ecode=errors.ECODE_STATE)
8754 assert not (self.failover and self.cleanup)
8756 if not self.failover:
8757 if self.lu.op.live is not None and self.lu.op.mode is not None:
8758 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8759 " parameters are accepted",
8760 errors.ECODE_INVAL)
8761 if self.lu.op.live is not None:
8762 if self.lu.op.live:
8763 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8764 else:
8765 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8766 # reset the 'live' parameter to None so that repeated
8767 # invocations of CheckPrereq do not raise an exception
8768 self.lu.op.live = None
8769 elif self.lu.op.mode is None:
8770 # read the default value from the hypervisor
8771 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8772 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8774 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8775 else:
8776 # Failover is never live
8777 self.live = False
8779 if not (self.failover or self.cleanup):
8780 remote_info = self.rpc.call_instance_info(instance.primary_node,
8781 instance.name,
8782 instance.hypervisor)
8783 remote_info.Raise("Error checking instance on node %s" %
8784 instance.primary_node)
8785 instance_running = bool(remote_info.payload)
8786 if instance_running:
8787 self.current_mem = int(remote_info.payload["memory"])
8789 def _RunAllocator(self):
8790 """Run the allocator based on input opcode.
8793 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8795 # FIXME: add a self.ignore_ipolicy option
8796 req = iallocator.IAReqRelocate(name=self.instance_name,
8797 relocate_from=[self.instance.primary_node])
8798 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8800 ial.Run(self.lu.op.iallocator)
8802 if not ial.success:
8803 raise errors.OpPrereqError("Can't compute nodes using"
8804 " iallocator '%s': %s" %
8805 (self.lu.op.iallocator, ial.info),
8806 errors.ECODE_NORES)
8807 self.target_node = ial.result[0]
8808 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8809 self.instance_name, self.lu.op.iallocator,
8810 utils.CommaJoin(ial.result))
8812 def _WaitUntilSync(self):
8813 """Poll with custom rpc for disk sync.
8815 This uses our own step-based rpc call.
8818 self.feedback_fn("* wait until resync is done")
8819 all_done = False
8820 while not all_done:
8821 all_done = True
8822 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8823 self.nodes_ip,
8824 (self.instance.disks,
8825 self.instance))
8826 min_percent = 100
8827 for node, nres in result.items():
8828 nres.Raise("Cannot resync disks on node %s" % node)
8829 node_done, node_percent = nres.payload
8830 all_done = all_done and node_done
8831 if node_percent is not None:
8832 min_percent = min(min_percent, node_percent)
8833 if not all_done:
8834 if min_percent < 100:
8835 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8836 time.sleep(2)
8838 def _EnsureSecondary(self, node):
8839 """Demote a node to secondary.
8842 self.feedback_fn("* switching node %s to secondary mode" % node)
8844 for dev in self.instance.disks:
8845 self.cfg.SetDiskID(dev, node)
8847 result = self.rpc.call_blockdev_close(node, self.instance.name,
8848 self.instance.disks)
8849 result.Raise("Cannot change disk to secondary on node %s" % node)
8851 def _GoStandalone(self):
8852 """Disconnect from the network.
8855 self.feedback_fn("* changing into standalone mode")
8856 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8857 self.instance.disks)
8858 for node, nres in result.items():
8859 nres.Raise("Cannot disconnect disks node %s" % node)
8861 def _GoReconnect(self, multimaster):
8862 """Reconnect to the network.
8868 msg = "single-master"
8869 self.feedback_fn("* changing disks into %s mode" % msg)
8870 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8871 (self.instance.disks, self.instance),
8872 self.instance.name, multimaster)
8873 for node, nres in result.items():
8874 nres.Raise("Cannot change disks config on node %s" % node)
8876 def _ExecCleanup(self):
8877 """Try to cleanup after a failed migration.
8879 The cleanup is done by:
8880 - check that the instance is running only on one node
8881 (and update the config if needed)
8882 - change disks on its secondary node to secondary
8883 - wait until disks are fully synchronized
8884 - disconnect from the network
8885 - change disks into single-master mode
8886 - wait again until disks are fully synchronized
8889 instance = self.instance
8890 target_node = self.target_node
8891 source_node = self.source_node
8893 # check running on only one node
8894 self.feedback_fn("* checking where the instance actually runs"
8895 " (if this hangs, the hypervisor might be in"
8897 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8898 for node, result in ins_l.items():
8899 result.Raise("Can't contact node %s" % node)
8901 runningon_source = instance.name in ins_l[source_node].payload
8902 runningon_target = instance.name in ins_l[target_node].payload
8904 if runningon_source and runningon_target:
8905 raise errors.OpExecError("Instance seems to be running on two nodes,"
8906 " or the hypervisor is confused; you will have"
8907 " to ensure manually that it runs only on one"
8908 " and restart this operation")
8910 if not (runningon_source or runningon_target):
8911 raise errors.OpExecError("Instance does not seem to be running at all;"
8912 " in this case it's safer to repair by"
8913 " running 'gnt-instance stop' to ensure disk"
8914 " shutdown, and then restarting it")
8916 if runningon_target:
8917 # the migration has actually succeeded, we need to update the config
8918 self.feedback_fn("* instance running on secondary node (%s),"
8919 " updating config" % target_node)
8920 instance.primary_node = target_node
8921 self.cfg.Update(instance, self.feedback_fn)
8922 demoted_node = source_node
8924 self.feedback_fn("* instance confirmed to be running on its"
8925 " primary node (%s)" % source_node)
8926 demoted_node = target_node
8928 if instance.disk_template in constants.DTS_INT_MIRROR:
8929 self._EnsureSecondary(demoted_node)
8931 self._WaitUntilSync()
8932 except errors.OpExecError:
8933 # we ignore here errors, since if the device is standalone, it
8934 # won't be able to sync
8936 self._GoStandalone()
8937 self._GoReconnect(False)
8938 self._WaitUntilSync()
8940 self.feedback_fn("* done")
8942 def _RevertDiskStatus(self):
8943 """Try to revert the disk status after a failed migration.
8946 target_node = self.target_node
8947 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8948 return
8950 try:
8951 self._EnsureSecondary(target_node)
8952 self._GoStandalone()
8953 self._GoReconnect(False)
8954 self._WaitUntilSync()
8955 except errors.OpExecError, err:
8956 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8957 " please try to recover the instance manually;"
8958 " error '%s'" % str(err))
8960 def _AbortMigration(self):
8961 """Call the hypervisor code to abort a started migration.
8964 instance = self.instance
8965 target_node = self.target_node
8966 source_node = self.source_node
8967 migration_info = self.migration_info
8969 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8970 instance,
8971 migration_info,
8972 False)
8973 abort_msg = abort_result.fail_msg
8974 if abort_msg:
8975 logging.error("Aborting migration failed on target node %s: %s",
8976 target_node, abort_msg)
8977 # Don't raise an exception here, as we still have to try to revert the
8978 # disk status, even if this step failed.
8980 abort_result = self.rpc.call_instance_finalize_migration_src(
8981 source_node, instance, False, self.live)
8982 abort_msg = abort_result.fail_msg
8983 if abort_msg:
8984 logging.error("Aborting migration failed on source node %s: %s",
8985 source_node, abort_msg)
8987 def _ExecMigration(self):
8988 """Migrate an instance.
8990 The migrate is done by:
8991 - change the disks into dual-master mode
8992 - wait until disks are fully synchronized again
8993 - migrate the instance
8994 - change disks on the new secondary node (the old primary) to secondary
8995 - wait until disks are fully synchronized
8996 - change disks into single-master mode
8999 instance = self.instance
9000 target_node = self.target_node
9001 source_node = self.source_node
9003 # Check for hypervisor version mismatch and warn the user.
9004 nodeinfo = self.rpc.call_node_info([source_node, target_node],
9005 None, [self.instance.hypervisor], False)
9006 for ninfo in nodeinfo.values():
9007 ninfo.Raise("Unable to retrieve node information from node '%s'" %
9008 ninfo.node)
9009 (_, _, (src_info, )) = nodeinfo[source_node].payload
9010 (_, _, (dst_info, )) = nodeinfo[target_node].payload
9012 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9013 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9014 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9015 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9016 if src_version != dst_version:
9017 self.feedback_fn("* warning: hypervisor version mismatch between"
9018 " source (%s) and target (%s) node" %
9019 (src_version, dst_version))
9021 self.feedback_fn("* checking disk consistency between source and target")
9022 for (idx, dev) in enumerate(instance.disks):
9023 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9024 raise errors.OpExecError("Disk %s is degraded or not fully"
9025 " synchronized on target node,"
9026 " aborting migration" % idx)
9028 if self.current_mem > self.tgt_free_mem:
9029 if not self.allow_runtime_changes:
9030 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9031 " free memory to fit instance %s on target"
9032 " node %s (have %dMB, need %dMB)" %
9033 (instance.name, target_node,
9034 self.tgt_free_mem, self.current_mem))
9035 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9036 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9037 instance,
9038 self.tgt_free_mem)
9039 rpcres.Raise("Cannot modify instance runtime memory")
9041 # First get the migration information from the remote node
9042 result = self.rpc.call_migration_info(source_node, instance)
9043 msg = result.fail_msg
9044 if msg:
9045 log_err = ("Failed fetching source migration information from %s: %s" %
9046 (source_node, msg))
9047 logging.error(log_err)
9048 raise errors.OpExecError(log_err)
9050 self.migration_info = migration_info = result.payload
9052 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9053 # Then switch the disks to master/master mode
9054 self._EnsureSecondary(target_node)
9055 self._GoStandalone()
9056 self._GoReconnect(True)
9057 self._WaitUntilSync()
9059 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9060 result = self.rpc.call_accept_instance(target_node,
9061 instance,
9062 migration_info,
9063 self.nodes_ip[target_node])
9065 msg = result.fail_msg
9066 if msg:
9067 logging.error("Instance pre-migration failed, trying to revert"
9068 " disk status: %s", msg)
9069 self.feedback_fn("Pre-migration failed, aborting")
9070 self._AbortMigration()
9071 self._RevertDiskStatus()
9072 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9073 (instance.name, msg))
9075 self.feedback_fn("* migrating instance to %s" % target_node)
9076 result = self.rpc.call_instance_migrate(source_node, instance,
9077 self.nodes_ip[target_node],
9078 self.live)
9079 msg = result.fail_msg
9080 if msg:
9081 logging.error("Instance migration failed, trying to revert"
9082 " disk status: %s", msg)
9083 self.feedback_fn("Migration failed, aborting")
9084 self._AbortMigration()
9085 self._RevertDiskStatus()
9086 raise errors.OpExecError("Could not migrate instance %s: %s" %
9087 (instance.name, msg))
9089 self.feedback_fn("* starting memory transfer")
9090 last_feedback = time.time()
9091 while True:
9092 result = self.rpc.call_instance_get_migration_status(source_node,
9093 instance)
9094 msg = result.fail_msg
9095 ms = result.payload # MigrationStatus instance
9096 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9097 logging.error("Instance migration failed, trying to revert"
9098 " disk status: %s", msg)
9099 self.feedback_fn("Migration failed, aborting")
9100 self._AbortMigration()
9101 self._RevertDiskStatus()
9102 if not msg:
9103 msg = "hypervisor returned failure"
9104 raise errors.OpExecError("Could not migrate instance %s: %s" %
9105 (instance.name, msg))
9107 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9108 self.feedback_fn("* memory transfer complete")
9109 break
9111 if (utils.TimeoutExpired(last_feedback,
9112 self._MIGRATION_FEEDBACK_INTERVAL) and
9113 ms.transferred_ram is not None):
9114 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9115 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9116 last_feedback = time.time()
9118 time.sleep(self._MIGRATION_POLL_INTERVAL)
9120 result = self.rpc.call_instance_finalize_migration_src(source_node,
9121 instance,
9122 True,
9123 self.live)
9124 msg = result.fail_msg
9125 if msg:
9126 logging.error("Instance migration succeeded, but finalization failed"
9127 " on the source node: %s", msg)
9128 raise errors.OpExecError("Could not finalize instance migration: %s" %
9129 msg)
9131 instance.primary_node = target_node
9133 # distribute new instance config to the other nodes
9134 self.cfg.Update(instance, self.feedback_fn)
9136 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9137 instance,
9138 migration_info,
9139 True)
9140 msg = result.fail_msg
9141 if msg:
9142 logging.error("Instance migration succeeded, but finalization failed"
9143 " on the target node: %s", msg)
9144 raise errors.OpExecError("Could not finalize instance migration: %s" %
9145 msg)
9147 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9148 self._EnsureSecondary(source_node)
9149 self._WaitUntilSync()
9150 self._GoStandalone()
9151 self._GoReconnect(False)
9152 self._WaitUntilSync()
9154 # If the instance's disk template is `rbd' or `ext' and there was a
9155 # successful migration, unmap the device from the source node.
9156 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9157 disks = _ExpandCheckDisks(instance, instance.disks)
9158 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9159 for disk in disks:
9160 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9161 msg = result.fail_msg
9162 if msg:
9163 logging.error("Migration was successful, but couldn't unmap the"
9164 " block device %s on source node %s: %s",
9165 disk.iv_name, source_node, msg)
9166 logging.error("You need to unmap the device %s manually on %s",
9167 disk.iv_name, source_node)
9169 self.feedback_fn("* done")
9171 def _ExecFailover(self):
9172 """Failover an instance.
9174 The failover is done by shutting it down on its present node and
9175 starting it on the secondary.
9178 instance = self.instance
9179 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9181 source_node = instance.primary_node
9182 target_node = self.target_node
9184 if instance.admin_state == constants.ADMINST_UP:
9185 self.feedback_fn("* checking disk consistency between source and target")
9186 for (idx, dev) in enumerate(instance.disks):
9187 # for drbd, these are drbd over lvm
9188 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9189 False):
9190 if primary_node.offline:
9191 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9192 " target node %s" %
9193 (primary_node.name, idx, target_node))
9194 elif not self.ignore_consistency:
9195 raise errors.OpExecError("Disk %s is degraded on target node,"
9196 " aborting failover" % idx)
9197 else:
9198 self.feedback_fn("* not checking disk consistency as instance is not"
9199 " running")
9201 self.feedback_fn("* shutting down instance on source node")
9202 logging.info("Shutting down instance %s on node %s",
9203 instance.name, source_node)
9205 result = self.rpc.call_instance_shutdown(source_node, instance,
9206 self.shutdown_timeout)
9207 msg = result.fail_msg
9208 if msg:
9209 if self.ignore_consistency or primary_node.offline:
9210 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9211 " proceeding anyway; please make sure node"
9212 " %s is down; error details: %s",
9213 instance.name, source_node, source_node, msg)
9214 else:
9215 raise errors.OpExecError("Could not shutdown instance %s on"
9216 " node %s: %s" %
9217 (instance.name, source_node, msg))
9219 self.feedback_fn("* deactivating the instance's disks on source node")
9220 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9221 raise errors.OpExecError("Can't shut down the instance's disks")
9223 instance.primary_node = target_node
9224 # distribute new instance config to the other nodes
9225 self.cfg.Update(instance, self.feedback_fn)
9227 # Only start the instance if it's marked as up
9228 if instance.admin_state == constants.ADMINST_UP:
9229 self.feedback_fn("* activating the instance's disks on target node %s" %
9230 target_node)
9231 logging.info("Starting instance %s on node %s",
9232 instance.name, target_node)
9234 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9235 ignore_secondaries=True)
9236 if not disks_ok:
9237 _ShutdownInstanceDisks(self.lu, instance)
9238 raise errors.OpExecError("Can't activate the instance's disks")
9240 self.feedback_fn("* starting the instance on the target node %s" %
9241 target_node)
9242 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9243 False)
9244 msg = result.fail_msg
9245 if msg:
9246 _ShutdownInstanceDisks(self.lu, instance)
9247 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9248 (instance.name, target_node, msg))
9250 def Exec(self, feedback_fn):
9251 """Perform the migration.
9254 self.feedback_fn = feedback_fn
9255 self.source_node = self.instance.primary_node
9257 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9258 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9259 self.target_node = self.instance.secondary_nodes[0]
9260 # Otherwise self.target_node has been populated either
9261 # directly, or through an iallocator.
9263 self.all_nodes = [self.source_node, self.target_node]
9264 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9265 in self.cfg.GetMultiNodeInfo(self.all_nodes))
9268 feedback_fn("Failover instance %s" % self.instance.name)
9269 self._ExecFailover()
9271 feedback_fn("Migrating instance %s" % self.instance.name)
9274 return self._ExecCleanup()
9276 return self._ExecMigration()
9279 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9280 force_open):
9281 """Wrapper around L{_CreateBlockDevInner}.
9283 This method annotates the root device first.
9286 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9287 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9288 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9289 force_open, excl_stor)
9292 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9293 info, force_open, excl_stor):
9294 """Create a tree of block devices on a given node.
9296 If this device type has to be created on secondaries, create it and
9297 all its children.
9299 If not, just recurse to children keeping the same 'force' value.
9301 @attention: The device has to be annotated already.
9303 @param lu: the lu on whose behalf we execute
9304 @param node: the node on which to create the device
9305 @type instance: L{objects.Instance}
9306 @param instance: the instance which owns the device
9307 @type device: L{objects.Disk}
9308 @param device: the device to create
9309 @type force_create: boolean
9310 @param force_create: whether to force creation of this device; this
9311 will be changed to True whenever we find a device for which
9312 CreateOnSecondary() is true
9313 @param info: the extra 'metadata' we should attach to the device
9314 (this will be represented as a LVM tag)
9315 @type force_open: boolean
9316 @param force_open: this parameter will be passed to the
9317 L{backend.BlockdevCreate} function where it specifies
9318 whether we run on primary or not, and it affects both
9319 the child assembly and the device's own Open() execution
9320 @type excl_stor: boolean
9321 @param excl_stor: Whether exclusive_storage is active for the node
9324 if device.CreateOnSecondary():
9325 force_create = True
9327 if device.children:
9328 for child in device.children:
9329 _CreateBlockDevInner(lu, node, instance, child, force_create,
9330 info, force_open, excl_stor)
9332 if not force_create:
9333 return
9335 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9336 excl_stor)
9339 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9340 excl_stor):
9341 """Create a single block device on a given node.
9343 This will not recurse over children of the device, so they must be
9344 created in advance.
9346 @param lu: the lu on whose behalf we execute
9347 @param node: the node on which to create the device
9348 @type instance: L{objects.Instance}
9349 @param instance: the instance which owns the device
9350 @type device: L{objects.Disk}
9351 @param device: the device to create
9352 @param info: the extra 'metadata' we should attach to the device
9353 (this will be represented as a LVM tag)
9354 @type force_open: boolean
9355 @param force_open: this parameter will be passed to the
9356 L{backend.BlockdevCreate} function where it specifies
9357 whether we run on primary or not, and it affects both
9358 the child assembly and the device's own Open() execution
9359 @type excl_stor: boolean
9360 @param excl_stor: Whether exclusive_storage is active for the node
9363 lu.cfg.SetDiskID(device, node)
9364 result = lu.rpc.call_blockdev_create(node, device, device.size,
9365 instance.name, force_open, info,
9366 excl_stor)
9367 result.Raise("Can't create block device %s on"
9368 " node %s for instance %s" % (device, node, instance.name))
9369 if device.physical_id is None:
9370 device.physical_id = result.payload
9373 def _GenerateUniqueNames(lu, exts):
9374 """Generate a suitable LV name.
9376 This will generate a logical volume name for the given instance.
9379 results = []
9380 for val in exts:
9381 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9382 results.append("%s%s" % (new_id, val))
9383 return results
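# Illustrative sketch (added commentary, not from the original source): each
# extension gets its own config-wide unique ID, so a call such as
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# returns names of the form ["<id1>.disk0", "<id2>.disk1"], where the exact
# ID format is whatever lu.cfg.GenerateUniqueID emits.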
9386 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9387 iv_name, p_minor, s_minor):
9388 """Generate a drbd8 device complete with its children.
9391 assert len(vgnames) == len(names) == 2
9392 port = lu.cfg.AllocatePort()
9393 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9395 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9396 logical_id=(vgnames[0], names[0]),
9397 params={})
9398 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9399 size=constants.DRBD_META_SIZE,
9400 logical_id=(vgnames[1], names[1]),
9401 params={})
9402 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9403 logical_id=(primary, secondary, port,
9404 p_minor, s_minor,
9405 shared_secret),
9406 children=[dev_data, dev_meta],
9407 iv_name=iv_name, params={})
9408 return drbd_dev
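# Illustrative sketch of the tree built above (values hypothetical): for a
# 1024 MB disk the returned object is roughly
#   Disk(LD_DRBD8, size=1024,
#        logical_id=(primary, secondary, port, p_minor, s_minor, secret),
#        children=[Disk(LD_LV, size=1024, logical_id=(vg, "<id>_data")),
#                  Disk(LD_LV, size=DRBD_META_SIZE,
#                       logical_id=(metavg, "<id>_meta"))])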
9411 _DISK_TEMPLATE_NAME_PREFIX = {
9412 constants.DT_PLAIN: "",
9413 constants.DT_RBD: ".rbd",
9414 constants.DT_EXT: ".ext",
9415 }
9418 _DISK_TEMPLATE_DEVICE_TYPE = {
9419 constants.DT_PLAIN: constants.LD_LV,
9420 constants.DT_FILE: constants.LD_FILE,
9421 constants.DT_SHARED_FILE: constants.LD_FILE,
9422 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9423 constants.DT_RBD: constants.LD_RBD,
9424 constants.DT_EXT: constants.LD_EXT,
9425 }
9428 def _GenerateDiskTemplate(
9429 lu, template_name, instance_name, primary_node, secondary_nodes,
9430 disk_info, file_storage_dir, file_driver, base_index,
9431 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9432 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9433 """Generate the entire disk layout for a given template type.
9436 vgname = lu.cfg.GetVGName()
9437 disk_count = len(disk_info)
9438 disks = []
9440 if template_name == constants.DT_DISKLESS:
9441 pass
9442 elif template_name == constants.DT_DRBD8:
9443 if len(secondary_nodes) != 1:
9444 raise errors.ProgrammerError("Wrong template configuration")
9445 remote_node = secondary_nodes[0]
9446 minors = lu.cfg.AllocateDRBDMinor(
9447 [primary_node, remote_node] * len(disk_info), instance_name)
9449 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9450 full_disk_params)
9451 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9453 names = []
9454 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9455 for i in range(disk_count)]):
9456 names.append(lv_prefix + "_data")
9457 names.append(lv_prefix + "_meta")
9458 for idx, disk in enumerate(disk_info):
9459 disk_index = idx + base_index
9460 data_vg = disk.get(constants.IDISK_VG, vgname)
9461 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9462 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9463 disk[constants.IDISK_SIZE],
9464 [data_vg, meta_vg],
9465 names[idx * 2:idx * 2 + 2],
9466 "disk/%d" % disk_index,
9467 minors[idx * 2], minors[idx * 2 + 1])
9468 disk_dev.mode = disk[constants.IDISK_MODE]
9469 disks.append(disk_dev)
9470 else:
9471 if secondary_nodes:
9472 raise errors.ProgrammerError("Wrong template configuration")
9474 if template_name == constants.DT_FILE:
9475 _req_file_storage()
9476 elif template_name == constants.DT_SHARED_FILE:
9477 _req_shr_file_storage()
9479 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9480 if name_prefix is None:
9481 names = None
9482 else:
9483 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9484 (name_prefix, base_index + i)
9485 for i in range(disk_count)])
9487 if template_name == constants.DT_PLAIN:
9489 def logical_id_fn(idx, _, disk):
9490 vg = disk.get(constants.IDISK_VG, vgname)
9491 return (vg, names[idx])
9493 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9494 logical_id_fn = \
9495 lambda _, disk_index, disk: (file_driver,
9496 "%s/disk%d" % (file_storage_dir,
9497 disk_index))
9498 elif template_name == constants.DT_BLOCK:
9499 logical_id_fn = \
9500 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9501 disk[constants.IDISK_ADOPT])
9502 elif template_name == constants.DT_RBD:
9503 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9504 elif template_name == constants.DT_EXT:
9505 def logical_id_fn(idx, _, disk):
9506 provider = disk.get(constants.IDISK_PROVIDER, None)
9507 if provider is None:
9508 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9509 " not found" % (constants.DT_EXT,
9510 constants.IDISK_PROVIDER))
9511 return (provider, names[idx])
9512 else:
9513 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9515 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9517 for idx, disk in enumerate(disk_info):
9518 params = {}
9519 # Only for the Ext template add disk_info to params
9520 if template_name == constants.DT_EXT:
9521 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9522 for key in disk:
9523 if key not in constants.IDISK_PARAMS:
9524 params[key] = disk[key]
9525 disk_index = idx + base_index
9526 size = disk[constants.IDISK_SIZE]
9527 feedback_fn("* disk %s, size %s" %
9528 (disk_index, utils.FormatUnit(size, "h")))
9529 disks.append(objects.Disk(dev_type=dev_type, size=size,
9530 logical_id=logical_id_fn(idx, disk_index, disk),
9531 iv_name="disk/%d" % disk_index,
9532 mode=disk[constants.IDISK_MODE],
9533 params=params))
9535 return disks
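# Illustrative summary (example values hypothetical): the logical_id_fn
# chosen above yields, per template,
#   DT_PLAIN:       ("xenvg", "<id>.disk0")
#   DT_FILE/SHARED: (file_driver, "<file_storage_dir>/disk0")
#   DT_BLOCK:       (constants.BLOCKDEV_DRIVER_MANUAL, "<adopted device path>")
#   DT_RBD:         ("rbd", "<id>.rbd.disk0")
#   DT_EXT:         ("<provider>", "<id>.ext.disk0")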
9538 def _GetInstanceInfoText(instance):
9539 """Compute the text that should be added to the disk's metadata.
9542 return "originstname+%s" % instance.name
9545 def _CalcEta(time_taken, written, total_size):
9546 """Calculates the ETA based on size written and total size.
9548 @param time_taken: The time taken so far
9549 @param written: amount written so far
9550 @param total_size: The total size of data to be written
9551 @return: The remaining time in seconds
9554 avg_time = time_taken / float(written)
9555 return (total_size - written) * avg_time
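# Worked example (illustrative only): after 120 seconds with 1024 MiB out of
# 4096 MiB written, avg_time = 120 / 1024.0, about 0.117 s/MiB, so the ETA is
# (4096 - 1024) * 0.117, roughly 360 seconds, i.e. six more minutes at the
# observed average rate.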
9558 def _WipeDisks(lu, instance, disks=None):
9559 """Wipes instance disks.
9561 @type lu: L{LogicalUnit}
9562 @param lu: the logical unit on whose behalf we execute
9563 @type instance: L{objects.Instance}
9564 @param instance: the instance whose disks we should create
9565 @return: the success of the wipe
9568 node = instance.primary_node
9570 if disks is None:
9571 disks = [(idx, disk, 0)
9572 for (idx, disk) in enumerate(instance.disks)]
9574 for (_, device, _) in disks:
9575 lu.cfg.SetDiskID(device, node)
9577 logging.info("Pausing synchronization of disks of instance '%s'",
9578 instance.name)
9579 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9580 (map(compat.snd, disks),
9581 instance),
9582 True)
9583 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9585 for idx, success in enumerate(result.payload):
9586 if not success:
9587 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9588 " failed", idx, instance.name)
9590 try:
9591 for (idx, device, offset) in disks:
9592 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9593 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9594 wipe_chunk_size = \
9595 int(min(constants.MAX_WIPE_CHUNK,
9596 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9598 size = device.size
9599 last_output = 0
9600 start_time = time.time()
9602 if offset == 0:
9603 info_text = ""
9604 else:
9605 info_text = (" (from %s to %s)" %
9606 (utils.FormatUnit(offset, "h"),
9607 utils.FormatUnit(size, "h")))
9609 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9611 logging.info("Wiping disk %d for instance %s on node %s using"
9612 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9614 while offset < size:
9615 wipe_size = min(wipe_chunk_size, size - offset)
9617 logging.debug("Wiping disk %d, offset %s, chunk %s",
9618 idx, offset, wipe_size)
9620 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9621 wipe_size)
9622 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9623 (idx, offset, wipe_size))
9625 now = time.time()
9626 offset += wipe_size
9627 if now - last_output >= 60:
9628 eta = _CalcEta(now - start_time, offset, size)
9629 lu.LogInfo(" - done: %.1f%% ETA: %s",
9630 offset / float(size) * 100, utils.FormatSeconds(eta))
9631 last_output = now
9632 finally:
9633 logging.info("Resuming synchronization of disks for instance '%s'",
9634 instance.name)
9636 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9637 (map(compat.snd, disks),
9638 instance),
9639 False)
9641 if result.fail_msg:
9642 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9643 node, result.fail_msg)
9644 else:
9645 for idx, success in enumerate(result.payload):
9646 if not success:
9647 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9648 " failed", idx, instance.name)
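# Worked example (illustrative; assumes MIN_WIPE_CHUNK_PERCENT = 10 and
# MAX_WIPE_CHUNK = 1024 MiB): a 2048 MiB disk is wiped in chunks of
# int(min(1024, 2048 / 100.0 * 10)) = 204 MiB, while any disk larger than
# about 10 GiB is capped at the 1024 MiB maximum chunk size.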
9651 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9652 """Create all disks for an instance.
9654 This abstracts away some work from AddInstance.
9656 @type lu: L{LogicalUnit}
9657 @param lu: the logical unit on whose behalf we execute
9658 @type instance: L{objects.Instance}
9659 @param instance: the instance whose disks we should create
9661 @param to_skip: list of indices to skip
9662 @type target_node: string
9663 @param target_node: if passed, overrides the target node for creation
9665 @return: the success of the creation
9668 info = _GetInstanceInfoText(instance)
9669 if target_node is None:
9670 pnode = instance.primary_node
9671 all_nodes = instance.all_nodes
9672 else:
9673 pnode = target_node
9674 all_nodes = [pnode]
9676 if instance.disk_template in constants.DTS_FILEBASED:
9677 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9678 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9680 result.Raise("Failed to create directory '%s' on"
9681 " node %s" % (file_storage_dir, pnode))
9683 # Note: this needs to be kept in sync with adding of disks in
9684 # LUInstanceSetParams
9685 for idx, device in enumerate(instance.disks):
9686 if to_skip and idx in to_skip:
9687 continue
9688 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9690 for node in all_nodes:
9691 f_create = node == pnode
9692 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
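# Note (added commentary): in the loop above both force_create and force_open
# are the same "node == pnode" flag, so devices are always created and opened
# on the primary node, while secondary nodes only create the pieces that
# report CreateOnSecondary() as true (e.g. the LVs backing a DRBD device).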
9695 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9696 """Remove all disks for an instance.
9698 This abstracts away some work from `AddInstance()` and
9699 `RemoveInstance()`. Note that in case some of the devices couldn't
9700 be removed, the removal will continue with the other ones (compare
9701 with `_CreateDisks()`).
9703 @type lu: L{LogicalUnit}
9704 @param lu: the logical unit on whose behalf we execute
9705 @type instance: L{objects.Instance}
9706 @param instance: the instance whose disks we should remove
9707 @type target_node: string
9708 @param target_node: used to override the node on which to remove the disks
9710 @return: the success of the removal
9713 logging.info("Removing block devices for instance %s", instance.name)
9715 all_result = True
9716 ports_to_release = set()
9717 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9718 for (idx, device) in enumerate(anno_disks):
9719 if target_node:
9720 edata = [(target_node, device)]
9721 else:
9722 edata = device.ComputeNodeTree(instance.primary_node)
9723 for node, disk in edata:
9724 lu.cfg.SetDiskID(disk, node)
9725 result = lu.rpc.call_blockdev_remove(node, disk)
9726 if result.fail_msg:
9727 lu.LogWarning("Could not remove disk %s on node %s,"
9728 " continuing anyway: %s", idx, node, result.fail_msg)
9729 if not (result.offline and node != instance.primary_node):
9730 all_result = False
9732 # if this is a DRBD disk, return its port to the pool
9733 if device.dev_type in constants.LDS_DRBD:
9734 ports_to_release.add(device.logical_id[2])
9736 if all_result or ignore_failures:
9737 for port in ports_to_release:
9738 lu.cfg.AddTcpUdpPort(port)
9740 if instance.disk_template in constants.DTS_FILEBASED:
9741 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9742 if target_node:
9743 tgt = target_node
9744 else:
9745 tgt = instance.primary_node
9746 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9747 if result.fail_msg:
9748 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9749 file_storage_dir, instance.primary_node, result.fail_msg)
9750 all_result = False
9752 return all_result
9755 def _ComputeDiskSizePerVG(disk_template, disks):
9756 """Compute disk size requirements in the volume group
9759 def _compute(disks, payload):
9760 """Universal algorithm.
9763 vgs = {}
9764 for disk in disks:
9765 vgs[disk[constants.IDISK_VG]] = \
9766 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9768 return vgs
9770 # Required free disk space as a function of disk and swap space
9771 req_size_dict = {
9772 constants.DT_DISKLESS: {},
9773 constants.DT_PLAIN: _compute(disks, 0),
9774 # 128 MB are added for drbd metadata for each disk
9775 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9776 constants.DT_FILE: {},
9777 constants.DT_SHARED_FILE: {},
9778 }
9780 if disk_template not in req_size_dict:
9781 raise errors.ProgrammerError("Disk template '%s' size requirement"
9782 " is unknown" % disk_template)
9784 return req_size_dict[disk_template]
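# Worked example (illustrative; assumes constants.DRBD_META_SIZE = 128): two
# DRBD disks of 10240 MB each in volume group "xenvg" give
#   _compute(disks, constants.DRBD_META_SIZE)
#     == {"xenvg": (10240 + 128) + (10240 + 128)} == {"xenvg": 20736}
# whereas DT_PLAIN would yield {"xenvg": 20480} for the same disks.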
9787 def _FilterVmNodes(lu, nodenames):
9788 """Filters out non-vm_capable nodes from a list.
9790 @type lu: L{LogicalUnit}
9791 @param lu: the logical unit for which we check
9792 @type nodenames: list
9793 @param nodenames: the list of nodes on which we should check
9795 @return: the list of vm-capable nodes
9798 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9799 return [name for name in nodenames if name not in vm_nodes]
9802 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9803 """Hypervisor parameter validation.
9805 This function abstracts the hypervisor parameter validation to be
9806 used in both instance create and instance modify.
9808 @type lu: L{LogicalUnit}
9809 @param lu: the logical unit for which we check
9810 @type nodenames: list
9811 @param nodenames: the list of nodes on which we should check
9812 @type hvname: string
9813 @param hvname: the name of the hypervisor we should use
9814 @type hvparams: dict
9815 @param hvparams: the parameters which we need to check
9816 @raise errors.OpPrereqError: if the parameters are not valid
9819 nodenames = _FilterVmNodes(lu, nodenames)
9821 cluster = lu.cfg.GetClusterInfo()
9822 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9824 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9825 for node in nodenames:
9826 info = hvinfo[node]
9827 if info.offline:
9828 continue
9829 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9832 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9833 """OS parameters validation.
9835 @type lu: L{LogicalUnit}
9836 @param lu: the logical unit for which we check
9837 @type required: boolean
9838 @param required: whether the validation should fail if the OS is not
9839 found
9840 @type nodenames: list
9841 @param nodenames: the list of nodes on which we should check
9842 @type osname: string
9843 @param osname: the name of the OS we should use
9844 @type osparams: dict
9845 @param osparams: the parameters which we need to check
9846 @raise errors.OpPrereqError: if the parameters are not valid
9849 nodenames = _FilterVmNodes(lu, nodenames)
9850 result = lu.rpc.call_os_validate(nodenames, required, osname,
9851 [constants.OS_VALIDATE_PARAMETERS],
9852 osparams)
9853 for node, nres in result.items():
9854 # we don't check for offline cases since this should be run only
9855 # against the master node and/or an instance's nodes
9856 nres.Raise("OS Parameters validation failed on node %s" % node)
9857 if not nres.payload:
9858 lu.LogInfo("OS %s not found on node %s, validation skipped",
9859 osname, node)
9862 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9863 """Wrapper around IAReqInstanceAlloc.
9865 @param op: The instance opcode
9866 @param disks: The computed disks
9867 @param nics: The computed nics
9868 @param beparams: The fully filled beparams
9869 @param node_whitelist: List of nodes which should appear as online to the
9870 allocator (unless the node is already marked offline)
9872 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9875 spindle_use = beparams[constants.BE_SPINDLE_USE]
9876 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9877 disk_template=op.disk_template,
9878 tags=op.tags,
9879 os=op.os_type,
9880 vcpus=beparams[constants.BE_VCPUS],
9881 memory=beparams[constants.BE_MAXMEM],
9882 spindle_use=spindle_use,
9883 disks=disks,
9884 nics=[n.ToDict() for n in nics],
9885 hypervisor=op.hypervisor,
9886 node_whitelist=node_whitelist)
9889 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9890 """Computes the nics.
9892 @param op: The instance opcode
9893 @param cluster: Cluster configuration object
9894 @param default_ip: The default ip to assign
9895 @param cfg: An instance of the configuration object
9896 @param ec_id: Execution context ID
9898 @returns: The built-up NICs
9901 nics = []
9902 for nic in op.nics:
9903 nic_mode_req = nic.get(constants.INIC_MODE, None)
9904 nic_mode = nic_mode_req
9905 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9906 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9908 net = nic.get(constants.INIC_NETWORK, None)
9909 link = nic.get(constants.NIC_LINK, None)
9910 ip = nic.get(constants.INIC_IP, None)
9912 if net is None or net.lower() == constants.VALUE_NONE:
9913 net = None
9914 else:
9915 if nic_mode_req is not None or link is not None:
9916 raise errors.OpPrereqError("If network is given, no mode or link"
9917 " is allowed to be passed",
9918 errors.ECODE_INVAL)
9920 # ip validity checks
9921 if ip is None or ip.lower() == constants.VALUE_NONE:
9922 nic_ip = None
9923 elif ip.lower() == constants.VALUE_AUTO:
9924 if not op.name_check:
9925 raise errors.OpPrereqError("IP address set to auto but name checks"
9926 " have been skipped",
9927 errors.ECODE_INVAL)
9928 nic_ip = default_ip
9929 else:
9930 # We defer pool operations until later, so that the iallocator has
9931 # filled in the instance's node(s)
9932 if ip.lower() == constants.NIC_IP_POOL:
9933 if net is None:
9934 raise errors.OpPrereqError("if ip=pool, parameter network"
9935 " must be passed too",
9936 errors.ECODE_INVAL)
9938 elif not netutils.IPAddress.IsValid(ip):
9939 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9940 errors.ECODE_INVAL)
9942 nic_ip = ip
9944 # TODO: check the ip address for uniqueness
9945 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9946 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9947 errors.ECODE_INVAL)
9949 # MAC address verification
9950 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9951 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9952 mac = utils.NormalizeAndValidateMac(mac)
9954 try:
9955 # TODO: We need to factor this out
9956 cfg.ReserveMAC(mac, ec_id)
9957 except errors.ReservationError:
9958 raise errors.OpPrereqError("MAC address %s already in use"
9959 " in cluster" % mac,
9960 errors.ECODE_NOTUNIQUE)
9962 # Build nic parameters
9963 nicparams = {}
9964 if nic_mode_req:
9965 nicparams[constants.NIC_MODE] = nic_mode
9966 if link:
9967 nicparams[constants.NIC_LINK] = link
9969 check_params = cluster.SimpleFillNIC(nicparams)
9970 objects.NIC.CheckParameterSyntax(check_params)
9971 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9972 network=net, nicparams=nicparams))
9974 return nics
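# Illustrative sketch (hypothetical values): an opcode nic such as
#   {constants.INIC_IP: constants.VALUE_AUTO,
#    constants.INIC_MAC: constants.VALUE_AUTO}
# passes the checks above with nic_ip resolved to default_ip, keeps the
# "auto" MAC for later generation in CheckPrereq, and ends up as an
# objects.NIC whose nicparams fall back to the cluster defaults via
# SimpleFillNIC.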
9977 def _ComputeDisks(op, default_vg):
9978 """Computes the instance disks.
9980 @param op: The instance opcode
9981 @param default_vg: The default_vg to assume
9983 @return: The computed disks
9986 disks = []
9987 for disk in op.disks:
9988 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9989 if mode not in constants.DISK_ACCESS_SET:
9990 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9991 mode, errors.ECODE_INVAL)
9992 size = disk.get(constants.IDISK_SIZE, None)
9993 if size is None:
9994 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9995 try:
9996 size = int(size)
9997 except (TypeError, ValueError):
9998 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9999 errors.ECODE_INVAL)
10001 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
10002 if ext_provider and op.disk_template != constants.DT_EXT:
10003 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10004 " disk template, not %s" %
10005 (constants.IDISK_PROVIDER, constants.DT_EXT,
10006 op.disk_template), errors.ECODE_INVAL)
10008 data_vg = disk.get(constants.IDISK_VG, default_vg)
10009 new_disk = {
10010 constants.IDISK_SIZE: size,
10011 constants.IDISK_MODE: mode,
10012 constants.IDISK_VG: data_vg,
10013 }
10015 if constants.IDISK_METAVG in disk:
10016 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10017 if constants.IDISK_ADOPT in disk:
10018 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10020 # For extstorage, demand the `provider' option and add any
10021 # additional parameters (ext-params) to the dict
10022 if op.disk_template == constants.DT_EXT:
10023 if ext_provider:
10024 new_disk[constants.IDISK_PROVIDER] = ext_provider
10025 for key in disk:
10026 if key not in constants.IDISK_PARAMS:
10027 new_disk[key] = disk[key]
10028 else:
10029 raise errors.OpPrereqError("Missing provider for template '%s'" %
10030 constants.DT_EXT, errors.ECODE_INVAL)
10032 disks.append(new_disk)
10034 return disks
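# Illustrative sketch (hypothetical values): an opcode disk of
#   {constants.IDISK_SIZE: 10240, constants.IDISK_VG: "xenvg"}
# is normalized above into
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}
# with metavg/adopt/provider keys copied over only when present.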
10037 def _ComputeFullBeParams(op, cluster):
10038 """Computes the full beparams.
10040 @param op: The instance opcode
10041 @param cluster: The cluster config object
10043 @return: The fully filled beparams
10046 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10047 for param, value in op.beparams.iteritems():
10048 if value == constants.VALUE_AUTO:
10049 op.beparams[param] = default_beparams[param]
10050 objects.UpgradeBeParams(op.beparams)
10051 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10052 return cluster.SimpleFillBE(op.beparams)
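# Illustrative sketch (hypothetical values): if the cluster default for
# constants.BE_MAXMEM is 1024 and the opcode passes
#   {constants.BE_MAXMEM: constants.VALUE_AUTO}
# the "auto" is first replaced by the default 1024, UpgradeBeParams then
# splits any legacy "memory" value into minmem/maxmem, and SimpleFillBE
# merges in the remaining cluster defaults to produce the full dict.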
10055 def _CheckOpportunisticLocking(op):
10056 """Generate error if opportunistic locking is not possible.
10059 if op.opportunistic_locking and not op.iallocator:
10060 raise errors.OpPrereqError("Opportunistic locking is only available in"
10061 " combination with an instance allocator",
10062 errors.ECODE_INVAL)
10065 class LUInstanceCreate(LogicalUnit):
10066 """Create an instance.
10069 HPATH = "instance-add"
10070 HTYPE = constants.HTYPE_INSTANCE
10071 REQ_BGL = False
10073 def CheckArguments(self):
10074 """Check arguments.
10077 # do not require name_check to ease forward/backward compatibility
10079 if self.op.no_install and self.op.start:
10080 self.LogInfo("No-installation mode selected, disabling startup")
10081 self.op.start = False
10082 # validate/normalize the instance name
10083 self.op.instance_name = \
10084 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10086 if self.op.ip_check and not self.op.name_check:
10087 # TODO: make the ip check more flexible and not depend on the name check
10088 raise errors.OpPrereqError("Cannot do IP address check without a name"
10089 " check", errors.ECODE_INVAL)
10091 # check nics' parameter names
10092 for nic in self.op.nics:
10093 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10095 # check disks. parameter names and consistent adopt/no-adopt strategy
10096 has_adopt = has_no_adopt = False
10097 for disk in self.op.disks:
10098 if self.op.disk_template != constants.DT_EXT:
10099 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10100 if constants.IDISK_ADOPT in disk:
10101 has_adopt = True
10102 else:
10103 has_no_adopt = True
10104 if has_adopt and has_no_adopt:
10105 raise errors.OpPrereqError("Either all disks are adopted or none is",
10106 errors.ECODE_INVAL)
10107 if has_adopt:
10108 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10109 raise errors.OpPrereqError("Disk adoption is not supported for the"
10110 " '%s' disk template" %
10111 self.op.disk_template,
10112 errors.ECODE_INVAL)
10113 if self.op.iallocator is not None:
10114 raise errors.OpPrereqError("Disk adoption not allowed with an"
10115 " iallocator script", errors.ECODE_INVAL)
10116 if self.op.mode == constants.INSTANCE_IMPORT:
10117 raise errors.OpPrereqError("Disk adoption not allowed for"
10118 " instance import", errors.ECODE_INVAL)
10119 else:
10120 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10121 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10122 " but no 'adopt' parameter given" %
10123 self.op.disk_template,
10124 errors.ECODE_INVAL)
10126 self.adopt_disks = has_adopt
10128 # instance name verification
10129 if self.op.name_check:
10130 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10131 self.op.instance_name = self.hostname1.name
10132 # used in CheckPrereq for ip ping check
10133 self.check_ip = self.hostname1.ip
10134 else:
10135 self.check_ip = None
10137 # file storage checks
10138 if (self.op.file_driver and
10139 self.op.file_driver not in constants.FILE_DRIVER):
10140 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10141 self.op.file_driver, errors.ECODE_INVAL)
10143 if self.op.disk_template == constants.DT_FILE:
10144 opcodes.RequireFileStorage()
10145 elif self.op.disk_template == constants.DT_SHARED_FILE:
10146 opcodes.RequireSharedFileStorage()
10148 ### Node/iallocator related checks
10149 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10151 if self.op.pnode is not None:
10152 if self.op.disk_template in constants.DTS_INT_MIRROR:
10153 if self.op.snode is None:
10154 raise errors.OpPrereqError("The networked disk templates need"
10155 " a mirror node", errors.ECODE_INVAL)
10156 elif self.op.snode:
10157 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10158 " template")
10159 self.op.snode = None
10161 _CheckOpportunisticLocking(self.op)
10163 self._cds = _GetClusterDomainSecret()
10165 if self.op.mode == constants.INSTANCE_IMPORT:
10166 # On import force_variant must be True, because if we forced it at
10167 # initial install, our only chance when importing it back is that it
10168 # works again!
10169 self.op.force_variant = True
10171 if self.op.no_install:
10172 self.LogInfo("No-installation mode has no effect during import")
10174 elif self.op.mode == constants.INSTANCE_CREATE:
10175 if self.op.os_type is None:
10176 raise errors.OpPrereqError("No guest OS specified",
10177 errors.ECODE_INVAL)
10178 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10179 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10180 " installation" % self.op.os_type,
10181 errors.ECODE_STATE)
10182 if self.op.disk_template is None:
10183 raise errors.OpPrereqError("No disk template specified",
10184 errors.ECODE_INVAL)
10186 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10187 # Check handshake to ensure both clusters have the same domain secret
10188 src_handshake = self.op.source_handshake
10189 if not src_handshake:
10190 raise errors.OpPrereqError("Missing source handshake",
10191 errors.ECODE_INVAL)
10193 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10194 src_handshake)
10195 if errmsg:
10196 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10197 errors.ECODE_INVAL)
10199 # Load and check source CA
10200 self.source_x509_ca_pem = self.op.source_x509_ca
10201 if not self.source_x509_ca_pem:
10202 raise errors.OpPrereqError("Missing source X509 CA",
10203 errors.ECODE_INVAL)
10205 try:
10206 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10207 self._cds)
10208 except OpenSSL.crypto.Error, err:
10209 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10210 (err, ), errors.ECODE_INVAL)
10212 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10213 if errcode is not None:
10214 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10215 errors.ECODE_INVAL)
10217 self.source_x509_ca = cert
10219 src_instance_name = self.op.source_instance_name
10220 if not src_instance_name:
10221 raise errors.OpPrereqError("Missing source instance name",
10222 errors.ECODE_INVAL)
10224 self.source_instance_name = \
10225 netutils.GetHostname(name=src_instance_name).name
10227 else:
10228 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10229 self.op.mode, errors.ECODE_INVAL)
10231 def ExpandNames(self):
10232 """ExpandNames for CreateInstance.
10234 Figure out the right locks for instance creation.
10237 self.needed_locks = {}
10239 instance_name = self.op.instance_name
10240 # this is just a preventive check, but someone might still add this
10241 # instance in the meantime, and creation will fail at lock-add time
10242 if instance_name in self.cfg.GetInstanceList():
10243 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10244 instance_name, errors.ECODE_EXISTS)
10246 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10248 if self.op.iallocator:
10249 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10250 # specifying a group on instance creation and then selecting nodes from
10251 # that group
10252 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10253 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10255 if self.op.opportunistic_locking:
10256 self.opportunistic_locks[locking.LEVEL_NODE] = True
10257 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10258 else:
10259 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10260 nodelist = [self.op.pnode]
10261 if self.op.snode is not None:
10262 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10263 nodelist.append(self.op.snode)
10264 self.needed_locks[locking.LEVEL_NODE] = nodelist
10266 # in case of import lock the source node too
10267 if self.op.mode == constants.INSTANCE_IMPORT:
10268 src_node = self.op.src_node
10269 src_path = self.op.src_path
10271 if src_path is None:
10272 self.op.src_path = src_path = self.op.instance_name
10274 if src_node is None:
10275 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10276 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10277 self.op.src_node = None
10278 if os.path.isabs(src_path):
10279 raise errors.OpPrereqError("Importing an instance from a path"
10280 " requires a source node option",
10281 errors.ECODE_INVAL)
10282 else:
10283 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10284 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10285 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10286 if not os.path.isabs(src_path):
10287 self.op.src_path = src_path = \
10288 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10290 self.needed_locks[locking.LEVEL_NODE_RES] = \
10291 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10293 def _RunAllocator(self):
10294 """Run the allocator based on input opcode.
10297 if self.op.opportunistic_locking:
10298 # Only consider nodes for which a lock is held
10299 node_whitelist = self.owned_locks(locking.LEVEL_NODE)
10300 else:
10301 node_whitelist = None
10303 #TODO Export network to iallocator so that it chooses a pnode
10304 # in a nodegroup that has the desired network connected to
10305 req = _CreateInstanceAllocRequest(self.op, self.disks,
10306 self.nics, self.be_full,
10307 node_whitelist)
10308 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10310 ial.Run(self.op.iallocator)
10312 if not ial.success:
10313 # When opportunistic locks are used only a temporary failure is generated
10314 if self.op.opportunistic_locking:
10315 ecode = errors.ECODE_TEMP_NORES
10316 else:
10317 ecode = errors.ECODE_NORES
10319 raise errors.OpPrereqError("Can't compute nodes using"
10320 " iallocator '%s': %s" %
10321 (self.op.iallocator, ial.info),
10322 ecode)
10324 self.op.pnode = ial.result[0]
10325 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10326 self.op.instance_name, self.op.iallocator,
10327 utils.CommaJoin(ial.result))
10329 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10331 if req.RequiredNodes() == 2:
10332 self.op.snode = ial.result[1]
10334 def BuildHooksEnv(self):
10335 """Build hooks env.
10337 This runs on master, primary and secondary nodes of the instance.
10340 env = {
10341 "ADD_MODE": self.op.mode,
10342 }
10343 if self.op.mode == constants.INSTANCE_IMPORT:
10344 env["SRC_NODE"] = self.op.src_node
10345 env["SRC_PATH"] = self.op.src_path
10346 env["SRC_IMAGES"] = self.src_images
10348 env.update(_BuildInstanceHookEnv(
10349 name=self.op.instance_name,
10350 primary_node=self.op.pnode,
10351 secondary_nodes=self.secondaries,
10352 status=self.op.start,
10353 os_type=self.op.os_type,
10354 minmem=self.be_full[constants.BE_MINMEM],
10355 maxmem=self.be_full[constants.BE_MAXMEM],
10356 vcpus=self.be_full[constants.BE_VCPUS],
10357 nics=_NICListToTuple(self, self.nics),
10358 disk_template=self.op.disk_template,
10359 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10360 for d in self.disks],
10361 bep=self.be_full,
10362 hvp=self.hv_full,
10363 hypervisor_name=self.op.hypervisor,
10364 tags=self.op.tags,
10365 ))
10367 return env
10369 def BuildHooksNodes(self):
10370 """Build hooks nodes.
10373 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10374 return (nl, nl)
10376 def _ReadExportInfo(self):
10377 """Reads the export information from disk.
10379 It will override the opcode source node and path with the actual
10380 information, if these two were not specified before.
10382 @return: the export information
10385 assert self.op.mode == constants.INSTANCE_IMPORT
10387 src_node = self.op.src_node
10388 src_path = self.op.src_path
10390 if src_node is None:
10391 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10392 exp_list = self.rpc.call_export_list(locked_nodes)
10393 found = False
10394 for node in exp_list:
10395 if exp_list[node].fail_msg:
10396 continue
10397 if src_path in exp_list[node].payload:
10398 found = True
10399 self.op.src_node = src_node = node
10400 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10401 src_path)
10402 break
10403 if not found:
10404 raise errors.OpPrereqError("No export found for relative path %s" %
10405 src_path, errors.ECODE_INVAL)
10407 _CheckNodeOnline(self, src_node)
10408 result = self.rpc.call_export_info(src_node, src_path)
10409 result.Raise("No export or invalid export found in dir %s" % src_path)
10411 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10412 if not export_info.has_section(constants.INISECT_EXP):
10413 raise errors.ProgrammerError("Corrupted export config",
10414 errors.ECODE_ENVIRON)
10416 ei_version = export_info.get(constants.INISECT_EXP, "version")
10417 if int(ei_version) != constants.EXPORT_VERSION:
10418 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10419 (ei_version, constants.EXPORT_VERSION),
10420 errors.ECODE_ENVIRON)
10422 return export_info
10423 def _ReadExportParams(self, einfo):
10424 """Use export parameters as defaults.
10426 In case the opcode doesn't specify (as in override) some instance
10427 parameters, then try to use them from the export information, if
10428 that declares them.
10431 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10433 if self.op.disk_template is None:
10434 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10435 self.op.disk_template = einfo.get(constants.INISECT_INS,
10436 "disk_template")
10437 if self.op.disk_template not in constants.DISK_TEMPLATES:
10438 raise errors.OpPrereqError("Disk template specified in configuration"
10439 " file is not one of the allowed values:"
10440 " %s" %
10441 " ".join(constants.DISK_TEMPLATES),
10442 errors.ECODE_INVAL)
10443 else:
10444 raise errors.OpPrereqError("No disk template specified and the export"
10445 " is missing the disk_template information",
10446 errors.ECODE_INVAL)
10448 if not self.op.disks:
10449 disks = []
10450 # TODO: import the disk iv_name too
10451 for idx in range(constants.MAX_DISKS):
10452 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10453 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10454 disks.append({constants.IDISK_SIZE: disk_sz})
10455 self.op.disks = disks
10456 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10457 raise errors.OpPrereqError("No disk info specified and the export"
10458 " is missing the disk information",
10459 errors.ECODE_INVAL)
10461 if not self.op.nics:
10462 nics = []
10463 for idx in range(constants.MAX_NICS):
10464 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10465 ndict = {}
10466 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10467 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10468 ndict[name] = v
10469 nics.append(ndict)
10470 else:
10471 break
10472 self.op.nics = nics
10474 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10475 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10477 if (self.op.hypervisor is None and
10478 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10479 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10481 if einfo.has_section(constants.INISECT_HYP):
10482 # use the export parameters but do not override the ones
10483 # specified by the user
10484 for name, value in einfo.items(constants.INISECT_HYP):
10485 if name not in self.op.hvparams:
10486 self.op.hvparams[name] = value
10488 if einfo.has_section(constants.INISECT_BEP):
10489 # use the parameters, without overriding
10490 for name, value in einfo.items(constants.INISECT_BEP):
10491 if name not in self.op.beparams:
10492 self.op.beparams[name] = value
10493 # Compatibility for the old "memory" be param
10494 if name == constants.BE_MEMORY:
10495 if constants.BE_MAXMEM not in self.op.beparams:
10496 self.op.beparams[constants.BE_MAXMEM] = value
10497 if constants.BE_MINMEM not in self.op.beparams:
10498 self.op.beparams[constants.BE_MINMEM] = value
10499 else:
10500 # try to read the parameters old style, from the main section
10501 for name in constants.BES_PARAMETERS:
10502 if (name not in self.op.beparams and
10503 einfo.has_option(constants.INISECT_INS, name)):
10504 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10506 if einfo.has_section(constants.INISECT_OSP):
10507 # use the parameters, without overriding
10508 for name, value in einfo.items(constants.INISECT_OSP):
10509 if name not in self.op.osparams:
10510 self.op.osparams[name] = value
10512 def _RevertToDefaults(self, cluster):
10513 """Revert the instance parameters to the default values.
10517 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10518 for name in self.op.hvparams.keys():
10519 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10520 del self.op.hvparams[name]
10522 be_defs = cluster.SimpleFillBE({})
10523 for name in self.op.beparams.keys():
10524 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10525 del self.op.beparams[name]
10527 nic_defs = cluster.SimpleFillNIC({})
10528 for nic in self.op.nics:
10529 for name in constants.NICS_PARAMETERS:
10530 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10531 del nic[name]
10533 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10534 for name in self.op.osparams.keys():
10535 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10536 del self.op.osparams[name]
10538 def _CalculateFileStorageDir(self):
10539 """Calculate final instance file storage dir.
10542 # file storage dir calculation/check
10543 self.instance_file_storage_dir = None
10544 if self.op.disk_template in constants.DTS_FILEBASED:
10545 # build the full file storage dir path
10546 joinargs = []
10548 if self.op.disk_template == constants.DT_SHARED_FILE:
10549 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10550 else:
10551 get_fsd_fn = self.cfg.GetFileStorageDir
10553 cfg_storagedir = get_fsd_fn()
10554 if not cfg_storagedir:
10555 raise errors.OpPrereqError("Cluster file storage dir not defined",
10556 errors.ECODE_STATE)
10557 joinargs.append(cfg_storagedir)
10559 if self.op.file_storage_dir is not None:
10560 joinargs.append(self.op.file_storage_dir)
10562 joinargs.append(self.op.instance_name)
10564 # pylint: disable=W0142
10565 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10567 def CheckPrereq(self): # pylint: disable=R0914
10568 """Check prerequisites.
10571 self._CalculateFileStorageDir()
10573 if self.op.mode == constants.INSTANCE_IMPORT:
10574 export_info = self._ReadExportInfo()
10575 self._ReadExportParams(export_info)
10576 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10577 else:
10578 self._old_instance_name = None
10580 if (not self.cfg.GetVGName() and
10581 self.op.disk_template not in constants.DTS_NOT_LVM):
10582 raise errors.OpPrereqError("Cluster does not support lvm-based"
10583 " instances", errors.ECODE_STATE)
10585 if (self.op.hypervisor is None or
10586 self.op.hypervisor == constants.VALUE_AUTO):
10587 self.op.hypervisor = self.cfg.GetHypervisorType()
10589 cluster = self.cfg.GetClusterInfo()
10590 enabled_hvs = cluster.enabled_hypervisors
10591 if self.op.hypervisor not in enabled_hvs:
10592 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10593 " cluster (%s)" %
10594 (self.op.hypervisor, ",".join(enabled_hvs)),
10595 errors.ECODE_STATE)
10597 # Check tag validity
10598 for tag in self.op.tags:
10599 objects.TaggableObject.ValidateTag(tag)
10601 # check hypervisor parameter syntax (locally)
10602 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10603 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10604 self.op.hvparams)
10605 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10606 hv_type.CheckParameterSyntax(filled_hvp)
10607 self.hv_full = filled_hvp
10608 # check that we don't specify global parameters on an instance
10609 _CheckGlobalHvParams(self.op.hvparams)
10611 # fill and remember the beparams dict
10612 self.be_full = _ComputeFullBeParams(self.op, cluster)
10614 # build os parameters
10615 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10617 # now that hvp/bep are in final format, let's reset to defaults,
10618 # if told to do so
10619 if self.op.identify_defaults:
10620 self._RevertToDefaults(cluster)
10623 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10624 self.proc.GetECId())
10626 # disk checks/pre-build
10627 default_vg = self.cfg.GetVGName()
10628 self.disks = _ComputeDisks(self.op, default_vg)
10630 if self.op.mode == constants.INSTANCE_IMPORT:
10631 disk_images = []
10632 for idx in range(len(self.disks)):
10633 option = "disk%d_dump" % idx
10634 if export_info.has_option(constants.INISECT_INS, option):
10635 # FIXME: are the old os-es, disk sizes, etc. useful?
10636 export_name = export_info.get(constants.INISECT_INS, option)
10637 image = utils.PathJoin(self.op.src_path, export_name)
10638 disk_images.append(image)
10639 else:
10640 disk_images.append(False)
10642 self.src_images = disk_images
10644 if self.op.instance_name == self._old_instance_name:
10645 for idx, nic in enumerate(self.nics):
10646 if nic.mac == constants.VALUE_AUTO:
10647 nic_mac_ini = "nic%d_mac" % idx
10648 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10650 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10652 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10653 if self.op.ip_check:
10654 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10655 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10656 (self.check_ip, self.op.instance_name),
10657 errors.ECODE_NOTUNIQUE)
10659 #### mac address generation
10660 # By generating here the mac address both the allocator and the hooks get
10661 # the real final mac address rather than the 'auto' or 'generate' value.
10662 # There is a race condition between the generation and the instance object
10663 # creation, which means that we know the mac is valid now, but we're not
10664 # sure it will be when we actually add the instance. If things go bad
10665 # adding the instance will abort because of a duplicate mac, and the
10666 # creation job will fail.
10667 for nic in self.nics:
10668 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10669 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10673 if self.op.iallocator is not None:
10674 self._RunAllocator()
10676 # Release all unneeded node locks
10677 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10678 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10679 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10680 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10682 assert (self.owned_locks(locking.LEVEL_NODE) ==
10683 self.owned_locks(locking.LEVEL_NODE_RES)), \
10684 "Node locks differ from node resource locks"
10686 #### node related checks
10688 # check primary node
10689 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10690 assert self.pnode is not None, \
10691 "Cannot retrieve locked node %s" % self.op.pnode
10692 if pnode.offline:
10693 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10694 pnode.name, errors.ECODE_STATE)
10695 if pnode.drained:
10696 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10697 pnode.name, errors.ECODE_STATE)
10698 if not pnode.vm_capable:
10699 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10700 " '%s'" % pnode.name, errors.ECODE_STATE)
10702 self.secondaries = []
10704 # Fill in any IPs from IP pools. This must happen here, because we need to
10705 # know the nic's primary node, as specified by the iallocator
10706 for idx, nic in enumerate(self.nics):
10707 net = nic.network
10708 if net is not None:
10709 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10710 if netparams is None:
10711 raise errors.OpPrereqError("No netparams found for network"
10712 " %s. Probably not connected to"
10713 " node's %s nodegroup" %
10714 (net, self.pnode.name),
10715 errors.ECODE_INVAL)
10716 self.LogInfo("NIC/%d inherits netparams %s" %
10717 (idx, netparams.values()))
10718 nic.nicparams = dict(netparams)
10719 if nic.ip is not None:
10720 if nic.ip.lower() == constants.NIC_IP_POOL:
10721 try:
10722 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10723 except errors.ReservationError:
10724 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10725 " from the address pool" % idx,
10726 errors.ECODE_STATE)
10727 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10728 else:
10729 try:
10730 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10731 except errors.ReservationError:
10732 raise errors.OpPrereqError("IP address %s already in use"
10733 " or does not belong to network %s" %
10734 (nic.ip, net),
10735 errors.ECODE_NOTUNIQUE)
10737 # net is None, ip None or given
10738 elif self.op.conflicts_check:
10739 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10741 # mirror node verification
10742 if self.op.disk_template in constants.DTS_INT_MIRROR:
10743 if self.op.snode == pnode.name:
10744 raise errors.OpPrereqError("The secondary node cannot be the"
10745 " primary node", errors.ECODE_INVAL)
10746 _CheckNodeOnline(self, self.op.snode)
10747 _CheckNodeNotDrained(self, self.op.snode)
10748 _CheckNodeVmCapable(self, self.op.snode)
10749 self.secondaries.append(self.op.snode)
10751 snode = self.cfg.GetNodeInfo(self.op.snode)
10752 if pnode.group != snode.group:
10753 self.LogWarning("The primary and secondary nodes are in two"
10754 " different node groups; the disk parameters"
10755 " from the first disk's node group will be"
10756 " used")
10758 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
10759 nodes = [pnode]
10760 if self.op.disk_template in constants.DTS_INT_MIRROR:
10761 nodes.append(snode)
10762 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10763 if compat.any(map(has_es, nodes)):
10764 raise errors.OpPrereqError("Disk template %s not supported with"
10765 " exclusive storage" % self.op.disk_template,
10766 errors.ECODE_STATE)
10768 nodenames = [pnode.name] + self.secondaries
10770 # Verify instance specs
10771 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10772 ispec = {
10773 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10774 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10775 constants.ISPEC_DISK_COUNT: len(self.disks),
10776 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10777 constants.ISPEC_NIC_COUNT: len(self.nics),
10778 constants.ISPEC_SPINDLE_USE: spindle_use,
10779 }
10781 group_info = self.cfg.GetNodeGroup(pnode.group)
10782 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10783 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10784 if not self.op.ignore_ipolicy and res:
10785 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10786 (pnode.group, group_info.name, utils.CommaJoin(res)))
10787 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10789 if not self.adopt_disks:
10790 if self.op.disk_template == constants.DT_RBD:
10791 # _CheckRADOSFreeSpace() is just a placeholder.
10792 # Any function that checks prerequisites can be placed here.
10793 # Check if there is enough space on the RADOS cluster.
10794 _CheckRADOSFreeSpace()
10795 elif self.op.disk_template == constants.DT_EXT:
10796 # FIXME: Function that checks prereqs if needed
10797 pass
10798 else:
10799 # Check lv size requirements, if not adopting
10800 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10801 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10803 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10804 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10805 disk[constants.IDISK_ADOPT])
10806 for disk in self.disks])
10807 if len(all_lvs) != len(self.disks):
10808 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10809 errors.ECODE_INVAL)
10810 for lv_name in all_lvs:
10811 try:
10812 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10813 # to ReserveLV use the same syntax
10814 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10815 except errors.ReservationError:
10816 raise errors.OpPrereqError("LV named %s used by another instance" %
10817 lv_name, errors.ECODE_NOTUNIQUE)
10819 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10820 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10822 node_lvs = self.rpc.call_lv_list([pnode.name],
10823 vg_names.payload.keys())[pnode.name]
10824 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10825 node_lvs = node_lvs.payload
10827 delta = all_lvs.difference(node_lvs.keys())
10828 if delta:
10829 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10830 utils.CommaJoin(delta),
10831 errors.ECODE_INVAL)
10832 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10833 if online_lvs:
10834 raise errors.OpPrereqError("Online logical volumes found, cannot"
10835 " adopt: %s" % utils.CommaJoin(online_lvs),
10836 errors.ECODE_STATE)
10837 # update the size of disk based on what is found
10838 for dsk in self.disks:
10839 dsk[constants.IDISK_SIZE] = \
10840 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10841 dsk[constants.IDISK_ADOPT])][0]))
10843 elif self.op.disk_template == constants.DT_BLOCK:
10844 # Normalize and de-duplicate device paths
10845 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10846 for disk in self.disks])
10847 if len(all_disks) != len(self.disks):
10848 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10849 errors.ECODE_INVAL)
10850 baddisks = [d for d in all_disks
10851 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10852 if baddisks:
10853 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10854 " cannot be adopted" %
10855 (utils.CommaJoin(baddisks),
10856 constants.ADOPTABLE_BLOCKDEV_ROOT),
10857 errors.ECODE_INVAL)
10859 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10860 list(all_disks))[pnode.name]
10861 node_disks.Raise("Cannot get block device information from node %s" %
10862 pnode.name)
10863 node_disks = node_disks.payload
10864 delta = all_disks.difference(node_disks.keys())
10865 if delta:
10866 raise errors.OpPrereqError("Missing block device(s): %s" %
10867 utils.CommaJoin(delta),
10868 errors.ECODE_INVAL)
10869 for dsk in self.disks:
10870 dsk[constants.IDISK_SIZE] = \
10871 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10873 # Verify instance specs
10874 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10875 ispec = {
10876 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10877 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10878 constants.ISPEC_DISK_COUNT: len(self.disks),
10879 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10880 for disk in self.disks],
10881 constants.ISPEC_NIC_COUNT: len(self.nics),
10882 constants.ISPEC_SPINDLE_USE: spindle_use,
10883 }
10885 group_info = self.cfg.GetNodeGroup(pnode.group)
10886 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10887 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10888 if not self.op.ignore_ipolicy and res:
10889 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10890 " policy: %s") % (pnode.group,
10891 utils.CommaJoin(res)),
10892 errors.ECODE_INVAL)
10894 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10896 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10897 # check OS parameters (remotely)
10898 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10900 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10902 #TODO: _CheckExtParams (remotely)
10903 # Check parameters for extstorage
10905 # memory check on primary node
10906 #TODO(dynmem): use MINMEM for checking
10907 if self.op.start:
10908 _CheckNodeFreeMemory(self, self.pnode.name,
10909 "creating instance %s" % self.op.instance_name,
10910 self.be_full[constants.BE_MAXMEM],
10911 self.op.hypervisor)
10913 self.dry_run_result = list(nodenames)
10915 def Exec(self, feedback_fn):
10916 """Create and add the instance to the cluster.
10919 instance = self.op.instance_name
10920 pnode_name = self.pnode.name
10922 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10923 self.owned_locks(locking.LEVEL_NODE)), \
10924 "Node locks differ from node resource locks"
10925 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10927 ht_kind = self.op.hypervisor
10928 if ht_kind in constants.HTS_REQ_PORT:
10929 network_port = self.cfg.AllocatePort()
10930 else:
10931 network_port = None
10933 # This is ugly but we got a chicken-egg problem here
10934 # We can only take the group disk parameters, as the instance
10935 # has no disks yet (we are generating them right here).
10936 node = self.cfg.GetNodeInfo(pnode_name)
10937 nodegroup = self.cfg.GetNodeGroup(node.group)
10938 disks = _GenerateDiskTemplate(self,
10939 self.op.disk_template,
10940 instance, pnode_name,
10941 self.secondaries,
10942 self.disks,
10943 self.instance_file_storage_dir,
10944 self.op.file_driver,
10945 0,
10946 feedback_fn,
10947 self.cfg.GetGroupDiskParams(nodegroup))
10949 iobj = objects.Instance(name=instance, os=self.op.os_type,
10950 primary_node=pnode_name,
10951 nics=self.nics, disks=disks,
10952 disk_template=self.op.disk_template,
10953 admin_state=constants.ADMINST_DOWN,
10954 network_port=network_port,
10955 beparams=self.op.beparams,
10956 hvparams=self.op.hvparams,
10957 hypervisor=self.op.hypervisor,
10958 osparams=self.op.osparams,
10959 )
10961 if self.op.tags:
10962 for tag in self.op.tags:
10963 iobj.AddTag(tag)
10965 if self.adopt_disks:
10966 if self.op.disk_template == constants.DT_PLAIN:
10967 # rename LVs to the newly-generated names; we need to construct
10968 # 'fake' LV disks with the old data, plus the new unique_id
10969 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10970 rename_to = []
10971 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10972 rename_to.append(t_dsk.logical_id)
10973 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10974 self.cfg.SetDiskID(t_dsk, pnode_name)
10975 result = self.rpc.call_blockdev_rename(pnode_name,
10976 zip(tmp_disks, rename_to))
10977 result.Raise("Failed to rename adopted LVs")
10978 else:
10979 feedback_fn("* creating instance disks...")
10980 try:
10981 _CreateDisks(self, iobj)
10982 except errors.OpExecError:
10983 self.LogWarning("Device creation failed, reverting...")
10985 _RemoveDisks(self, iobj)
10987 self.cfg.ReleaseDRBDMinors(instance)
10990 feedback_fn("adding instance %s to cluster config" % instance)
10992 self.cfg.AddInstance(iobj, self.proc.GetECId())
10994 # Declare that we don't want to remove the instance lock anymore, as we've
10995 # added the instance to the config
10996 del self.remove_locks[locking.LEVEL_INSTANCE]
10998 if self.op.mode == constants.INSTANCE_IMPORT:
10999 # Release unused nodes
11000 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
11002 # Release all nodes
11003 _ReleaseLocks(self, locking.LEVEL_NODE)
11006 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
11007 feedback_fn("* wiping instance disks...")
11009 _WipeDisks(self, iobj)
11010 except errors.OpExecError, err:
11011 logging.exception("Wiping disks failed")
11012 self.LogWarning("Wiping instance disks failed (%s)", err)
11016 # Something is already wrong with the disks, don't do anything else
11018 elif self.op.wait_for_sync:
11019 disk_abort = not _WaitForSync(self, iobj)
11020 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11021 # make sure the disks are not degraded (still sync-ing is ok)
11022 feedback_fn("* checking mirrors status")
11023 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11028 _RemoveDisks(self, iobj)
11029 self.cfg.RemoveInstance(iobj.name)
11030 # Make sure the instance lock gets removed
11031 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11032 raise errors.OpExecError("There are some degraded disks for"
11035 # Release all node resource locks
11036 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11038 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11039 # we need to set the disks ID to the primary node, since the
10940 # preceding code might or might not have done it, depending on
11041 # disk template and other options
11042 for disk in iobj.disks:
11043 self.cfg.SetDiskID(disk, pnode_name)
11044 if self.op.mode == constants.INSTANCE_CREATE:
11045 if not self.op.no_install:
11046 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11047 not self.op.wait_for_sync)
11049 feedback_fn("* pausing disk sync to install instance OS")
11050 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11053 for idx, success in enumerate(result.payload):
11055 logging.warn("pause-sync of instance %s for disk %d failed",
11058 feedback_fn("* running the instance OS create scripts...")
11059 # FIXME: pass debug option from opcode to backend
11061 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11062 self.op.debug_level)
11064 feedback_fn("* resuming disk sync")
11065 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11068 for idx, success in enumerate(result.payload):
11070 logging.warn("resume-sync of instance %s for disk %d failed",
11073 os_add_result.Raise("Could not add os for instance %s"
11074 " on node %s" % (instance, pnode_name))
11077 if self.op.mode == constants.INSTANCE_IMPORT:
11078 feedback_fn("* running the instance OS import scripts...")
11082 for idx, image in enumerate(self.src_images):
11086 # FIXME: pass debug option from opcode to backend
11087 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11088 constants.IEIO_FILE, (image, ),
11089 constants.IEIO_SCRIPT,
11090 (iobj.disks[idx], idx),
11092 transfers.append(dt)
11095 masterd.instance.TransferInstanceData(self, feedback_fn,
11096 self.op.src_node, pnode_name,
11097 self.pnode.secondary_ip,
11099 if not compat.all(import_result):
11100 self.LogWarning("Some disks for instance %s on node %s were not"
11101 " imported successfully" % (instance, pnode_name))
11103 rename_from = self._old_instance_name
11105 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11106 feedback_fn("* preparing remote import...")
11107 # The source cluster will stop the instance before attempting to make
11108 # a connection. In some cases stopping an instance can take a long
11109 # time, hence the shutdown timeout is added to the connection timeout.
11111 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11112 self.op.source_shutdown_timeout)
11113 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11115 assert iobj.primary_node == self.pnode.name
11117 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11118 self.source_x509_ca,
11119 self._cds, timeouts)
11120 if not compat.all(disk_results):
11121 # TODO: Should the instance still be started, even if some disks
11122 # failed to import (valid for local imports, too)?
11123 self.LogWarning("Some disks for instance %s on node %s were not"
11124 " imported successfully" % (instance, pnode_name))
11126 rename_from = self.source_instance_name
11129 # also checked in the prereq part
11130 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11133 # Run rename script on newly imported instance
11134 assert iobj.name == instance
11135 feedback_fn("Running rename script for %s" % instance)
11136 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11138 self.op.debug_level)
11139 if result.fail_msg:
11140 self.LogWarning("Failed to run rename script for %s on node"
11141 " %s: %s" % (instance, pnode_name, result.fail_msg))
11143 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11146 iobj.admin_state = constants.ADMINST_UP
11147 self.cfg.Update(iobj, feedback_fn)
11148 logging.info("Starting instance %s on node %s", instance, pnode_name)
11149 feedback_fn("* starting instance...")
11150 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11152 result.Raise("Could not start instance")
11154 return list(iobj.all_nodes)
11157 class LUInstanceMultiAlloc(NoHooksLU):
11158 """Allocates multiple instances at the same time.
11163 def CheckArguments(self):
11164 """Check arguments.
11168 for inst in self.op.instances:
11169 if inst.iallocator is not None:
11170 raise errors.OpPrereqError("iallocator must not be set on"
11171 " individual instance objects", errors.ECODE_INVAL)
11172 nodes.append(bool(inst.pnode))
11173 if inst.disk_template in constants.DTS_INT_MIRROR:
11174 nodes.append(bool(inst.snode))
11176 has_nodes = compat.any(nodes)
11177 if compat.all(nodes) ^ has_nodes:
11178 raise errors.OpPrereqError("There are instance objects providing"
11179 " pnode/snode while others do not",
11180 errors.ECODE_INVAL)
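# Worked example of the all-or-none check above (illustrative values only):
#   nodes == [True, True]   -> all=True,  any=True  -> all ^ any == False: ok
#   nodes == [True, False]  -> all=False, any=True  -> all ^ any == True: error
#   nodes == [False, False] -> all=False, any=False -> all ^ any == False: ok
# i.e. either every instance specifies its nodes or none of them does.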
11182 if self.op.iallocator is None:
11183 default_iallocator = self.cfg.GetDefaultIAllocator()
11184 if default_iallocator and has_nodes:
11185 self.op.iallocator = default_iallocator
11187 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11188 " given and no cluster-wide default"
11189 " iallocator found; please specify either"
11190 " an iallocator or nodes on the instances"
11191 " or set a cluster-wide default iallocator",
11192 errors.ECODE_INVAL)
11194 _CheckOpportunisticLocking(self.op)
11196 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11198 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11199 utils.CommaJoin(dups), errors.ECODE_INVAL)
11201 def ExpandNames(self):
11202 """Calculate the locks.
11205 self.share_locks = _ShareAll()
11206 self.needed_locks = {
11207 # iallocator will select nodes and even if no iallocator is used,
11208 # collisions with LUInstanceCreate should be avoided
11209 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11212 if self.op.iallocator:
11213 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11214 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11216 if self.op.opportunistic_locking:
11217 self.opportunistic_locks[locking.LEVEL_NODE] = True
11218 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11221 for inst in self.op.instances:
11222 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11223 nodeslist.append(inst.pnode)
11224 if inst.snode is not None:
11225 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11226 nodeslist.append(inst.snode)
11228 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11229 # Lock resources of instance's primary and secondary nodes (copy to
11230 # prevent accidental modification)
11231 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11233 def CheckPrereq(self):
11234 """Check prerequisite.
11237 cluster = self.cfg.GetClusterInfo()
11238 default_vg = self.cfg.GetVGName()
11239 ec_id = self.proc.GetECId()
11241 if self.op.opportunistic_locking:
11242 # Only consider nodes for which a lock is held
11243 node_whitelist = self.owned_locks(locking.LEVEL_NODE)
11245 node_whitelist = None
11247 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11248 _ComputeNics(op, cluster, None,
11250 _ComputeFullBeParams(op, cluster),
11252 for op in self.op.instances]
11254 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11255 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11257 ial.Run(self.op.iallocator)
11259 if not ial.success:
11260 raise errors.OpPrereqError("Can't compute nodes using"
11261 " iallocator '%s': %s" %
11262 (self.op.iallocator, ial.info),
11263 errors.ECODE_NORES)
11265 self.ia_result = ial.result
11267 if self.op.dry_run:
11268 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11269 constants.JOB_IDS_KEY: [],
11272 def _ConstructPartialResult(self):
11273 """Contructs the partial result.
11276 (allocatable, failed) = self.ia_result
11278 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11279 map(compat.fst, allocatable),
11280 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11283 def Exec(self, feedback_fn):
11284 """Executes the opcode.
11287 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11288 (allocatable, failed) = self.ia_result
11291 for (name, nodes) in allocatable:
11292 op = op2inst.pop(name)
11295 (op.pnode, op.snode) = nodes
11297 (op.pnode,) = nodes
11301 missing = set(op2inst.keys()) - set(failed)
11302 assert not missing, \
11303 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11305 return ResultWithJobs(jobs, **self._ConstructPartialResult())
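# Illustrative sketch of the resulting payload (hypothetical values): with
# two allocatable instances and one failed one, the opcode result resembles
#   {
#     opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: ["inst1", "inst2"],
#     opcodes.OpInstanceMultiAlloc.FAILED_KEY: ["inst3"],
#     constants.JOB_IDS_KEY: [...],  # filled in when the jobs are submitted
#   }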
11308 def _CheckRADOSFreeSpace():
11309 """Compute disk size requirements inside the RADOS cluster.
11312 # For the RADOS cluster we assume there is always enough space.
11316 class LUInstanceConsole(NoHooksLU):
11317 """Connect to an instance's console.
11319 This is somewhat special in that it returns the command line that
11320 you need to run on the master node in order to connect to the console.
11326 def ExpandNames(self):
11327 self.share_locks = _ShareAll()
11328 self._ExpandAndLockInstance()
11330 def CheckPrereq(self):
11331 """Check prerequisites.
11333 This checks that the instance is in the cluster.
11336 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11337 assert self.instance is not None, \
11338 "Cannot retrieve locked instance %s" % self.op.instance_name
11339 _CheckNodeOnline(self, self.instance.primary_node)
11341 def Exec(self, feedback_fn):
11342 """Connect to the console of an instance
11345 instance = self.instance
11346 node = instance.primary_node
11348 node_insts = self.rpc.call_instance_list([node],
11349 [instance.hypervisor])[node]
11350 node_insts.Raise("Can't get node information from %s" % node)
11352 if instance.name not in node_insts.payload:
11353 if instance.admin_state == constants.ADMINST_UP:
11354 state = constants.INSTST_ERRORDOWN
11355 elif instance.admin_state == constants.ADMINST_DOWN:
11356 state = constants.INSTST_ADMINDOWN
11358 state = constants.INSTST_ADMINOFFLINE
11359 raise errors.OpExecError("Instance %s is not running (state %s)" %
11360 (instance.name, state))
11362 logging.debug("Connecting to console of %s on %s", instance.name, node)
11364 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11367 def _GetInstanceConsole(cluster, instance):
11368 """Returns console information for an instance.
11370 @type cluster: L{objects.Cluster}
11371 @type instance: L{objects.Instance}
11375 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11376 # beparams and hvparams are passed separately, to avoid editing the
11377 # instance and then saving the defaults in the instance itself.
11378 hvparams = cluster.FillHV(instance)
11379 beparams = cluster.FillBE(instance)
11380 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11382 assert console.instance == instance.name
11383 assert console.Validate()
11385 return console.ToDict()
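# Illustrative usage sketch (assumed call site, not from this module):
#   console_dict = _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
# The returned dict mirrors the objects.InstanceConsole fields (instance
# name, console kind and the hypervisor-specific connection details).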
11388 class LUInstanceReplaceDisks(LogicalUnit):
11389 """Replace the disks of an instance.
11392 HPATH = "mirrors-replace"
11393 HTYPE = constants.HTYPE_INSTANCE
11396 def CheckArguments(self):
11397 """Check arguments.
11400 remote_node = self.op.remote_node
11401 ialloc = self.op.iallocator
11402 if self.op.mode == constants.REPLACE_DISK_CHG:
11403 if remote_node is None and ialloc is None:
11404 raise errors.OpPrereqError("When changing the secondary either an"
11405 " iallocator script must be used or the"
11406 " new node given", errors.ECODE_INVAL)
11408 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11410 elif remote_node is not None or ialloc is not None:
11411 # Not replacing the secondary
11412 raise errors.OpPrereqError("The iallocator and new node options can"
11413 " only be used when changing the"
11414 " secondary node", errors.ECODE_INVAL)
11416 def ExpandNames(self):
11417 self._ExpandAndLockInstance()
11419 assert locking.LEVEL_NODE not in self.needed_locks
11420 assert locking.LEVEL_NODE_RES not in self.needed_locks
11421 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11423 assert self.op.iallocator is None or self.op.remote_node is None, \
11424 "Conflicting options"
11426 if self.op.remote_node is not None:
11427 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11429 # Warning: do not remove the locking of the new secondary here
11430 # unless DRBD8.AddChildren is changed to work in parallel;
11431 # currently it doesn't since parallel invocations of
11432 # FindUnusedMinor will conflict
11433 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11434 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11436 self.needed_locks[locking.LEVEL_NODE] = []
11437 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11439 if self.op.iallocator is not None:
11440 # iallocator will select a new node in the same group
11441 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11442 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11444 self.needed_locks[locking.LEVEL_NODE_RES] = []
11446 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11447 self.op.iallocator, self.op.remote_node,
11448 self.op.disks, self.op.early_release,
11449 self.op.ignore_ipolicy)
11451 self.tasklets = [self.replacer]
11453 def DeclareLocks(self, level):
11454 if level == locking.LEVEL_NODEGROUP:
11455 assert self.op.remote_node is None
11456 assert self.op.iallocator is not None
11457 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11459 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11460 # Lock all groups used by instance optimistically; this requires going
11461 # via the node before it's locked, requiring verification later on
11462 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11463 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11465 elif level == locking.LEVEL_NODE:
11466 if self.op.iallocator is not None:
11467 assert self.op.remote_node is None
11468 assert not self.needed_locks[locking.LEVEL_NODE]
11469 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11471 # Lock member nodes of all locked groups
11472 self.needed_locks[locking.LEVEL_NODE] = \
11474 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11475 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11477 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11479 self._LockInstancesNodes()
11481 elif level == locking.LEVEL_NODE_RES:
11483 self.needed_locks[locking.LEVEL_NODE_RES] = \
11484 self.needed_locks[locking.LEVEL_NODE]
11486 def BuildHooksEnv(self):
11487 """Build hooks env.
11489 This runs on the master, the primary and all the secondaries.
11492 instance = self.replacer.instance
11494 "MODE": self.op.mode,
11495 "NEW_SECONDARY": self.op.remote_node,
11496 "OLD_SECONDARY": instance.secondary_nodes[0],
11498 env.update(_BuildInstanceHookEnvByObject(self, instance))
11501 def BuildHooksNodes(self):
11502 """Build hooks nodes.
11505 instance = self.replacer.instance
11507 self.cfg.GetMasterNode(),
11508 instance.primary_node,
11510 if self.op.remote_node is not None:
11511 nl.append(self.op.remote_node)
11514 def CheckPrereq(self):
11515 """Check prerequisites.
11518 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11519 self.op.iallocator is None)
11521 # Verify if node group locks are still correct
11522 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11524 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11526 return LogicalUnit.CheckPrereq(self)
11529 class TLReplaceDisks(Tasklet):
11530 """Replaces disks for an instance.
11532 Note: Locking is not within the scope of this class.
11535 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11536 disks, early_release, ignore_ipolicy):
11537 """Initializes this class.
11540 Tasklet.__init__(self, lu)
11543 self.instance_name = instance_name
11545 self.iallocator_name = iallocator_name
11546 self.remote_node = remote_node
11548 self.early_release = early_release
11549 self.ignore_ipolicy = ignore_ipolicy
11552 self.instance = None
11553 self.new_node = None
11554 self.target_node = None
11555 self.other_node = None
11556 self.remote_node_info = None
11557 self.node_secondary_ip = None
11560 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11561 """Compute a new secondary node using an IAllocator.
11564 req = iallocator.IAReqRelocate(name=instance_name,
11565 relocate_from=list(relocate_from))
11566 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11568 ial.Run(iallocator_name)
11570 if not ial.success:
11571 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11572 " %s" % (iallocator_name, ial.info),
11573 errors.ECODE_NORES)
11575 remote_node_name = ial.result[0]
11577 lu.LogInfo("Selected new secondary for instance '%s': %s",
11578 instance_name, remote_node_name)
11580 return remote_node_name
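# Illustrative sketch (hypothetical names): relocating the secondary of
# "inst1" away from "node2" via the "hail" allocator:
#   new_sec = TLReplaceDisks._RunAllocator(lu, "hail", "inst1", ["node2"])
# On success this logs and returns the chosen node name; on failure an
# OpPrereqError with ECODE_NORES is raised, as shown above.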
11582 def _FindFaultyDisks(self, node_name):
11583 """Wrapper for L{_FindFaultyInstanceDisks}.
11586 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11589 def _CheckDisksActivated(self, instance):
11590 """Checks if the instance disks are activated.
11592 @param instance: The instance to check disks
11593 @return: True if they are activated, False otherwise
11596 nodes = instance.all_nodes
11598 for idx, dev in enumerate(instance.disks):
11600 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11601 self.cfg.SetDiskID(dev, node)
11603 result = _BlockdevFind(self, node, dev, instance)
11607 elif result.fail_msg or not result.payload:
11612 def CheckPrereq(self):
11613 """Check prerequisites.
11615 This checks that the instance is in the cluster.
11618 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11619 assert instance is not None, \
11620 "Cannot retrieve locked instance %s" % self.instance_name
11622 if instance.disk_template != constants.DT_DRBD8:
11623 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11624 " instances", errors.ECODE_INVAL)
11626 if len(instance.secondary_nodes) != 1:
11627 raise errors.OpPrereqError("The instance has a strange layout,"
11628 " expected one secondary but found %d" %
11629 len(instance.secondary_nodes),
11630 errors.ECODE_FAULT)
11632 instance = self.instance
11633 secondary_node = instance.secondary_nodes[0]
11635 if self.iallocator_name is None:
11636 remote_node = self.remote_node
11638 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11639 instance.name, instance.secondary_nodes)
11641 if remote_node is None:
11642 self.remote_node_info = None
11644 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11645 "Remote node '%s' is not locked" % remote_node
11647 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11648 assert self.remote_node_info is not None, \
11649 "Cannot retrieve locked node %s" % remote_node
11651 if remote_node == self.instance.primary_node:
11652 raise errors.OpPrereqError("The specified node is the primary node of"
11653 " the instance", errors.ECODE_INVAL)
11655 if remote_node == secondary_node:
11656 raise errors.OpPrereqError("The specified node is already the"
11657 " secondary node of the instance",
11658 errors.ECODE_INVAL)
11660 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11661 constants.REPLACE_DISK_CHG):
11662 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11663 errors.ECODE_INVAL)
11665 if self.mode == constants.REPLACE_DISK_AUTO:
11666 if not self._CheckDisksActivated(instance):
11667 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11668 " first" % self.instance_name,
11669 errors.ECODE_STATE)
11670 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11671 faulty_secondary = self._FindFaultyDisks(secondary_node)
11673 if faulty_primary and faulty_secondary:
11674 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11675 " one node and can not be repaired"
11676 " automatically" % self.instance_name,
11677 errors.ECODE_STATE)
11680 self.disks = faulty_primary
11681 self.target_node = instance.primary_node
11682 self.other_node = secondary_node
11683 check_nodes = [self.target_node, self.other_node]
11684 elif faulty_secondary:
11685 self.disks = faulty_secondary
11686 self.target_node = secondary_node
11687 self.other_node = instance.primary_node
11688 check_nodes = [self.target_node, self.other_node]
11694 # Non-automatic modes
11695 if self.mode == constants.REPLACE_DISK_PRI:
11696 self.target_node = instance.primary_node
11697 self.other_node = secondary_node
11698 check_nodes = [self.target_node, self.other_node]
11700 elif self.mode == constants.REPLACE_DISK_SEC:
11701 self.target_node = secondary_node
11702 self.other_node = instance.primary_node
11703 check_nodes = [self.target_node, self.other_node]
11705 elif self.mode == constants.REPLACE_DISK_CHG:
11706 self.new_node = remote_node
11707 self.other_node = instance.primary_node
11708 self.target_node = secondary_node
11709 check_nodes = [self.new_node, self.other_node]
11711 _CheckNodeNotDrained(self.lu, remote_node)
11712 _CheckNodeVmCapable(self.lu, remote_node)
11714 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11715 assert old_node_info is not None
11716 if old_node_info.offline and not self.early_release:
11717 # doesn't make sense to delay the release
11718 self.early_release = True
11719 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11720 " early-release mode", secondary_node)
11723 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11726 # If not specified, all disks should be replaced
11728 self.disks = range(len(self.instance.disks))
11730 # TODO: This is ugly, but right now we can't distinguish between internal
11731 # submitted opcode and external one. We should fix that.
11732 if self.remote_node_info:
11733 # We change the node, let's verify it still meets instance policy
11734 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11735 cluster = self.cfg.GetClusterInfo()
11736 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11738 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11739 ignore=self.ignore_ipolicy)
11741 for node in check_nodes:
11742 _CheckNodeOnline(self.lu, node)
11744 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11747 if node_name is not None)
11749 # Release unneeded node and node resource locks
11750 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11751 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11752 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11754 # Release any owned node group
11755 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11757 # Check whether disks are valid
11758 for disk_idx in self.disks:
11759 instance.FindDisk(disk_idx)
11761 # Get secondary node IP addresses
11762 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11763 in self.cfg.GetMultiNodeInfo(touched_nodes))
11765 def Exec(self, feedback_fn):
11766 """Execute disk replacement.
11768 This dispatches the disk replacement to the appropriate handler.
11772 # Verify owned locks before starting operation
11773 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11774 assert set(owned_nodes) == set(self.node_secondary_ip), \
11775 ("Incorrect node locks, owning %s, expected %s" %
11776 (owned_nodes, self.node_secondary_ip.keys()))
11777 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11778 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11779 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11781 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11782 assert list(owned_instances) == [self.instance_name], \
11783 "Instance '%s' not locked" % self.instance_name
11785 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11786 "Should not own any node group lock at this point"
11789 feedback_fn("No disks need replacement for instance '%s'" %
11790 self.instance.name)
11793 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11794 (utils.CommaJoin(self.disks), self.instance.name))
11795 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11796 feedback_fn("Current seconary node: %s" %
11797 utils.CommaJoin(self.instance.secondary_nodes))
11799 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11801 # Activate the instance disks if we're replacing them on a down instance
11803 _StartInstanceDisks(self.lu, self.instance, True)
11806 # Should we replace the secondary node?
11807 if self.new_node is not None:
11808 fn = self._ExecDrbd8Secondary
11810 fn = self._ExecDrbd8DiskOnly
11812 result = fn(feedback_fn)
11814 # Deactivate the instance disks if we're replacing them on a
11817 _SafeShutdownInstanceDisks(self.lu, self.instance)
11819 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11822 # Verify owned locks
11823 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11824 nodes = frozenset(self.node_secondary_ip)
11825 assert ((self.early_release and not owned_nodes) or
11826 (not self.early_release and not (set(owned_nodes) - nodes))), \
11827 ("Not owning the correct locks, early_release=%s, owned=%r,"
11828 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11832 def _CheckVolumeGroup(self, nodes):
11833 self.lu.LogInfo("Checking volume groups")
11835 vgname = self.cfg.GetVGName()
11837 # Make sure volume group exists on all involved nodes
11838 results = self.rpc.call_vg_list(nodes)
11840 raise errors.OpExecError("Can't list volume groups on the nodes")
11843 res = results[node]
11844 res.Raise("Error checking node %s" % node)
11845 if vgname not in res.payload:
11846 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11849 def _CheckDisksExistence(self, nodes):
11850 # Check disk existence
11851 for idx, dev in enumerate(self.instance.disks):
11852 if idx not in self.disks:
11856 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11857 self.cfg.SetDiskID(dev, node)
11859 result = _BlockdevFind(self, node, dev, self.instance)
11861 msg = result.fail_msg
11862 if msg or not result.payload:
11864 msg = "disk not found"
11865 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11868 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11869 for idx, dev in enumerate(self.instance.disks):
11870 if idx not in self.disks:
11873 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11876 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11877 on_primary, ldisk=ldisk):
11878 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11879 " replace disks for instance %s" %
11880 (node_name, self.instance.name))
11882 def _CreateNewStorage(self, node_name):
11883 """Create new storage on the primary or secondary node.
11885 This is only used for same-node replaces, not for changing the
11886 secondary node, hence we don't want to modify the existing disk.
11891 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11892 for idx, dev in enumerate(disks):
11893 if idx not in self.disks:
11896 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11898 self.cfg.SetDiskID(dev, node_name)
11900 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11901 names = _GenerateUniqueNames(self.lu, lv_names)
11903 (data_disk, meta_disk) = dev.children
11904 vg_data = data_disk.logical_id[0]
11905 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11906 logical_id=(vg_data, names[0]),
11907 params=data_disk.params)
11908 vg_meta = meta_disk.logical_id[0]
11909 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11910 size=constants.DRBD_META_SIZE,
11911 logical_id=(vg_meta, names[1]),
11912 params=meta_disk.params)
11914 new_lvs = [lv_data, lv_meta]
11915 old_lvs = [child.Copy() for child in dev.children]
11916 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11917 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11919 # we pass force_create=True to force the LVM creation
11920 for new_lv in new_lvs:
11921 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11922 _GetInstanceInfoText(self.instance), False,
11927 def _CheckDevices(self, node_name, iv_names):
11928 for name, (dev, _, _) in iv_names.iteritems():
11929 self.cfg.SetDiskID(dev, node_name)
11931 result = _BlockdevFind(self, node_name, dev, self.instance)
11933 msg = result.fail_msg
11934 if msg or not result.payload:
11936 msg = "disk not found"
11937 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11940 if result.payload.is_degraded:
11941 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11943 def _RemoveOldStorage(self, node_name, iv_names):
11944 for name, (_, old_lvs, _) in iv_names.iteritems():
11945 self.lu.LogInfo("Remove logical volumes for %s", name)
11948 self.cfg.SetDiskID(lv, node_name)
11950 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11952 self.lu.LogWarning("Can't remove old LV: %s", msg,
11953 hint="remove unused LVs manually")
11955 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11956 """Replace a disk on the primary or secondary for DRBD 8.
11958 The algorithm for replace is quite complicated:
11960 1. for each disk to be replaced:
11962 1. create new LVs on the target node with unique names
11963 1. detach old LVs from the drbd device
11964 1. rename old LVs to name_replaced.<time_t>
11965 1. rename new LVs to old LVs
11966 1. attach the new LVs (with the old names now) to the drbd device
11968 1. wait for sync across all devices
11970 1. for each modified disk:
11972 1. remove old LVs (which have the name name_replaced.<time_t>)
11974 Failures are not very well handled.
11979 # Step: check device activation
11980 self.lu.LogStep(1, steps_total, "Check device existence")
11981 self._CheckDisksExistence([self.other_node, self.target_node])
11982 self._CheckVolumeGroup([self.target_node, self.other_node])
11984 # Step: check other node consistency
11985 self.lu.LogStep(2, steps_total, "Check peer consistency")
11986 self._CheckDisksConsistency(self.other_node,
11987 self.other_node == self.instance.primary_node,
11990 # Step: create new storage
11991 self.lu.LogStep(3, steps_total, "Allocate new storage")
11992 iv_names = self._CreateNewStorage(self.target_node)
11994 # Step: for each lv, detach+rename*2+attach
11995 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11996 for dev, old_lvs, new_lvs in iv_names.itervalues():
11997 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11999 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
12001 result.Raise("Can't detach drbd from local storage on node"
12002 " %s for device %s" % (self.target_node, dev.iv_name))
12004 #cfg.Update(instance)
12006 # ok, we created the new LVs, so now we know we have the needed
12007 # storage; as such, we proceed on the target node to rename
12008 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
12009 # using the assumption that logical_id == physical_id (which in
12010 # turn is the unique_id on that node)
12012 # FIXME(iustin): use a better name for the replaced LVs
12013 temp_suffix = int(time.time())
12014 ren_fn = lambda d, suff: (d.physical_id[0],
12015 d.physical_id[1] + "_replaced-%s" % suff)
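# Worked example (hypothetical LV): with temp_suffix == 1400000000 and an
# old LV whose physical_id is ("xenvg", "disk0_data"), ren_fn yields
#   ("xenvg", "disk0_data_replaced-1400000000")
# which is the temporary name the old LV keeps until it is removed.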
12017 # Build the rename list based on what LVs exist on the node
12018 rename_old_to_new = []
12019 for to_ren in old_lvs:
12020 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12021 if not result.fail_msg and result.payload:
12023 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12025 self.lu.LogInfo("Renaming the old LVs on the target node")
12026 result = self.rpc.call_blockdev_rename(self.target_node,
12028 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12030 # Now we rename the new LVs to the old LVs
12031 self.lu.LogInfo("Renaming the new LVs on the target node")
12032 rename_new_to_old = [(new, old.physical_id)
12033 for old, new in zip(old_lvs, new_lvs)]
12034 result = self.rpc.call_blockdev_rename(self.target_node,
12036 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12038 # Intermediate steps of in memory modifications
12039 for old, new in zip(old_lvs, new_lvs):
12040 new.logical_id = old.logical_id
12041 self.cfg.SetDiskID(new, self.target_node)
12043 # We need to modify old_lvs so that removal later removes the
12044 # right LVs, not the newly added ones; note that old_lvs is a copy here
12046 for disk in old_lvs:
12047 disk.logical_id = ren_fn(disk, temp_suffix)
12048 self.cfg.SetDiskID(disk, self.target_node)
12050 # Now that the new lvs have the old name, we can add them to the device
12051 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12052 result = self.rpc.call_blockdev_addchildren(self.target_node,
12053 (dev, self.instance), new_lvs)
12054 msg = result.fail_msg
12056 for new_lv in new_lvs:
12057 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12060 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12061 hint=("cleanup manually the unused logical"
12063 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12065 cstep = itertools.count(5)
12067 if self.early_release:
12068 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12069 self._RemoveOldStorage(self.target_node, iv_names)
12070 # TODO: Check if releasing locks early still makes sense
12071 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12073 # Release all resource locks except those used by the instance
12074 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12075 keep=self.node_secondary_ip.keys())
12077 # Release all node locks while waiting for sync
12078 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12080 # TODO: Can the instance lock be downgraded here? Take the optional disk
12081 # shutdown in the caller into consideration.
12084 # This can fail as the old devices are degraded and _WaitForSync
12085 # does a combined result over all disks, so we don't check its return value
12086 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12087 _WaitForSync(self.lu, self.instance)
12089 # Check all devices manually
12090 self._CheckDevices(self.instance.primary_node, iv_names)
12092 # Step: remove old storage
12093 if not self.early_release:
12094 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12095 self._RemoveOldStorage(self.target_node, iv_names)
12097 def _ExecDrbd8Secondary(self, feedback_fn):
12098 """Replace the secondary node for DRBD 8.
12100 The algorithm for replace is quite complicated:
12101 - for all disks of the instance:
12102 - create new LVs on the new node with same names
12103 - shutdown the drbd device on the old secondary
12104 - disconnect the drbd network on the primary
12105 - create the drbd device on the new secondary
12106 - network attach the drbd on the primary, using an artifice:
12107 the drbd code for Attach() will connect to the network if it
12108 finds a device which is connected to the good local disks but
12109 not network enabled
12110 - wait for sync across all devices
12111 - remove all disks from the old secondary
12113 Failures are not very well handled.
12118 pnode = self.instance.primary_node
12120 # Step: check device activation
12121 self.lu.LogStep(1, steps_total, "Check device existence")
12122 self._CheckDisksExistence([self.instance.primary_node])
12123 self._CheckVolumeGroup([self.instance.primary_node])
12125 # Step: check other node consistency
12126 self.lu.LogStep(2, steps_total, "Check peer consistency")
12127 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12129 # Step: create new storage
12130 self.lu.LogStep(3, steps_total, "Allocate new storage")
12131 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12132 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12133 for idx, dev in enumerate(disks):
12134 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12135 (self.new_node, idx))
12136 # we pass force_create=True to force LVM creation
12137 for new_lv in dev.children:
12138 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12139 True, _GetInstanceInfoText(self.instance), False,
12142 # Step 4: drbd minors and drbd setup changes
12143 # after this, we must manually remove the drbd minors on both the
12144 # error and the success paths
12145 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12146 minors = self.cfg.AllocateDRBDMinor([self.new_node
12147 for dev in self.instance.disks],
12148 self.instance.name)
12149 logging.debug("Allocated minors %r", minors)
12152 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12153 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12154 (self.new_node, idx))
12155 # create new devices on new_node; note that we create two IDs:
12156 # one without port, so the drbd will be activated without
12157 # networking information on the new node at this stage, and one
12158 # with network, for the later activation in step 4
12159 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12160 if self.instance.primary_node == o_node1:
12163 assert self.instance.primary_node == o_node2, "Three-node instance?"
12166 new_alone_id = (self.instance.primary_node, self.new_node, None,
12167 p_minor, new_minor, o_secret)
12168 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12169 p_minor, new_minor, o_secret)
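# Illustrative sketch (hypothetical values): with primary node "node1",
# new secondary "node3", o_port 11000, p_minor 0 and new_minor 5:
#   new_alone_id == ("node1", "node3", None,  0, 5, o_secret)
#   new_net_id   == ("node1", "node3", 11000, 0, 5, o_secret)
# i.e. the same DRBD8 6-tuple, with the port withheld until the final
# network attach below.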
12171 iv_names[idx] = (dev, dev.children, new_net_id)
12172 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12174 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12175 logical_id=new_alone_id,
12176 children=dev.children,
12179 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12182 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12184 _GetInstanceInfoText(self.instance), False,
12186 except errors.GenericError:
12187 self.cfg.ReleaseDRBDMinors(self.instance.name)
12190 # We have new devices, shutdown the drbd on the old secondary
12191 for idx, dev in enumerate(self.instance.disks):
12192 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12193 self.cfg.SetDiskID(dev, self.target_node)
12194 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12195 (dev, self.instance)).fail_msg
12197 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12198 "node: %s" % (idx, msg),
12199 hint=("Please cleanup this device manually as"
12200 " soon as possible"))
12202 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12203 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12204 self.instance.disks)[pnode]
12206 msg = result.fail_msg
12208 # detaches didn't succeed (unlikely)
12209 self.cfg.ReleaseDRBDMinors(self.instance.name)
12210 raise errors.OpExecError("Can't detach the disks from the network on"
12211 " old node: %s" % (msg,))
12213 # if we managed to detach at least one, we update all the disks of
12214 # the instance to point to the new secondary
12215 self.lu.LogInfo("Updating instance configuration")
12216 for dev, _, new_logical_id in iv_names.itervalues():
12217 dev.logical_id = new_logical_id
12218 self.cfg.SetDiskID(dev, self.instance.primary_node)
12220 self.cfg.Update(self.instance, feedback_fn)
12222 # Release all node locks (the configuration has been updated)
12223 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12225 # and now perform the drbd attach
12226 self.lu.LogInfo("Attaching primary drbds to new secondary"
12227 " (standalone => connected)")
12228 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12230 self.node_secondary_ip,
12231 (self.instance.disks, self.instance),
12232 self.instance.name,
12234 for to_node, to_result in result.items():
12235 msg = to_result.fail_msg
12237 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12239 hint=("please do a gnt-instance info to see the"
12240 " status of disks"))
12242 cstep = itertools.count(5)
12244 if self.early_release:
12245 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12246 self._RemoveOldStorage(self.target_node, iv_names)
12247 # TODO: Check if releasing locks early still makes sense
12248 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12250 # Release all resource locks except those used by the instance
12251 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12252 keep=self.node_secondary_ip.keys())
12254 # TODO: Can the instance lock be downgraded here? Take the optional disk
12255 # shutdown in the caller into consideration.
12258 # This can fail as the old devices are degraded and _WaitForSync
12259 # does a combined result over all disks, so we don't check its return value
12260 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12261 _WaitForSync(self.lu, self.instance)
12263 # Check all devices manually
12264 self._CheckDevices(self.instance.primary_node, iv_names)
12266 # Step: remove old storage
12267 if not self.early_release:
12268 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12269 self._RemoveOldStorage(self.target_node, iv_names)
12272 class LURepairNodeStorage(NoHooksLU):
12273 """Repairs the volume group on a node.
12278 def CheckArguments(self):
12279 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12281 storage_type = self.op.storage_type
12283 if (constants.SO_FIX_CONSISTENCY not in
12284 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12285 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12286 " repaired" % storage_type,
12287 errors.ECODE_INVAL)
12289 def ExpandNames(self):
12290 self.needed_locks = {
12291 locking.LEVEL_NODE: [self.op.node_name],
12294 def _CheckFaultyDisks(self, instance, node_name):
12295 """Ensure faulty disks abort the opcode or at least warn."""
12297 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12299 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12300 " node '%s'" % (instance.name, node_name),
12301 errors.ECODE_STATE)
12302 except errors.OpPrereqError, err:
12303 if self.op.ignore_consistency:
12304 self.LogWarning(str(err.args[0]))
12308 def CheckPrereq(self):
12309 """Check prerequisites.
12312 # Check whether any instance on this node has faulty disks
12313 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12314 if inst.admin_state != constants.ADMINST_UP:
12316 check_nodes = set(inst.all_nodes)
12317 check_nodes.discard(self.op.node_name)
12318 for inst_node_name in check_nodes:
12319 self._CheckFaultyDisks(inst, inst_node_name)
12321 def Exec(self, feedback_fn):
12322 feedback_fn("Repairing storage unit '%s' on %s ..." %
12323 (self.op.name, self.op.node_name))
12325 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12326 result = self.rpc.call_storage_execute(self.op.node_name,
12327 self.op.storage_type, st_args,
12329 constants.SO_FIX_CONSISTENCY)
12330 result.Raise("Failed to repair storage unit '%s' on %s" %
12331 (self.op.name, self.op.node_name))
12334 class LUNodeEvacuate(NoHooksLU):
12335 """Evacuates instances off a list of nodes.
12340 _MODE2IALLOCATOR = {
12341 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12342 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12343 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12345 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12346 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12347 constants.IALLOCATOR_NEVAC_MODES)
12349 def CheckArguments(self):
12350 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12352 def ExpandNames(self):
12353 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12355 if self.op.remote_node is not None:
12356 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12357 assert self.op.remote_node
12359 if self.op.remote_node == self.op.node_name:
12360 raise errors.OpPrereqError("Can not use evacuated node as a new"
12361 " secondary node", errors.ECODE_INVAL)
12363 if self.op.mode != constants.NODE_EVAC_SEC:
12364 raise errors.OpPrereqError("Without the use of an iallocator only"
12365 " secondary instances can be evacuated",
12366 errors.ECODE_INVAL)
12369 self.share_locks = _ShareAll()
12370 self.needed_locks = {
12371 locking.LEVEL_INSTANCE: [],
12372 locking.LEVEL_NODEGROUP: [],
12373 locking.LEVEL_NODE: [],
12376 # Determine nodes (via group) optimistically, needs verification once locks
12377 # have been acquired
12378 self.lock_nodes = self._DetermineNodes()
12380 def _DetermineNodes(self):
12381 """Gets the list of nodes to operate on.
12384 if self.op.remote_node is None:
12385 # Iallocator will choose any node(s) in the same group
12386 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12388 group_nodes = frozenset([self.op.remote_node])
12390 # Determine nodes to be locked
12391 return set([self.op.node_name]) | group_nodes
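# Illustrative sketch (hypothetical names): evacuating "node1" with no
# remote node given locks the evacuated node plus its whole group, e.g.
#   set(["node1"]) | frozenset(["node1", "node2", "node3"])
#     == set(["node1", "node2", "node3"])
# whereas an explicit remote node narrows it to set(["node1", "remote"]).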
12393 def _DetermineInstances(self):
12394 """Builds list of instances to operate on.
12397 assert self.op.mode in constants.NODE_EVAC_MODES
12399 if self.op.mode == constants.NODE_EVAC_PRI:
12400 # Primary instances only
12401 inst_fn = _GetNodePrimaryInstances
12402 assert self.op.remote_node is None, \
12403 "Evacuating primary instances requires iallocator"
12404 elif self.op.mode == constants.NODE_EVAC_SEC:
12405 # Secondary instances only
12406 inst_fn = _GetNodeSecondaryInstances
12409 assert self.op.mode == constants.NODE_EVAC_ALL
12410 inst_fn = _GetNodeInstances
12411 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12413 raise errors.OpPrereqError("Due to an issue with the iallocator"
12414 " interface it is not possible to evacuate"
12415 " all instances at once; specify explicitly"
12416 " whether to evacuate primary or secondary"
12418 errors.ECODE_INVAL)
12420 return inst_fn(self.cfg, self.op.node_name)
12422 def DeclareLocks(self, level):
12423 if level == locking.LEVEL_INSTANCE:
12424 # Lock instances optimistically, needs verification once node and group
12425 # locks have been acquired
12426 self.needed_locks[locking.LEVEL_INSTANCE] = \
12427 set(i.name for i in self._DetermineInstances())
12429 elif level == locking.LEVEL_NODEGROUP:
12430 # Lock node groups for all potential target nodes optimistically, needs
12431 # verification once nodes have been acquired
12432 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12433 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12435 elif level == locking.LEVEL_NODE:
12436 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12438 def CheckPrereq(self):
12440 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12441 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12442 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12444 need_nodes = self._DetermineNodes()
12446 if not owned_nodes.issuperset(need_nodes):
12447 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12448 " locks were acquired, current nodes are"
12449 " are '%s', used to be '%s'; retry the"
12451 (self.op.node_name,
12452 utils.CommaJoin(need_nodes),
12453 utils.CommaJoin(owned_nodes)),
12454 errors.ECODE_STATE)
12456 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12457 if owned_groups != wanted_groups:
12458 raise errors.OpExecError("Node groups changed since locks were acquired,"
12459 " current groups are '%s', used to be '%s';"
12460 " retry the operation" %
12461 (utils.CommaJoin(wanted_groups),
12462 utils.CommaJoin(owned_groups)))
12464 # Determine affected instances
12465 self.instances = self._DetermineInstances()
12466 self.instance_names = [i.name for i in self.instances]
12468 if set(self.instance_names) != owned_instances:
12469 raise errors.OpExecError("Instances on node '%s' changed since locks"
12470 " were acquired, current instances are '%s',"
12471 " used to be '%s'; retry the operation" %
12472 (self.op.node_name,
12473 utils.CommaJoin(self.instance_names),
12474 utils.CommaJoin(owned_instances)))
12476 if self.instance_names:
12477 self.LogInfo("Evacuating instances from node '%s': %s",
12479 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12481 self.LogInfo("No instances to evacuate from node '%s'",
12484 if self.op.remote_node is not None:
12485 for i in self.instances:
12486 if i.primary_node == self.op.remote_node:
12487 raise errors.OpPrereqError("Node %s is the primary node of"
12488 " instance %s, cannot use it as"
12490 (self.op.remote_node, i.name),
12491 errors.ECODE_INVAL)
12493 def Exec(self, feedback_fn):
12494 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12496 if not self.instance_names:
12497 # No instances to evacuate
12500 elif self.op.iallocator is not None:
12501 # TODO: Implement relocation to other group
12502 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12503 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12504 instances=list(self.instance_names))
12505 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12507 ial.Run(self.op.iallocator)
12509 if not ial.success:
12510 raise errors.OpPrereqError("Can't compute node evacuation using"
12511 " iallocator '%s': %s" %
12512 (self.op.iallocator, ial.info),
12513 errors.ECODE_NORES)
12515 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12517 elif self.op.remote_node is not None:
12518 assert self.op.mode == constants.NODE_EVAC_SEC
12520 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12521 remote_node=self.op.remote_node,
12523 mode=constants.REPLACE_DISK_CHG,
12524 early_release=self.op.early_release)]
12525 for instance_name in self.instance_names]
12528 raise errors.ProgrammerError("No iallocator or remote node")
12530 return ResultWithJobs(jobs)
12533 def _SetOpEarlyRelease(early_release, op):
12534 """Sets C{early_release} flag on opcodes if available.
12538 op.early_release = early_release
12539 except AttributeError:
12540 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12545 def _NodeEvacDest(use_nodes, group, nodes):
12546 """Returns group or nodes depending on caller's choice.
12550 return utils.CommaJoin(nodes)
12555 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12556 """Unpacks the result of change-group and node-evacuate iallocator requests.
12558 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12559 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12561 @type lu: L{LogicalUnit}
12562 @param lu: Logical unit instance
12563 @type alloc_result: tuple/list
12564 @param alloc_result: Result from iallocator
12565 @type early_release: bool
12566 @param early_release: Whether to release locks early if possible
12567 @type use_nodes: bool
12568 @param use_nodes: Whether to display node names instead of groups
12571 (moved, failed, jobs) = alloc_result
12574 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12575 for (name, reason) in failed)
12576 lu.LogWarning("Unable to evacuate instances %s", failreason)
12577 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12580 lu.LogInfo("Instances to be moved: %s",
12581 utils.CommaJoin("%s (to %s)" %
12582 (name, _NodeEvacDest(use_nodes, group, nodes))
12583 for (name, group, nodes) in moved))
12585 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12586 map(opcodes.OpCode.LoadOpCode, ops))
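# Illustrative sketch of an iallocator result this helper consumes
# (hypothetical values):
#   alloc_result = (
#     [("inst1", "group1", ["node3"])],      # moved: (name, group, nodes)
#     [],                                    # failed: (name, reason) pairs
#     [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}]],  # jobs, as opcode dicts
#   )
# Each inner job list is deserialized with opcodes.OpCode.LoadOpCode and
# the early_release flag is applied wherever the opcode supports it.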
12590 def _DiskSizeInBytesToMebibytes(lu, size):
12591 """Converts a disk size in bytes to mebibytes.
12593 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12596 (mib, remainder) = divmod(size, 1024 * 1024)
12599 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12600 " to not overwrite existing data (%s bytes will not be"
12601 " wiped)", (1024 * 1024) - remainder)
12607 class LUInstanceGrowDisk(LogicalUnit):
12608 """Grow a disk of an instance.
12611 HPATH = "disk-grow"
12612 HTYPE = constants.HTYPE_INSTANCE
12615 def ExpandNames(self):
12616 self._ExpandAndLockInstance()
12617 self.needed_locks[locking.LEVEL_NODE] = []
12618 self.needed_locks[locking.LEVEL_NODE_RES] = []
12619 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12620 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12622 def DeclareLocks(self, level):
12623 if level == locking.LEVEL_NODE:
12624 self._LockInstancesNodes()
12625 elif level == locking.LEVEL_NODE_RES:
12627 self.needed_locks[locking.LEVEL_NODE_RES] = \
12628 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12630 def BuildHooksEnv(self):
12631 """Build hooks env.
12633 This runs on the master, the primary and all the secondaries.
12637 "DISK": self.op.disk,
12638 "AMOUNT": self.op.amount,
12639 "ABSOLUTE": self.op.absolute,
12641 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12644 def BuildHooksNodes(self):
12645 """Build hooks nodes.
12648 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12651 def CheckPrereq(self):
12652 """Check prerequisites.
12654 This checks that the instance is in the cluster.
12657 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12658 assert instance is not None, \
12659 "Cannot retrieve locked instance %s" % self.op.instance_name
12660 nodenames = list(instance.all_nodes)
12661 for node in nodenames:
12662 _CheckNodeOnline(self, node)
12664 self.instance = instance
12666 if instance.disk_template not in constants.DTS_GROWABLE:
12667 raise errors.OpPrereqError("Instance's disk layout does not support"
12668 " growing", errors.ECODE_INVAL)
12670 self.disk = instance.FindDisk(self.op.disk)
12672 if self.op.absolute:
12673 self.target = self.op.amount
12674 self.delta = self.target - self.disk.size
12676 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12677 "current disk size (%s)" %
12678 (utils.FormatUnit(self.target, "h"),
12679 utils.FormatUnit(self.disk.size, "h")),
12680 errors.ECODE_STATE)
12682 self.delta = self.op.amount
12683 self.target = self.disk.size + self.delta
12685 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12686 utils.FormatUnit(self.delta, "h"),
12687 errors.ECODE_INVAL)
12689 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
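# Worked example (illustrative sizes, in MiB): for a 10240 MiB disk,
#   op.absolute=True,  op.amount=20480  ->  target=20480, delta=10240
#   op.absolute=False, op.amount=10240  ->  delta=10240,  target=20480
# Shrink requests (negative deltas) are rejected above in both forms.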
12691 def _CheckDiskSpace(self, nodenames, req_vgspace):
12692 template = self.instance.disk_template
12693 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
12694 # TODO: check the free disk space for file, when that feature will be
12696 nodes = map(self.cfg.GetNodeInfo, nodenames)
12697 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12700 # With exclusive storage we need to do something smarter than just looking
12701 # at free space; for now, let's simply abort the operation.
12702 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12703 " is enabled", errors.ECODE_STATE)
12704 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)

    if wipe_disks:
      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
    else:
      old_disk_size = None

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)

    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    assert wipe_disks ^ (old_disk_size is None)

    if wipe_disks:
      assert instance.disks[self.op.disk] == disk

      # Wipe newly added disk space
      _WipeDisks(self, instance,
                 disks=[(self.op.disk, disk, old_disk_size)])

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.LogWarning("Disk syncing has not returned a good status; check"
                        " the instance")
        if instance.admin_state != constants.ADMINST_UP:
          _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.LogWarning("Not shutting down the disk even if the instance is"
                      " not supposed to be running because no wait for"
                      " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking:
      if level == locking.LEVEL_NODEGROUP:
        owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))

      elif level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if self.op.use_locking:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None
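
    # The returned value flattens the device status into a tuple, e.g.
    # (illustrative values) ("/dev/drbd0", 147, 0, 90.5, 4, False,
    # constants.LDS_OKAY) for a DRBD device that is 90.5% synced.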
    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    group2name_fn = lambda uuid: groups[uuid].name

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": group2name_fn(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification; if C{None}, the field will be C{None}

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
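
# Minimal illustrative example (values assumed, not from the source):
#   PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})], None)
#   => [(constants.DDM_ADD, -1, {"size": 1024}, None)]
# With private_fn=_InstNicModPrivate, each tuple instead carries a fresh
# private object through which the prepare/apply callbacks can share state.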


#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))
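
# A callback may therefore return None (no change details) or a list of
# two-item tuples such as [("disk/0", "add:size=1024,mode=rw")] (values
# illustrative); both satisfy the check above.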


def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, idx, params, private) in mods:
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
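
# Minimal usage sketch (assumed values; real callers pass the LU's disk/NIC
# callbacks instead of None):
#   container = ["first", "second"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, 0, {})], None)
#   ApplyContainerMods("item", container, chgdesc, mods, None, None, None)
#   # container == ["second"], chgdesc == [("item/0", "remove")]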


def _UpdateIvNames(base_index, disks):
  """Updates the C{iv_name} attribute of disks.

  @type disks: list of L{objects.Disk}

  """
  for (idx, disk) in enumerate(disks):
    disk.iv_name = "disk/%s" % (base_index + idx, )


class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []

      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
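
  # Illustrative upgrade of the legacy two-element syntax (params assumed):
  #   [("add", {...}), ("2", {"mode": "ro"})]
  # becomes the indexed three-element syntax
  #   [(constants.DDM_ADD, -1, {...}),
  #    (constants.DDM_MODIFY, "2", {"mode": "ro"})]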

  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      # If 'key_types' is an empty dict, we assume we have an
      # 'ext' template and thus do not ForceDictType
      if key_types:
        utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY:
      if constants.IDISK_SIZE in params:
        raise errors.OpPrereqError("Disk size change not possible, use"
                                   " grow-disk", errors.ECODE_INVAL)
      if constants.IDISK_MODE not in params:
        raise errors.OpPrereqError("Disk 'mode' is the only kind of"
                                   " modification supported, but missing",
                                   errors.ECODE_NOENT)
      if len(params) > 1:
        raise errors.OpPrereqError("Disk modification doesn't support"
                                   " additional arbitrary parameters",
                                   errors.ECODE_INVAL)
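
  # E.g. (illustrative) an add request arriving as
  # (constants.DDM_ADD, -1, {"size": "1024"}) has its size normalized to the
  # integer 1024 (MiB) and its mode defaulted to constants.DISK_RDWR.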

  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      req_net = params.get(constants.INIC_NETWORK, None)
      link = params.get(constants.NIC_LINK, None)
      mode = params.get(constants.NIC_MODE, None)
      if req_net is not None:
        if req_net.lower() == constants.VALUE_NONE:
          params[constants.INIC_NETWORK] = None
          req_net = None
        elif link is not None or mode is not None:
          raise errors.OpPrereqError("If a network is given, mode or link"
                                     " should not be set",
                                     errors.ECODE_INVAL)

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if ip is not None:
        if ip.lower() == constants.VALUE_NONE:
          params[constants.INIC_IP] = None
        else:
          if ip.lower() == constants.NIC_IP_POOL:
            if op == constants.DDM_ADD and req_net is None:
              raise errors.OpPrereqError("If ip=pool, parameter network"
                                         " must be passed too",
                                         errors.ECODE_INVAL)
          else:
            if not netutils.IPAddress.IsValid(ip):
              raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                         errors.ECODE_INVAL)

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    self.op.disks = self._UpgradeDiskNicMods(
      "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = self._UpgradeDiskNicMods(
      "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    # Lock the node group in shared mode to be able to look up the ipolicy
    self.share_locks[locking.LEVEL_NODEGROUP] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      # Acquire locks for the instance's nodegroups optimistically. Needs
      # to be verified in CheckPrereq
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        n = copy.deepcopy(nic)
        nicparams = self.cluster.SimpleFillNIC(n.nicparams)
        n.nicparams = nicparams
        nics.append(_NICToTuple(self, n))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def _PrepareNicModification(self, params, private, old_ip, old_net,
                              old_params, cluster, pnode):

    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    req_link = update_params_dict.get(constants.NIC_LINK, None)
    req_mode = update_params_dict.get(constants.NIC_MODE, None)

    new_net = params.get(constants.INIC_NETWORK, old_net)
    if new_net is not None:
      netparams = self.cfg.GetGroupNetParams(new_net, pnode)
      if netparams is None:
        raise errors.OpPrereqError("No netparams found for the network"
                                   " %s, probably not connected" % new_net,
                                   errors.ECODE_INVAL)
      new_params = dict(netparams)
    else:
      new_params = _GetUpdatedParams(old_params, update_params_dict)

    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    elif new_mode == constants.NIC_MODE_OVS:
      # TODO: check OVS link
      self.LogInfo("OVS links are currently not checked for correctness")

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)
    elif new_net != old_net:

      def get_net_prefix(net):
        if net:
          uuid = self.cfg.LookupNetwork(net)
          if uuid:
            nobj = self.cfg.GetNetwork(uuid)
            return nobj.mac_prefix
        return None

      new_prefix = get_net_prefix(new_net)
      old_prefix = get_net_prefix(old_net)
      if old_prefix != new_prefix:
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())

    # if there is a change in the NIC/network configuration
    new_ip = params.get(constants.INIC_IP, old_ip)
    if (new_ip, new_net) != (old_ip, old_net):
      if new_ip:
        if new_net:
          if new_ip.lower() == constants.NIC_IP_POOL:
            try:
              new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("Unable to get a free IP"
                                         " from the address pool",
                                         errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
            params[constants.INIC_IP] = new_ip
          elif new_ip != old_ip or new_net != old_net:
            try:
              self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
              self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP %s not available in network %s" %
                                         (new_ip, new_net),
                                         errors.ECODE_NOTUNIQUE)
        elif new_ip.lower() == constants.NIC_IP_POOL:
          raise errors.OpPrereqError("ip=pool, but no network found",
                                     errors.ECODE_INVAL)

        # the new network is None, so check for a conflicting IP instead
        elif self.op.conflicts_check:
          _CheckForConflictingIp(self, new_ip, pnode)

      # release the old IP if the old network is set
      if old_ip and old_net:
        try:
          self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
        except errors.AddressPoolError:
          logging.warning("Release IP %s not contained in network %s",
                          old_ip, old_net)

    # there are no changes in the (net, ip) tuple
    elif (old_net is not None and
          (req_link is not None or req_mode is not None)):
      raise errors.OpPrereqError("Not allowed to change link or mode of"
                                 " a NIC that is connected to a network",
                                 errors.ECODE_INVAL)

    private.params = new_params
    private.filled = new_filled_params

  def _PreCheckDiskTemplate(self, pnode_info):
    """CheckPrereq checks related to a new disk template."""
    # Arguments are passed to avoid configuration lookups
    instance = self.instance
    pnode = instance.primary_node
    cluster = self.cluster
    if instance.disk_template == self.op.disk_template:
      raise errors.OpPrereqError("Instance already has disk template %s" %
                                 instance.disk_template, errors.ECODE_INVAL)

    if (instance.disk_template,
        self.op.disk_template) not in self._DISK_CONVERSIONS:
      raise errors.OpPrereqError("Unsupported disk template conversion from"
                                 " %s to %s" % (instance.disk_template,
                                                self.op.disk_template),
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN,
                        msg="cannot change disk template")
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.remote_node == pnode:
        raise errors.OpPrereqError("Given new secondary node %s is the same"
                                   " as the primary node of the instance" %
                                   self.op.remote_node, errors.ECODE_STATE)
      _CheckNodeOnline(self, self.op.remote_node)
      _CheckNodeNotDrained(self, self.op.remote_node)
      # FIXME: here we assume that the old instance type is DT_PLAIN
      assert instance.disk_template == constants.DT_PLAIN
      disks = [{constants.IDISK_SIZE: d.size,
                constants.IDISK_VG: d.logical_id[0]}
               for d in instance.disks]
      required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
      _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

      snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
      snode_group = self.cfg.GetNodeGroup(snode_info.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              snode_group)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                              ignore=self.op.ignore_ipolicy)
      if pnode_info.group != snode_info.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
      # Make sure none of the nodes require exclusive storage
      nodes = [pnode_info]
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        assert snode_info
        nodes.append(snode_info)
      has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
      if compat.any(map(has_es, nodes)):
        errmsg = ("Cannot convert disk template from %s to %s when exclusive"
                  " storage is enabled" % (instance.disk_template,
                                           self.op.disk_template))
        raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = instance.primary_node
    assert pnode in self.owned_locks(locking.LEVEL_NODE)
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
    assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
    group_info = self.cfg.GetNodeGroup(pnode_info.group)

    # dictionary with instance information after the modification
    ispec = {}

    # Check disk modifications. This is done here and not in CheckArguments
    # (as with NICs), because we need to know the instance's disk template
    if instance.disk_template == constants.DT_EXT:
      self._CheckMods("disk", self.op.disks, {},
                      self._VerifyDiskModification)
    else:
      self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                      self._VerifyDiskModification)

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # Check the validity of the `provider' parameter
    if instance.disk_template == constants.DT_EXT:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if mod[0] == constants.DDM_ADD:
          if ext_provider is None:
            raise errors.OpPrereqError("Instance template is '%s' and parameter"
                                       " '%s' missing, during disk add" %
                                       (constants.DT_EXT,
                                        constants.IDISK_PROVIDER),
                                       errors.ECODE_NOENT)
        elif mod[0] == constants.DDM_MODIFY:
          if ext_provider:
            raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
                                       " modification" %
                                       constants.IDISK_PROVIDER,
                                       errors.ECODE_INVAL)
    else:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if ext_provider is not None:
          raise errors.OpPrereqError("Parameter '%s' is only valid for"
                                     " instances of type '%s'" %
                                     (constants.IDISK_PROVIDER,
                                      constants.DT_EXT),
                                     errors.ECODE_INVAL)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    if self.op.disk_template:
      self._PreCheckDiskTemplate(pnode_info)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)
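
    # Example (illustrative): vcpus=2 with cpu_mask "1:3" parses to
    # [[1], [3]], matching the vCPU count and requiring at least four
    # physical CPUs (IDs 0-3); a single-entry mask such as "0-1" applies
    # to all vCPUs and skips the length check.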

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor], False)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0

          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" %
                                   instance.name, errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
          (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
           self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      delta = self.op.runtime_mem - current_memory
      if delta > 0:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name, delta, instance.hypervisor)
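
      # Worked example (illustrative): ballooning from current_memory=1024
      # to runtime_mem=2048 gives delta=1024, so the primary node must have
      # at least 1024 MB free; shrinking (delta <= 0) skips the check.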

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    def _PrepareNicCreate(_, params, private):
      self._PrepareNicModification(params, private, None, None,
                                   {}, cluster, pnode)
      return (None, None)

    def _PrepareNicMod(_, nic, params, private):
      self._PrepareNicModification(params, private, nic.ip, nic.network,
                                   nic.nicparams, cluster, pnode)
      return None

    def _PrepareNicRemove(_, params, __):
      ip = params.ip
      net = params.network
      if net is not None and ip is not None:
        self.cfg.ReleaseIp(net, ip, self.proc.GetECId())

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    # Verify disk changes (operating on a copy)
    disks = instance.disks[:]
    ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)
    disk_sizes = [disk.size for disk in instance.disks]
    disk_sizes.extend(params["size"] for (op, idx, params, private) in
                      self.diskmod if op == constants.DDM_ADD)
    ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
    ispec[constants.ISPEC_DISK_SIZE] = disk_sizes

    if self.op.offline is not None and self.op.offline:
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
                          msg="can't change to offline")

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
      ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
    else:
      self._new_nics = None
      ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)

    if not self.op.ignore_ipolicy:
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)

      # Fill ispec with backend parameters
      ispec[constants.ISPEC_SPINDLE_USE] = \
        self.be_new.get(constants.BE_SPINDLE_USE, None)
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
                                                         None)

      # Copy ispec to verify parameters with min/max values separately
      ispec_max = ispec.copy()
      ispec_max[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MAXMEM, None)
      res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
      ispec_min = ispec.copy()
      ispec_min[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MINMEM, None)
      res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)

      if (res_max or res_min):
        # FIXME: Improve error message by including information about whether
        # the upper or lower limit of the parameter fails the ipolicy.
        msg = ("Instance allocation to group %s (%s) violates policy: %s" %
               (group_info, group_info.name,
                utils.CommaJoin(set(res_max + res_min))))
        raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
    p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
    s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True, p_excl_stor)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True,
                              s_excl_stor)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in anno_disks:
      for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
                              excl_stor)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
    new_disks = [d.children[0] for d in instance.disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    """
    instance = self.instance

    # add a new disk
    if instance.disk_template in constants.DTS_FILEBASED:
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
    else:
      file_driver = file_path = None

    disk = \
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

    return (disk, [
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
      ])

  @staticmethod
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    disk.mode = params[constants.IDISK_MODE]

    return [
      ("disk.mode/%d" % idx, disk.mode),
      ]

  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    """
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
    for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if root.dev_type in constants.LDS_DRBD:
      self.cfg.AddTcpUdpPort(root.logical_id[2])

  @staticmethod
  def _CreateNewNic(idx, params, private):
    """Creates data structure for a new network interface.

    """
    mac = params[constants.INIC_MAC]
    ip = params.get(constants.INIC_IP, None)
    net = params.get(constants.INIC_NETWORK, None)
    #TODO: not private.filled?? can a nic have no nicparams??
    nicparams = private.filled

    return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
      ("nic.%d" % idx,
       "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
        private.filled[constants.NIC_LINK],
        net)),
      ])

  @staticmethod
  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    """
    changes = []

    for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
      if key in params:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

    if private.filled:
      nic.nicparams = private.filled

      for (key, val) in nic.nicparams.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

    return changes

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    self.cfg.Update(instance, feedback_fn, self.proc.GetECId())

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=list(self.target_uuids))
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info), errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
                             ["node", "export"], self.op.use_locking)

  def ExpandNames(self):
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is None:
        result[node] = False
      else:
        result.setdefault(node, []).append(expname)

    return result
14418 class _ExportQuery(_QueryBase):
14419 FIELDS = query.EXPORT_FIELDS
14421 #: The node name is not a unique key for this query
14422 SORT_FIELD = "node"
14424 def ExpandNames(self, lu):
14425 lu.needed_locks = {}
14427 # The following variables interact with _QueryBase._GetNames
14429 self.wanted = _GetWantedNodes(lu, self.names)
14431 self.wanted = locking.ALL_SET
14433 self.do_locking = self.use_locking
14435 if self.do_locking:
14436 lu.share_locks = _ShareAll()
14437 lu.needed_locks = {
14438 locking.LEVEL_NODE: self.wanted,
14442 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14444 def DeclareLocks(self, lu, level):
14447 def _GetQueryData(self, lu):
14448 """Computes the list of nodes and their attributes.
14451 # Locking is not used
14453 assert not (compat.any(lu.glm.is_owned(level)
14454 for level in locking.LEVELS
14455 if level != locking.LEVEL_CLUSTER) or
14456 self.do_locking or self.use_locking)
14458 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14462 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14464 result.append((node, None))
14466 result.extend((node, expname) for expname in nres.payload)
14471 class LUBackupPrepare(NoHooksLU):
14472 """Prepares an instance for an export and returns useful information.
14477 def ExpandNames(self):
14478 self._ExpandAndLockInstance()
14480 def CheckPrereq(self):
14481 """Check prerequisites.
14484 instance_name = self.op.instance_name
14486 self.instance = self.cfg.GetInstanceInfo(instance_name)
14487 assert self.instance is not None, \
14488 "Cannot retrieve locked instance %s" % self.op.instance_name
14489 _CheckNodeOnline(self, self.instance.primary_node)
14491 self._cds = _GetClusterDomainSecret()
14493 def Exec(self, feedback_fn):
14494 """Prepares an instance for an export.
14497 instance = self.instance
14499 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14500 salt = utils.GenerateSecret(8)
14502 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14503 result = self.rpc.call_x509_cert_create(instance.primary_node,
14504 constants.RIE_CERT_VALIDITY)
14505 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14507 (name, cert_pem) = result.payload
14509 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14513 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14514 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14516 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14522 class LUBackupExport(LogicalUnit):
14523 """Export an instance to an image in the cluster.
14526 HPATH = "instance-export"
14527 HTYPE = constants.HTYPE_INSTANCE
14530 def CheckArguments(self):
14531 """Check the arguments.
14534 self.x509_key_name = self.op.x509_key_name
14535 self.dest_x509_ca_pem = self.op.destination_x509_ca
14537 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14538 if not self.x509_key_name:
14539 raise errors.OpPrereqError("Missing X509 key name for encryption",
14540 errors.ECODE_INVAL)
14542 if not self.dest_x509_ca_pem:
14543 raise errors.OpPrereqError("Missing destination X509 CA",
14544 errors.ECODE_INVAL)
14546 def ExpandNames(self):
14547 self._ExpandAndLockInstance()
14549 # Lock all nodes for local exports
14550 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14551 # FIXME: lock only instance primary and destination node
14553 # Sad but true, for now we have to lock all nodes, as we don't know where
14554 # the previous export might be, and in this LU we search for it and
14555 # remove it from its current node. In the future we could fix this by:
14556 # - making a tasklet to search (share-lock all), then create the
14557 # new one, then a tasklet to remove the old export afterwards
14558 # - removing the removal operation altogether
14559 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14561 # Allocations should be stopped while this LU runs with node locks, but
14562 # the node allocation lock doesn't have to be held exclusively
14563 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14564 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14566 def DeclareLocks(self, level):
14567 """Last minute lock declaration."""
14568 # All nodes are locked anyway, so nothing to do here.
14570 def BuildHooksEnv(self):
14571 """Build hooks env.
14573 This will run on the master, primary node and target node.
14577 "EXPORT_MODE": self.op.mode,
14578 "EXPORT_NODE": self.op.target_node,
14579 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14580 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14581 # TODO: Generic function for boolean env variables
14582 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14585 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14589 def BuildHooksNodes(self):
14590 """Build hooks nodes.
14593 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14595 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14596 nl.append(self.op.target_node)
14600 def CheckPrereq(self):
14601 """Check prerequisites.
14603 This checks that the instance and node names are valid.
14606 instance_name = self.op.instance_name
14608 self.instance = self.cfg.GetInstanceInfo(instance_name)
14609 assert self.instance is not None, \
14610 "Cannot retrieve locked instance %s" % self.op.instance_name
14611 _CheckNodeOnline(self, self.instance.primary_node)
14613 if (self.op.remove_instance and
14614 self.instance.admin_state == constants.ADMINST_UP and
14615 not self.op.shutdown):
14616 raise errors.OpPrereqError("Can not remove instance without shutting it"
14617 " down before", errors.ECODE_STATE)
14619 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14620 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14621 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14622 assert self.dst_node is not None
14624 _CheckNodeOnline(self, self.dst_node.name)
14625 _CheckNodeNotDrained(self, self.dst_node.name)
14628 self.dest_disk_info = None
14629 self.dest_x509_ca = None
14631 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14632 self.dst_node = None
14634 if len(self.op.target_node) != len(self.instance.disks):
14635 raise errors.OpPrereqError(("Received destination information for %s"
14636 " disks, but instance %s has %s disks") %
14637 (len(self.op.target_node), instance_name,
14638 len(self.instance.disks)),
14639 errors.ECODE_INVAL)
14641 cds = _GetClusterDomainSecret()
14643 # Check X509 key name
14645 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14646 except (TypeError, ValueError), err:
14647 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14648 errors.ECODE_INVAL)
14650 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14651 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14652 errors.ECODE_INVAL)
14654 # Load and verify CA
14656 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14657 except OpenSSL.crypto.Error, err:
14658 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14659 (err, ), errors.ECODE_INVAL)
14661 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14662 if errcode is not None:
14663 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14664 (msg, ), errors.ECODE_INVAL)
14666 self.dest_x509_ca = cert
14668 # Verify target information
14670 for idx, disk_data in enumerate(self.op.target_node):
14672 (host, port, magic) = \
14673 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14674 except errors.GenericError, err:
14675 raise errors.OpPrereqError("Target info for disk %s: %s" %
14676 (idx, err), errors.ECODE_INVAL)
14678 disk_info.append((host, port, magic))
14680 assert len(disk_info) == len(self.op.target_node)
14681 self.dest_disk_info = disk_info
14684 raise errors.ProgrammerError("Unhandled export mode %r" %
14687 # instance disk type verification
14688 # TODO: Implement export support for file-based disks
14689 for disk in self.instance.disks:
14690 if disk.dev_type == constants.LD_FILE:
14691 raise errors.OpPrereqError("Export not supported for instances with"
14692 " file-based disks", errors.ECODE_INVAL)
14694 def _CleanupExports(self, feedback_fn):
14695 """Removes exports of current instance from all other nodes.
14697 If an instance in a cluster with nodes A..D was exported to node C, its
14698 exports will be removed from the nodes A, B and D.
14701 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14703 nodelist = self.cfg.GetNodeList()
14704 nodelist.remove(self.dst_node.name)
14706 # on one-node clusters nodelist will be empty after the removal;
14707 # if we proceeded, the backup would be removed because OpBackupQuery
14708 # substitutes an empty list with the full cluster node list.
14709 iname = self.instance.name
14711 feedback_fn("Removing old exports for instance %s" % iname)
14712 exportlist = self.rpc.call_export_list(nodelist)
14713 for node in exportlist:
14714 if exportlist[node].fail_msg:
14716 if iname in exportlist[node].payload:
14717 msg = self.rpc.call_export_remove(node, iname).fail_msg
14719 self.LogWarning("Could not remove older export for instance %s"
14720 " on node %s: %s", iname, node, msg)
14722 def Exec(self, feedback_fn):
14723 """Export an instance to an image in the cluster.
14726 assert self.op.mode in constants.EXPORT_MODES
14728 instance = self.instance
14729 src_node = instance.primary_node
14731 if self.op.shutdown:
14732 # shutdown the instance, but not the disks
14733 feedback_fn("Shutting down instance %s" % instance.name)
14734 result = self.rpc.call_instance_shutdown(src_node, instance,
14735 self.op.shutdown_timeout)
14736 # TODO: Maybe ignore failures if ignore_remove_failures is set
14737 result.Raise("Could not shutdown instance %s on"
14738 " node %s" % (instance.name, src_node))
14740 # set the disk IDs correctly since call_instance_start needs the
14741 # correct drbd minor to create the symlinks
14742 for disk in instance.disks:
14743 self.cfg.SetDiskID(disk, src_node)
14745 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14748 # Activate the instance disks if we're exporting a stopped instance
14749 feedback_fn("Activating disks for %s" % instance.name)
14750 _StartInstanceDisks(self, instance, None)
14753 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14756 helper.CreateSnapshots()
14758 if (self.op.shutdown and
14759 instance.admin_state == constants.ADMINST_UP and
14760 not self.op.remove_instance):
14761 assert not activate_disks
14762 feedback_fn("Starting instance %s" % instance.name)
14763 result = self.rpc.call_instance_start(src_node,
14764 (instance, None, None), False)
14765 msg = result.fail_msg
14767 feedback_fn("Failed to start instance: %s" % msg)
14768 _ShutdownInstanceDisks(self, instance)
14769 raise errors.OpExecError("Could not start instance: %s" % msg)
14771 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14772 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14773 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14774 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14775 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14777 (key_name, _, _) = self.x509_key_name
14780 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14783 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14784 key_name, dest_ca_pem,
14789 # Check for backwards compatibility
14790 assert len(dresults) == len(instance.disks)
14791 assert compat.all(isinstance(i, bool) for i in dresults), \
14792 "Not all results are boolean: %r" % dresults
14796 feedback_fn("Deactivating disks for %s" % instance.name)
14797 _ShutdownInstanceDisks(self, instance)
14799 if not (compat.all(dresults) and fin_resu):
14802 failures.append("export finalization")
14803 if not compat.all(dresults):
14804 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14806 failures.append("disk export: disk(s) %s" % fdsk)
14808 raise errors.OpExecError("Export failed, errors in %s" %
14809 utils.CommaJoin(failures))
14811 # At this point the export was successful; we can clean up and finish
14813 # Remove instance if requested
14814 if self.op.remove_instance:
14815 feedback_fn("Removing instance %s" % instance.name)
14816 _RemoveInstance(self, feedback_fn, instance,
14817 self.op.ignore_remove_failures)
14819 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14820 self._CleanupExports(feedback_fn)
14822 return fin_resu, dresults
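# Illustrative return value of the method above: "fin_resu" is the overall
# finalization status and "dresults" holds one boolean per instance disk,
# e.g. (True, [True, True]) for a fully successful two-disk export.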
14825 class LUBackupRemove(NoHooksLU):
14826 """Remove exports related to the named instance.
14831 def ExpandNames(self):
14832 self.needed_locks = {
14833 # We need all nodes to be locked in order for RemoveExport to work, but
14834 # we don't need to lock the instance itself, as nothing will happen to it
14835 # (and we can also remove exports for an already-removed instance)
14836 locking.LEVEL_NODE: locking.ALL_SET,
14838 # Removing backups is quick, so blocking allocations is justified
14839 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14842 # Allocations should be stopped while this LU runs with node locks, but
14843 # the node allocation lock doesn't have to be held exclusively
14844 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14846 def Exec(self, feedback_fn):
14847 """Remove any export.
14850 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14851 # If the instance was not found we'll try with the name that was passed in.
14852 # This will only work if it was an FQDN, though.
14854 if not instance_name:
14856 instance_name = self.op.instance_name
14858 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14859 exportlist = self.rpc.call_export_list(locked_nodes)
14861 for node in exportlist:
14862 msg = exportlist[node].fail_msg
14864 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14866 if instance_name in exportlist[node].payload:
14868 result = self.rpc.call_export_remove(node, instance_name)
14869 msg = result.fail_msg
14871 logging.error("Could not remove export for instance %s"
14872 " on node %s: %s", instance_name, node, msg)
14874 if fqdn_warn and not found:
14875 feedback_fn("Export not found. If trying to remove an export belonging"
14876 " to a deleted instance please use its Fully Qualified"
14880 class LUGroupAdd(LogicalUnit):
14881 """Logical unit for creating node groups.
14884 HPATH = "group-add"
14885 HTYPE = constants.HTYPE_GROUP
14888 def ExpandNames(self):
14889 # We need the new group's UUID here so that we can create and acquire the
14890 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14891 # that it should not check whether the UUID exists in the configuration.
14892 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14893 self.needed_locks = {}
14894 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14896 def CheckPrereq(self):
14897 """Check prerequisites.
14899 This checks that the given group name is not an existing node group
14904 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14905 except errors.OpPrereqError:
14908 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14909 " node group (UUID: %s)" %
14910 (self.op.group_name, existing_uuid),
14911 errors.ECODE_EXISTS)
14913 if self.op.ndparams:
14914 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14916 if self.op.hv_state:
14917 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14919 self.new_hv_state = None
14921 if self.op.disk_state:
14922 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14924 self.new_disk_state = None
14926 if self.op.diskparams:
14927 for templ in constants.DISK_TEMPLATES:
14928 if templ in self.op.diskparams:
14929 utils.ForceDictType(self.op.diskparams[templ],
14930 constants.DISK_DT_TYPES)
14931 self.new_diskparams = self.op.diskparams
14933 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14934 except errors.OpPrereqError, err:
14935 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14936 errors.ECODE_INVAL)
14938 self.new_diskparams = {}
14940 if self.op.ipolicy:
14941 cluster = self.cfg.GetClusterInfo()
14942 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14944 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14945 except errors.ConfigurationError, err:
14946 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14947 errors.ECODE_INVAL)
14949 def BuildHooksEnv(self):
14950 """Build hooks env.
14954 "GROUP_NAME": self.op.group_name,
14957 def BuildHooksNodes(self):
14958 """Build hooks nodes.
14961 mn = self.cfg.GetMasterNode()
14962 return ([mn], [mn])
14964 def Exec(self, feedback_fn):
14965 """Add the node group to the cluster.
14968 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14969 uuid=self.group_uuid,
14970 alloc_policy=self.op.alloc_policy,
14971 ndparams=self.op.ndparams,
14972 diskparams=self.new_diskparams,
14973 ipolicy=self.op.ipolicy,
14974 hv_state_static=self.new_hv_state,
14975 disk_state_static=self.new_disk_state)
14977 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14978 del self.remove_locks[locking.LEVEL_NODEGROUP]
14981 class LUGroupAssignNodes(NoHooksLU):
14982 """Logical unit for assigning nodes to groups.
14987 def ExpandNames(self):
14988 # These raise errors.OpPrereqError on their own:
14989 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14990 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14992 # We want to lock all the affected nodes and groups. We have readily
14993 # available the list of nodes, and the *destination* group. To gather the
14994 # list of "source" groups, we need to fetch node information later on.
14995 self.needed_locks = {
14996 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14997 locking.LEVEL_NODE: self.op.nodes,
15000 def DeclareLocks(self, level):
15001 if level == locking.LEVEL_NODEGROUP:
15002 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15004 # Try to get all affected nodes' groups without having the group or node
15005 # lock yet. Needs verification later in the code flow.
15006 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15008 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15010 def CheckPrereq(self):
15011 """Check prerequisites.
15014 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15015 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15016 frozenset(self.op.nodes))
15018 expected_locks = (set([self.group_uuid]) |
15019 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15020 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15021 if actual_locks != expected_locks:
15022 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15023 " current groups are '%s', used to be '%s'" %
15024 (utils.CommaJoin(expected_locks),
15025 utils.CommaJoin(actual_locks)))
15027 self.node_data = self.cfg.GetAllNodesInfo()
15028 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15029 instance_data = self.cfg.GetAllInstancesInfo()
15031 if self.group is None:
15032 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15033 (self.op.group_name, self.group_uuid))
15035 (new_splits, previous_splits) = \
15036 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15037 for node in self.op.nodes],
15038 self.node_data, instance_data)
15041 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15043 if not self.op.force:
15044 raise errors.OpExecError("The following instances get split by this"
15045 " change and --force was not given: %s" %
15048 self.LogWarning("This operation will split the following instances: %s",
15051 if previous_splits:
15052 self.LogWarning("In addition, these already-split instances continue"
15053 " to be split across groups: %s",
15054 utils.CommaJoin(utils.NiceSort(previous_splits)))
15056 def Exec(self, feedback_fn):
15057 """Assign nodes to a new group.
15060 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15062 self.cfg.AssignGroupNodes(mods)
15065 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15066 """Check for split instances after a node assignment.
15068 This method considers a series of node assignments as an atomic operation,
15069 and returns information about split instances after applying the set of changes.
15072 In particular, it returns information about newly split instances, and
15073 instances that were already split and remain so after the change.
15075 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
15078 @type changes: list of (node_name, new_group_uuid) pairs.
15079 @param changes: list of node assignments to consider.
15080 @param node_data: a dict with data for all nodes
15081 @param instance_data: a dict with all instances to consider
15082 @rtype: a two-tuple
15083 @return: a list of instances that were previously OK and become split as a
15084 consequence of this change, and a list of instances that were already
15085 split and that this change does not fix.
15088 changed_nodes = dict((node, group) for node, group in changes
15089 if node_data[node].group != group)
15091 all_split_instances = set()
15092 previously_split_instances = set()
15094 def InstanceNodes(instance):
15095 return [instance.primary_node] + list(instance.secondary_nodes)
15097 for inst in instance_data.values():
15098 if inst.disk_template not in constants.DTS_INT_MIRROR:
15101 instance_nodes = InstanceNodes(inst)
15103 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15104 previously_split_instances.add(inst.name)
15106 if len(set(changed_nodes.get(node, node_data[node].group)
15107 for node in instance_nodes)) > 1:
15108 all_split_instances.add(inst.name)
15110 return (list(all_split_instances - previously_split_instances),
15111 list(previously_split_instances & all_split_instances))
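# A minimal usage sketch of the split computation above, with stand-in
# objects (illustrative only; real callers pass node and instance objects
# from the cluster configuration, and the group UUIDs here are made up):
def _SplitCheckSketch():
  """Illustrative only: one DRBD instance, its secondary changes group.

  """
  class _FakeNode(object):
    def __init__(self, group):
      self.group = group

  class _FakeInst(object):
    name = "inst1.example.com"
    disk_template = constants.DT_DRBD8
    primary_node = "node1"
    secondary_nodes = ["node2"]

  node_data = {"node1": _FakeNode("group-a"), "node2": _FakeNode("group-a")}
  instance_data = {_FakeInst.name: _FakeInst()}
  # Moving only the secondary to another group splits the instance, so
  # this returns (["inst1.example.com"], [])
  return LUGroupAssignNodes.CheckAssignmentForSplitInstances(
    [("node2", "group-b")], node_data, instance_data)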
15114 class _GroupQuery(_QueryBase):
15115 FIELDS = query.GROUP_FIELDS
15117 def ExpandNames(self, lu):
15118 lu.needed_locks = {}
15120 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15121 self._cluster = lu.cfg.GetClusterInfo()
15122 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15125 self.wanted = [name_to_uuid[name]
15126 for name in utils.NiceSort(name_to_uuid.keys())]
15128 # Accept names to be either names or UUIDs.
15131 all_uuid = frozenset(self._all_groups.keys())
15133 for name in self.names:
15134 if name in all_uuid:
15135 self.wanted.append(name)
15136 elif name in name_to_uuid:
15137 self.wanted.append(name_to_uuid[name])
15139 missing.append(name)
15142 raise errors.OpPrereqError("Some groups do not exist: %s" %
15143 utils.CommaJoin(missing),
15144 errors.ECODE_NOENT)
15146 def DeclareLocks(self, lu, level):
15149 def _GetQueryData(self, lu):
15150 """Computes the list of node groups and their attributes.
15153 do_nodes = query.GQ_NODE in self.requested_data
15154 do_instances = query.GQ_INST in self.requested_data
15156 group_to_nodes = None
15157 group_to_instances = None
15159 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15160 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15161 # latter GetAllInstancesInfo() is not enough, for we have to go through
15162 # instance->node. Hence, we will need to process nodes even if we only need
15163 # instance information.
15164 if do_nodes or do_instances:
15165 all_nodes = lu.cfg.GetAllNodesInfo()
15166 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15169 for node in all_nodes.values():
15170 if node.group in group_to_nodes:
15171 group_to_nodes[node.group].append(node.name)
15172 node_to_group[node.name] = node.group
15175 all_instances = lu.cfg.GetAllInstancesInfo()
15176 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15178 for instance in all_instances.values():
15179 node = instance.primary_node
15180 if node in node_to_group:
15181 group_to_instances[node_to_group[node]].append(instance.name)
15184 # Do not pass on node information if it was not requested.
15185 group_to_nodes = None
15187 return query.GroupQueryData(self._cluster,
15188 [self._all_groups[uuid]
15189 for uuid in self.wanted],
15190 group_to_nodes, group_to_instances,
15191 query.GQ_DISKPARAMS in self.requested_data)
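# Illustrative shapes of the two mappings built above (hypothetical
# values): group_to_nodes = {"group-uuid-1": ["node1", "node2"]} and
# group_to_instances = {"group-uuid-1": ["inst1.example.com"]}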
15194 class LUGroupQuery(NoHooksLU):
15195 """Logical unit for querying node groups.
15200 def CheckArguments(self):
15201 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15202 self.op.output_fields, False)
15204 def ExpandNames(self):
15205 self.gq.ExpandNames(self)
15207 def DeclareLocks(self, level):
15208 self.gq.DeclareLocks(self, level)
15210 def Exec(self, feedback_fn):
15211 return self.gq.OldStyleQuery(self)
15214 class LUGroupSetParams(LogicalUnit):
15215 """Modifies the parameters of a node group.
15218 HPATH = "group-modify"
15219 HTYPE = constants.HTYPE_GROUP
15222 def CheckArguments(self):
15225 self.op.diskparams,
15226 self.op.alloc_policy,
15228 self.op.disk_state,
15232 if all_changes.count(None) == len(all_changes):
15233 raise errors.OpPrereqError("Please pass at least one modification",
15234 errors.ECODE_INVAL)
15236 def ExpandNames(self):
15237 # This raises errors.OpPrereqError on its own:
15238 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15240 self.needed_locks = {
15241 locking.LEVEL_INSTANCE: [],
15242 locking.LEVEL_NODEGROUP: [self.group_uuid],
15245 self.share_locks[locking.LEVEL_INSTANCE] = 1
15247 def DeclareLocks(self, level):
15248 if level == locking.LEVEL_INSTANCE:
15249 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15251 # Lock instances optimistically, needs verification once group lock has
15253 self.needed_locks[locking.LEVEL_INSTANCE] = \
15254 self.cfg.GetNodeGroupInstances(self.group_uuid)
15257 def _UpdateAndVerifyDiskParams(old, new):
15258 """Updates and verifies disk parameters.
15261 new_params = _GetUpdatedParams(old, new)
15262 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15265 def CheckPrereq(self):
15266 """Check prerequisites.
15269 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15271 # Check if locked instances are still correct
15272 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15274 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15275 cluster = self.cfg.GetClusterInfo()
15277 if self.group is None:
15278 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15279 (self.op.group_name, self.group_uuid))
15281 if self.op.ndparams:
15282 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15283 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15284 self.new_ndparams = new_ndparams
15286 if self.op.diskparams:
15287 diskparams = self.group.diskparams
15288 uavdp = self._UpdateAndVerifyDiskParams
15289 # For each disk template, update and verify the values of its subdict
15290 new_diskparams = dict((dt,
15291 uavdp(diskparams.get(dt, {}),
15292 self.op.diskparams[dt]))
15293 for dt in constants.DISK_TEMPLATES
15294 if dt in self.op.diskparams)
15295 # Now that all diskparams subdicts are ready, merge the actual dict
15296 # with all updated subdicts
15297 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
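# E.g. (hypothetical values): with existing diskparams for "drbd" and
# "plain" and an opcode only updating "drbd", the merge keeps the
# untouched "plain" subdict and installs the updated "drbd" one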
15299 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15300 except errors.OpPrereqError, err:
15301 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15302 errors.ECODE_INVAL)
15304 if self.op.hv_state:
15305 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15306 self.group.hv_state_static)
15308 if self.op.disk_state:
15309 self.new_disk_state = \
15310 _MergeAndVerifyDiskState(self.op.disk_state,
15311 self.group.disk_state_static)
15313 if self.op.ipolicy:
15314 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15318 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15319 inst_filter = lambda inst: inst.name in owned_instances
15320 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15321 gmi = ganeti.masterd.instance
15323 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15325 new_ipolicy, instances)
15328 self.LogWarning("After the ipolicy change the following instances"
15329 " violate them: %s",
15330 utils.CommaJoin(violations))
15332 def BuildHooksEnv(self):
15333 """Build hooks env.
15337 "GROUP_NAME": self.op.group_name,
15338 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15341 def BuildHooksNodes(self):
15342 """Build hooks nodes.
15345 mn = self.cfg.GetMasterNode()
15346 return ([mn], [mn])
15348 def Exec(self, feedback_fn):
15349 """Modifies the node group.
15354 if self.op.ndparams:
15355 self.group.ndparams = self.new_ndparams
15356 result.append(("ndparams", str(self.group.ndparams)))
15358 if self.op.diskparams:
15359 self.group.diskparams = self.new_diskparams
15360 result.append(("diskparams", str(self.group.diskparams)))
15362 if self.op.alloc_policy:
15363 self.group.alloc_policy = self.op.alloc_policy
15365 if self.op.hv_state:
15366 self.group.hv_state_static = self.new_hv_state
15368 if self.op.disk_state:
15369 self.group.disk_state_static = self.new_disk_state
15371 if self.op.ipolicy:
15372 self.group.ipolicy = self.new_ipolicy
15374 self.cfg.Update(self.group, feedback_fn)
15378 class LUGroupRemove(LogicalUnit):
15379 HPATH = "group-remove"
15380 HTYPE = constants.HTYPE_GROUP
15383 def ExpandNames(self):
15384 # This raises errors.OpPrereqError on its own:
15385 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15386 self.needed_locks = {
15387 locking.LEVEL_NODEGROUP: [self.group_uuid],
15390 def CheckPrereq(self):
15391 """Check prerequisites.
15393 This checks that the given group name exists as a node group, that it is
15394 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
15398 # Verify that the group is empty.
15399 group_nodes = [node.name
15400 for node in self.cfg.GetAllNodesInfo().values()
15401 if node.group == self.group_uuid]
15404 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15406 (self.op.group_name,
15407 utils.CommaJoin(utils.NiceSort(group_nodes))),
15408 errors.ECODE_STATE)
15410 # Verify the cluster would not be left group-less.
15411 if len(self.cfg.GetNodeGroupList()) == 1:
15412 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15413 " removed" % self.op.group_name,
15414 errors.ECODE_STATE)
15416 def BuildHooksEnv(self):
15417 """Build hooks env.
15421 "GROUP_NAME": self.op.group_name,
15424 def BuildHooksNodes(self):
15425 """Build hooks nodes.
15428 mn = self.cfg.GetMasterNode()
15429 return ([mn], [mn])
15431 def Exec(self, feedback_fn):
15432 """Remove the node group.
15436 self.cfg.RemoveNodeGroup(self.group_uuid)
15437 except errors.ConfigurationError:
15438 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15439 (self.op.group_name, self.group_uuid))
15441 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15444 class LUGroupRename(LogicalUnit):
15445 HPATH = "group-rename"
15446 HTYPE = constants.HTYPE_GROUP
15449 def ExpandNames(self):
15450 # This raises errors.OpPrereqError on its own:
15451 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15453 self.needed_locks = {
15454 locking.LEVEL_NODEGROUP: [self.group_uuid],
15457 def CheckPrereq(self):
15458 """Check prerequisites.
15460 Ensures the requested new name is not already in use.
15464 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15465 except errors.OpPrereqError:
15468 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15469 " node group (UUID: %s)" %
15470 (self.op.new_name, new_name_uuid),
15471 errors.ECODE_EXISTS)
15473 def BuildHooksEnv(self):
15474 """Build hooks env.
15478 "OLD_NAME": self.op.group_name,
15479 "NEW_NAME": self.op.new_name,
15482 def BuildHooksNodes(self):
15483 """Build hooks nodes.
15486 mn = self.cfg.GetMasterNode()
15488 all_nodes = self.cfg.GetAllNodesInfo()
15489 all_nodes.pop(mn, None)
15492 run_nodes.extend(node.name for node in all_nodes.values()
15493 if node.group == self.group_uuid)
15495 return (run_nodes, run_nodes)
15497 def Exec(self, feedback_fn):
15498 """Rename the node group.
15501 group = self.cfg.GetNodeGroup(self.group_uuid)
15504 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15505 (self.op.group_name, self.group_uuid))
15507 group.name = self.op.new_name
15508 self.cfg.Update(group, feedback_fn)
15510 return self.op.new_name
15513 class LUGroupEvacuate(LogicalUnit):
15514 HPATH = "group-evacuate"
15515 HTYPE = constants.HTYPE_GROUP
15518 def ExpandNames(self):
15519 # This raises errors.OpPrereqError on its own:
15520 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15522 if self.op.target_groups:
15523 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15524 self.op.target_groups)
15526 self.req_target_uuids = []
15528 if self.group_uuid in self.req_target_uuids:
15529 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15530 " as a target group (targets are %s)" %
15532 utils.CommaJoin(self.req_target_uuids)),
15533 errors.ECODE_INVAL)
15535 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15537 self.share_locks = _ShareAll()
15538 self.needed_locks = {
15539 locking.LEVEL_INSTANCE: [],
15540 locking.LEVEL_NODEGROUP: [],
15541 locking.LEVEL_NODE: [],
15544 def DeclareLocks(self, level):
15545 if level == locking.LEVEL_INSTANCE:
15546 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15548 # Lock instances optimistically, needs verification once node and group
15549 # locks have been acquired
15550 self.needed_locks[locking.LEVEL_INSTANCE] = \
15551 self.cfg.GetNodeGroupInstances(self.group_uuid)
15553 elif level == locking.LEVEL_NODEGROUP:
15554 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15556 if self.req_target_uuids:
15557 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15559 # Lock all groups used by instances optimistically; this requires going
15560 # via the node before it's locked, requiring verification later on
15561 lock_groups.update(group_uuid
15562 for instance_name in
15563 self.owned_locks(locking.LEVEL_INSTANCE)
15565 self.cfg.GetInstanceNodeGroups(instance_name))
15567 # No target groups, need to lock all of them
15568 lock_groups = locking.ALL_SET
15570 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15572 elif level == locking.LEVEL_NODE:
15573 # This will only lock the nodes in the group to be evacuated that
15574 # contain actual instances
15575 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15576 self._LockInstancesNodes()
15578 # Lock all nodes in group to be evacuated and target groups
15579 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15580 assert self.group_uuid in owned_groups
15581 member_nodes = [node_name
15582 for group in owned_groups
15583 for node_name in self.cfg.GetNodeGroup(group).members]
15584 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15586 def CheckPrereq(self):
15587 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15588 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15589 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15591 assert owned_groups.issuperset(self.req_target_uuids)
15592 assert self.group_uuid in owned_groups
15594 # Check if locked instances are still correct
15595 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15597 # Get instance information
15598 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15600 # Check if node groups for locked instances are still correct
15601 _CheckInstancesNodeGroups(self.cfg, self.instances,
15602 owned_groups, owned_nodes, self.group_uuid)
15604 if self.req_target_uuids:
15605 # User requested specific target groups
15606 self.target_uuids = self.req_target_uuids
15608 # All groups except the one to be evacuated are potential targets
15609 self.target_uuids = [group_uuid for group_uuid in owned_groups
15610 if group_uuid != self.group_uuid]
15612 if not self.target_uuids:
15613 raise errors.OpPrereqError("There are no possible target groups",
15614 errors.ECODE_INVAL)
15616 def BuildHooksEnv(self):
15617 """Build hooks env.
15621 "GROUP_NAME": self.op.group_name,
15622 "TARGET_GROUPS": " ".join(self.target_uuids),
15625 def BuildHooksNodes(self):
15626 """Build hooks nodes.
15629 mn = self.cfg.GetMasterNode()
15631 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15633 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15635 return (run_nodes, run_nodes)
15637 def Exec(self, feedback_fn):
15638 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15640 assert self.group_uuid not in self.target_uuids
15642 req = iallocator.IAReqGroupChange(instances=instances,
15643 target_groups=self.target_uuids)
15644 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15646 ial.Run(self.op.iallocator)
15648 if not ial.success:
15649 raise errors.OpPrereqError("Can't compute group evacuation using"
15650 " iallocator '%s': %s" %
15651 (self.op.iallocator, ial.info),
15652 errors.ECODE_NORES)
15654 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15656 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15657 len(jobs), self.op.group_name)
15659 return ResultWithJobs(jobs)
15662 class TagsLU(NoHooksLU): # pylint: disable=W0223
15663 """Generic tags LU.
15665 This is an abstract class which is the parent of all the other tags LUs.
15668 def ExpandNames(self):
15669 self.group_uuid = None
15670 self.needed_locks = {}
15672 if self.op.kind == constants.TAG_NODE:
15673 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15674 lock_level = locking.LEVEL_NODE
15675 lock_name = self.op.name
15676 elif self.op.kind == constants.TAG_INSTANCE:
15677 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15678 lock_level = locking.LEVEL_INSTANCE
15679 lock_name = self.op.name
15680 elif self.op.kind == constants.TAG_NODEGROUP:
15681 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15682 lock_level = locking.LEVEL_NODEGROUP
15683 lock_name = self.group_uuid
15684 elif self.op.kind == constants.TAG_NETWORK:
15685 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15686 lock_level = locking.LEVEL_NETWORK
15687 lock_name = self.network_uuid
15692 if lock_level and getattr(self.op, "use_locking", True):
15693 self.needed_locks[lock_level] = lock_name
15695 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15696 # not possible to acquire the BGL based on opcode parameters)
15698 def CheckPrereq(self):
15699 """Check prerequisites.
15702 if self.op.kind == constants.TAG_CLUSTER:
15703 self.target = self.cfg.GetClusterInfo()
15704 elif self.op.kind == constants.TAG_NODE:
15705 self.target = self.cfg.GetNodeInfo(self.op.name)
15706 elif self.op.kind == constants.TAG_INSTANCE:
15707 self.target = self.cfg.GetInstanceInfo(self.op.name)
15708 elif self.op.kind == constants.TAG_NODEGROUP:
15709 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15710 elif self.op.kind == constants.TAG_NETWORK:
15711 self.target = self.cfg.GetNetwork(self.network_uuid)
15713 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15714 str(self.op.kind), errors.ECODE_INVAL)
15717 class LUTagsGet(TagsLU):
15718 """Returns the tags of a given object.
15723 def ExpandNames(self):
15724 TagsLU.ExpandNames(self)
15726 # Share locks as this is only a read operation
15727 self.share_locks = _ShareAll()
15729 def Exec(self, feedback_fn):
15730 """Returns the tag list.
15733 return list(self.target.GetTags())
15736 class LUTagsSearch(NoHooksLU):
15737 """Searches the tags for a given pattern.
15742 def ExpandNames(self):
15743 self.needed_locks = {}
15745 def CheckPrereq(self):
15746 """Check prerequisites.
15748 This checks the pattern passed for validity by compiling it.
15752 self.re = re.compile(self.op.pattern)
15753 except re.error, err:
15754 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15755 (self.op.pattern, err), errors.ECODE_INVAL)
15757 def Exec(self, feedback_fn):
15758 """Returns the tag list.
15762 tgts = [("/cluster", cfg.GetClusterInfo())]
15763 ilist = cfg.GetAllInstancesInfo().values()
15764 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15765 nlist = cfg.GetAllNodesInfo().values()
15766 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15767 tgts.extend(("/nodegroup/%s" % n.name, n)
15768 for n in cfg.GetAllNodeGroupsInfo().values())
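# Each match is collected as a (path, tag) pair; illustrative result for
# a hypothetical pattern "^env:":
#   [("/cluster", "env:prod"), ("/instances/inst1.example.com", "env:prod")]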
15770 for path, target in tgts:
15771 for tag in target.GetTags():
15772 if self.re.search(tag):
15773 results.append((path, tag))
15777 class LUTagsSet(TagsLU):
15778 """Sets a tag on a given object.
15783 def CheckPrereq(self):
15784 """Check prerequisites.
15786 This checks the type and length of the tag name and value.
15789 TagsLU.CheckPrereq(self)
15790 for tag in self.op.tags:
15791 objects.TaggableObject.ValidateTag(tag)
15793 def Exec(self, feedback_fn):
15798 for tag in self.op.tags:
15799 self.target.AddTag(tag)
15800 except errors.TagError, err:
15801 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15802 self.cfg.Update(self.target, feedback_fn)
15805 class LUTagsDel(TagsLU):
15806 """Delete a list of tags from a given object.
15811 def CheckPrereq(self):
15812 """Check prerequisites.
15814 This checks that we have the given tag.
15817 TagsLU.CheckPrereq(self)
15818 for tag in self.op.tags:
15819 objects.TaggableObject.ValidateTag(tag)
15820 del_tags = frozenset(self.op.tags)
15821 cur_tags = self.target.GetTags()
15823 diff_tags = del_tags - cur_tags
15825 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15826 raise errors.OpPrereqError("Tag(s) %s not found" %
15827 (utils.CommaJoin(diff_names), ),
15828 errors.ECODE_NOENT)
15830 def Exec(self, feedback_fn):
15831 """Remove the tag from the object.
15834 for tag in self.op.tags:
15835 self.target.RemoveTag(tag)
15836 self.cfg.Update(self.target, feedback_fn)
15839 class LUTestDelay(NoHooksLU):
15840 """Sleep for a specified amount of time.
15842 This LU sleeps on the master and/or nodes for a specified amount of time.
15848 def ExpandNames(self):
15849 """Expand names and set required locks.
15851 This expands the node list, if any.
15854 self.needed_locks = {}
15855 if self.op.on_nodes:
15856 # _GetWantedNodes can be used here, but is not always appropriate to use
15857 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15858 # more information.
15859 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15860 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15862 def _TestDelay(self):
15863 """Do the actual sleep.
15866 if self.op.on_master:
15867 if not utils.TestDelay(self.op.duration):
15868 raise errors.OpExecError("Error during master delay test")
15869 if self.op.on_nodes:
15870 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15871 for node, node_result in result.items():
15872 node_result.Raise("Failure during rpc call to node %s" % node)
15874 def Exec(self, feedback_fn):
15875 """Execute the test delay opcode, with the wanted repetitions.
15878 if self.op.repeat == 0:
15881 top_value = self.op.repeat - 1
15882 for i in range(self.op.repeat):
15883 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15887 class LURestrictedCommand(NoHooksLU):
15888 """Logical unit for executing restricted commands.
15893 def ExpandNames(self):
15895 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15897 self.needed_locks = {
15898 locking.LEVEL_NODE: self.op.nodes,
15900 self.share_locks = {
15901 locking.LEVEL_NODE: not self.op.use_locking,
15904 def CheckPrereq(self):
15905 """Check prerequisites.
15909 def Exec(self, feedback_fn):
15910 """Execute restricted command and return output.
15913 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15915 # Check if correct locks are held
15916 assert set(self.op.nodes).issubset(owned_nodes)
15918 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15922 for node_name in self.op.nodes:
15923 nres = rpcres[node_name]
15925 msg = ("Command '%s' on node '%s' failed: %s" %
15926 (self.op.command, node_name, nres.fail_msg))
15927 result.append((False, msg))
15929 result.append((True, nres.payload))
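# Illustrative shape of the result built above, one (success, output or
# error message) pair per node in self.op.nodes:
#   [(True, "some output"), (False, "Command 'x' on node 'n2' failed: ...")]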
15934 class LUTestJqueue(NoHooksLU):
15935 """Utility LU to test some aspects of the job queue.
15940 # Must be lower than default timeout for WaitForJobChange to see whether it
15941 # notices changed jobs
15942 _CLIENT_CONNECT_TIMEOUT = 20.0
15943 _CLIENT_CONFIRM_TIMEOUT = 60.0
15946 def _NotifyUsingSocket(cls, cb, errcls):
15947 """Opens a Unix socket and waits for another program to connect.
15950 @param cb: Callback to send socket name to client
15951 @type errcls: class
15952 @param errcls: Exception class to use for errors
15955 # Using a temporary directory as there's no easy way to create temporary
15956 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
15958 tmpdir = tempfile.mkdtemp()
15960 tmpsock = utils.PathJoin(tmpdir, "sock")
15962 logging.debug("Creating temporary socket at %s", tmpsock)
15963 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15968 # Send details to client
15971 # Wait for client to connect before continuing
15972 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15974 (conn, _) = sock.accept()
15975 except socket.error, err:
15976 raise errcls("Client didn't connect in time (%s)" % err)
15980 # Remove as soon as client is connected
15981 shutil.rmtree(tmpdir)
15983 # Wait for client to close
15986 # pylint: disable=E1101
15987 # Instance of '_socketobject' has no ... member
15988 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15990 except socket.error, err:
15991 raise errcls("Client failed to confirm notification (%s)" % err)
15995 def _SendNotification(self, test, arg, sockname):
15996 """Sends a notification to the client.
15999 @param test: Test name
16000 @param arg: Test argument (depends on test)
16001 @type sockname: string
16002 @param sockname: Socket path
16005 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16007 def _Notify(self, prereq, test, arg):
16008 """Notifies the client of a test.
16011 @param prereq: Whether this is a prereq-phase test
16013 @param test: Test name
16014 @param arg: Test argument (depends on test)
16018 errcls = errors.OpPrereqError
16020 errcls = errors.OpExecError
16022 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16026 def CheckArguments(self):
16027 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16028 self.expandnames_calls = 0
16030 def ExpandNames(self):
16031 checkargs_calls = getattr(self, "checkargs_calls", 0)
16032 if checkargs_calls < 1:
16033 raise errors.ProgrammerError("CheckArguments was not called")
16035 self.expandnames_calls += 1
16037 if self.op.notify_waitlock:
16038 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16040 self.LogInfo("Expanding names")
16042 # Get lock on master node (just to get a lock, not for a particular reason)
16043 self.needed_locks = {
16044 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16047 def Exec(self, feedback_fn):
16048 if self.expandnames_calls < 1:
16049 raise errors.ProgrammerError("ExpandNames was not called")
16051 if self.op.notify_exec:
16052 self._Notify(False, constants.JQT_EXEC, None)
16054 self.LogInfo("Executing")
16056 if self.op.log_messages:
16057 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16058 for idx, msg in enumerate(self.op.log_messages):
16059 self.LogInfo("Sending log message %s", idx + 1)
16060 feedback_fn(constants.JQT_MSGPREFIX + msg)
16061 # Report how many test messages have been sent
16062 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16065 raise errors.OpExecError("Opcode failure was requested")
16070 class LUTestAllocator(NoHooksLU):
16071 """Run allocator tests.
16073 This LU runs the allocator tests
16076 def CheckPrereq(self):
16077 """Check prerequisites.
16079 This checks the opcode parameters depending on the direction and mode of the test.
16082 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16083 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16084 for attr in ["memory", "disks", "disk_template",
16085 "os", "tags", "nics", "vcpus"]:
16086 if not hasattr(self.op, attr):
16087 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16088 attr, errors.ECODE_INVAL)
16089 iname = self.cfg.ExpandInstanceName(self.op.name)
16090 if iname is not None:
16091 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16092 iname, errors.ECODE_EXISTS)
16093 if not isinstance(self.op.nics, list):
16094 raise errors.OpPrereqError("Invalid parameter 'nics'",
16095 errors.ECODE_INVAL)
16096 if not isinstance(self.op.disks, list):
16097 raise errors.OpPrereqError("Invalid parameter 'disks'",
16098 errors.ECODE_INVAL)
16099 for row in self.op.disks:
16100 if (not isinstance(row, dict) or
16101 constants.IDISK_SIZE not in row or
16102 not isinstance(row[constants.IDISK_SIZE], int) or
16103 constants.IDISK_MODE not in row or
16104 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16105 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16106 " parameter", errors.ECODE_INVAL)
16107 if self.op.hypervisor is None:
16108 self.op.hypervisor = self.cfg.GetHypervisorType()
16109 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16110 fname = _ExpandInstanceName(self.cfg, self.op.name)
16111 self.op.name = fname
16112 self.relocate_from = \
16113 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16114 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16115 constants.IALLOCATOR_MODE_NODE_EVAC):
16116 if not self.op.instances:
16117 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16118 self.op.instances = _GetWantedInstances(self, self.op.instances)
16120 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16121 self.op.mode, errors.ECODE_INVAL)
16123 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16124 if self.op.iallocator is None:
16125 raise errors.OpPrereqError("Missing allocator name",
16126 errors.ECODE_INVAL)
16127 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16128 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16129 self.op.direction, errors.ECODE_INVAL)
16131 def Exec(self, feedback_fn):
16132 """Run the allocator test.
16135 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16136 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16137 memory=self.op.memory,
16138 disks=self.op.disks,
16139 disk_template=self.op.disk_template,
16143 vcpus=self.op.vcpus,
16144 spindle_use=self.op.spindle_use,
16145 hypervisor=self.op.hypervisor)
16146 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16147 req = iallocator.IAReqRelocate(name=self.op.name,
16148 relocate_from=list(self.relocate_from))
16149 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16150 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16151 target_groups=self.op.target_groups)
16152 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16153 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16154 evac_mode=self.op.evac_mode)
16155 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16156 disk_template = self.op.disk_template
16157 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16158 memory=self.op.memory,
16159 disks=self.op.disks,
16160 disk_template=disk_template,
16164 vcpus=self.op.vcpus,
16165 spindle_use=self.op.spindle_use,
16166 hypervisor=self.op.hypervisor)
16167 for idx in range(self.op.count)]
16168 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16170 raise errors.ProgrammerError("Uncatched mode %s in"
16171 " LUTestAllocator.Exec", self.op.mode)
16173 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16174 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16175 result = ial.in_text
16177 ial.Run(self.op.iallocator, validate=False)
16178 result = ial.out_text
16182 class LUNetworkAdd(LogicalUnit):
16183 """Logical unit for creating networks.
16186 HPATH = "network-add"
16187 HTYPE = constants.HTYPE_NETWORK
16190 def BuildHooksNodes(self):
16191 """Build hooks nodes.
16194 mn = self.cfg.GetMasterNode()
16195 return ([mn], [mn])
16197 def CheckArguments(self):
16198 if self.op.mac_prefix:
16199 self.op.mac_prefix = \
16200 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16202 def ExpandNames(self):
16203 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16205 if self.op.conflicts_check:
16206 self.share_locks[locking.LEVEL_NODE] = 1
16207 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16208 self.needed_locks = {
16209 locking.LEVEL_NODE: locking.ALL_SET,
16210 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16213 self.needed_locks = {}
16215 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16217 def CheckPrereq(self):
16218 if self.op.network is None:
16219 raise errors.OpPrereqError("Network must be given",
16220 errors.ECODE_INVAL)
16222 uuid = self.cfg.LookupNetwork(self.op.network_name)
16225 raise errors.OpPrereqError(("Network with name '%s' already exists" %
16226 self.op.network_name), errors.ECODE_EXISTS)
16228 # Check tag validity
16229 for tag in self.op.tags:
16230 objects.TaggableObject.ValidateTag(tag)
16232 def BuildHooksEnv(self):
16233 """Build hooks env.
16237 "name": self.op.network_name,
16238 "subnet": self.op.network,
16239 "gateway": self.op.gateway,
16240 "network6": self.op.network6,
16241 "gateway6": self.op.gateway6,
16242 "mac_prefix": self.op.mac_prefix,
16243 "network_type": self.op.network_type,
16244 "tags": self.op.tags,
16246 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16248 def Exec(self, feedback_fn):
16249 """Add the ip pool to the cluster.
16252 nobj = objects.Network(name=self.op.network_name,
16253 network=self.op.network,
16254 gateway=self.op.gateway,
16255 network6=self.op.network6,
16256 gateway6=self.op.gateway6,
16257 mac_prefix=self.op.mac_prefix,
16258 network_type=self.op.network_type,
16259 uuid=self.network_uuid,
16260 family=constants.IP4_VERSION)
16261 # Initialize the associated address pool
16263 pool = network.AddressPool.InitializeNetwork(nobj)
16264 except errors.AddressPoolError, e:
16265 raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
16267 # Check if we need to reserve the nodes and the cluster master IP
16268 # These may not be allocated to any instances in routed mode, as
16269 # they wouldn't function anyway.
16270 if self.op.conflicts_check:
16271 for node in self.cfg.GetAllNodesInfo().values():
16272 for ip in [node.primary_ip, node.secondary_ip]:
16274 if pool.Contains(ip):
16276 self.LogInfo("Reserved IP address of node '%s' (%s)",
16278 except errors.AddressPoolError:
16279 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
16282 master_ip = self.cfg.GetClusterInfo().master_ip
16284 if pool.Contains(master_ip):
16285 pool.Reserve(master_ip)
16286 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16287 except errors.AddressPoolError:
16288 self.LogWarning("Cannot reserve cluster master IP address (%s)",
16291 if self.op.add_reserved_ips:
16292 for ip in self.op.add_reserved_ips:
16294 pool.Reserve(ip, external=True)
16295 except errors.AddressPoolError, e:
16296 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
16299 for tag in self.op.tags:
16302 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16303 del self.remove_locks[locking.LEVEL_NETWORK]
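# Minimal sketch of the address-pool calls used above (illustrative only;
# the network name, subnet and gateway values are made up and the object
# is never added to the cluster configuration):
def _AddressPoolSketch():
  """Illustrative only: initialize a pool and reserve the gateway.

  """
  net = objects.Network(name="sketch-net", network="192.0.2.0/24",
                        gateway="192.0.2.1",
                        family=constants.IP4_VERSION)
  pool = network.AddressPool.InitializeNetwork(net)
  if pool.Contains(net.gateway):
    pool.Reserve(net.gateway, external=True)
  return pool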
16306 class LUNetworkRemove(LogicalUnit):
16307 HPATH = "network-remove"
16308 HTYPE = constants.HTYPE_NETWORK
16311 def ExpandNames(self):
16312 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16314 if not self.network_uuid:
16315 raise errors.OpPrereqError(("Network '%s' not found" %
16316 self.op.network_name), errors.ECODE_NOENT)
16318 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16319 self.needed_locks = {
16320 locking.LEVEL_NETWORK: [self.network_uuid],
16321 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16324 def CheckPrereq(self):
16325 """Check prerequisites.
16327 This checks that the given network exists and that it is not connected
16328 to (i.e., in use by) any node group.
16332 # Verify that the network is not connected to any node group.
16333 node_groups = [group.name
16334 for group in self.cfg.GetAllNodeGroupsInfo().values()
16335 if self.network_uuid in group.networks]
16338 self.LogWarning("Network '%s' is connected to the following"
16339 " node groups: %s" %
16340 (self.op.network_name,
16341 utils.CommaJoin(utils.NiceSort(node_groups))))
16342 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "NETWORK_NAME": self.op.network_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the network.

    """
    try:
      self.cfg.RemoveNetwork(self.network_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
                               (self.op.network_name, self.network_uuid))


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError(("Network '%s' not found" %
                                  self.op.network_name), errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.network_type = self.network.network_type
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.network_type:
      if self.op.network_type == constants.VALUE_NONE:
        self.network_type = None
      else:
        self.network_type = self.op.network_type

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6
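
  # Illustrative note (not part of the original module): every parameter
  # handled above is tri-state -- omitted in the opcode means "keep the
  # current value", the literal constants.VALUE_NONE means "clear it", and
  # anything else replaces the value. E.g. a hypothetical call:
  #
  #   opcodes.OpNetworkSetParams(network_name="example-net",
  #                              gateway=constants.VALUE_NONE)
  #
  # would clear the gateway while leaving all other parameters untouched.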

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "network_type": self.network_type,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    #      extend cfg.ReserveIp/ReleaseIp with the external flag
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve Gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    if self.op.network_type:
      self.network.network_type = self.network_type

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)
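
# Illustrative sketch (not part of the original module): when the gateway
# changes, Exec above swaps the external reservation so the pool stays
# consistent. With made-up addresses:
#
#   pool.Reserve("192.0.2.2", external=True)    # new gateway
#   pool.Release("192.0.2.1", external=True)    # old gateway
#   net.gateway = "192.0.2.2"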


class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking

    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])

            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network:
            net_uuid = name_to_uuid[nic.network]
            if net_uuid in network_uuids:
              network_to_instances[net_uuid].append(instance.name)
              break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
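
# Illustrative example (not part of the original module): for a small
# 192.0.2.0/29 network whose network/broadcast addresses and gateway are
# externally reserved, the stats computed by _GetStats might look roughly
# like this (the exact map layout depends on the AddressPool
# implementation):
#
#   {
#     "free_count": 5,
#     "reserved_count": 3,
#     "map": "XX.....X",
#     "external_reservations": "192.0.2.0, 192.0.2.1, 192.0.2.7",
#   }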


class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
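
# Illustrative note (not part of the original module): assuming qlang's
# simple-filter semantics, the name list handed to _NetworkQuery above is
# turned into an OR of equality comparisons, e.g.:
#
#   qlang.MakeSimpleFilter("name", ["net1", "net2"])
#   # -> [qlang.OP_OR, [qlang.OP_EQUAL, "name", "net1"],
#   #                  [qlang.OP_EQUAL, "name", "net2"]]
#
# while an empty name list yields None, i.e. no filtering.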


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True
      return

    if self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to")

  def Exec(self, feedback_fn):
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)
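
# Illustrative note (not part of the original module): connecting a network
# stores a NIC-parameter dict on the node group, from which instance NICs
# using the network can inherit mode and link. With a made-up link name:
#
#   group.networks[net_uuid] = {
#     constants.NIC_MODE: constants.NIC_MODE_BRIDGED,
#     constants.NIC_LINK: "br0",
#     }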


def _NetworkConflictCheck(lu, check_fn, action):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  # Check if locked instances are still correct
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)

  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found;"
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)
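
# Illustrative example (not part of the original module), derived directly
# from the format string above:
#
#   _FmtNetworkConflict([(0, "192.0.2.10"), (2, "192.0.2.11")])
#   # -> "nic0/192.0.2.10, nic2/192.0.2.11"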


class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False
      return

    if self.op.conflicts_check:
      _NetworkConflictCheck(self, lambda nic: nic.network == self.network_name,
                            "disconnect from")

  def Exec(self, feedback_fn):
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
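
# Illustrative example (not part of the original module): the lookup above
# dispatches a query resource name to its _QueryBase subclass, e.g.:
#
#   impl = _GetQueryImplementation(constants.QR_NETWORK)  # -> _NetworkQuery
#
# while an unknown resource name raises OpPrereqError with ECODE_INVAL.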


def _CheckForConflictingIp(lu, ip, node):
  """In case of conflicting IP address raise error.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)
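
# Illustrative note (not part of the original module): callers would use
# this as a guard before assigning an IP to a NIC; with made-up values:
#
#   _CheckForConflictingIp(lu, "192.0.2.10", "node1.example.com")
#   # raises OpPrereqError if 192.0.2.10 falls inside a network that is
#   # connected to node1's node group; otherwise returns (None, None)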