4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
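# Illustrative sketch, not part of the original module: an LU's Exec method
# can hand follow-up work back to the job queue by returning an instance of
# this class; mcpu._ProcessResult then submits each inner list as a separate
# job and attaches the resulting job IDs to the opcode result. OpTestDelay is
# used here purely as a stand-in opcode.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1)],   # first job, one opcode
#             [opcodes.OpTestDelay(duration=2)]]   # second, independent job
#     return ResultWithJobs(jobs, done="initial work finished")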
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176 This method is for doing a simple syntactic check and ensuring the
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179 CheckPrereq, doing these separately is better because:
181 - ExpandNames is left as purely a lock-related function
182 - CheckPrereq is run after we have acquired locks (and possible
185 The function is allowed to change the self.op attribute so that
186 later methods no longer need to worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
245 def DeclareLocks(self, level):
246 """Declare LU locking needs for a level
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
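# Illustrative sketch (not from the original file): a common pattern is to
# leave the node level empty in ExpandNames, request recalculation, and let
# DeclareLocks fill it in once the instance lock is already held:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()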
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
322 hook should run after the execution. If no nodes are needed, an
323 empty list should be returned (and not None).
324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged, but any LU can override it if it
337 wants to use the local cluster hook scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
342 @param feedback_fn: function used to send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
349 # API must be kept, thus we ignore the unused-argument and
350 # could-be-a-function warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
388 It should be called in DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
401 # TODO: check if we've really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper to the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
600 @returns: The annotated disk copies
601 @see: L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
652 " locks were acquired, current groups are"
653 " are '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _CopyLockList(names):
701 """Makes a copy of a list of lock names.
703 Handles L{locking.ALL_SET} correctly.
706 if names == locking.ALL_SET:
707 return locking.ALL_SET
712 def _GetWantedNodes(lu, nodes):
713 """Returns list of checked and expanded node names.
715 @type lu: L{LogicalUnit}
716 @param lu: the logical unit on whose behalf we execute
718 @param nodes: list of node names or None for all nodes
720 @return: the list of nodes, sorted
721 @raise errors.ProgrammerError: if the nodes parameter is wrong type
725 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
727 return utils.NiceSort(lu.cfg.GetNodeList())
730 def _GetWantedInstances(lu, instances):
731 """Returns list of checked and expanded instance names.
733 @type lu: L{LogicalUnit}
734 @param lu: the logical unit on whose behalf we execute
735 @type instances: list
736 @param instances: list of instance names or None for all instances
738 @return: the list of instances, sorted
739 @raise errors.OpPrereqError: if the instances parameter is wrong type
740 @raise errors.OpPrereqError: if any of the passed instances is not found
744 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
746 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
750 def _GetUpdatedParams(old_params, update_dict,
751 use_default=True, use_none=False):
752 """Return the new version of a parameter dictionary.
754 @type old_params: dict
755 @param old_params: old parameters
756 @type update_dict: dict
757 @param update_dict: dict containing new parameter values, or
758 constants.VALUE_DEFAULT to reset the parameter to its default
760 @type use_default: boolean
761 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
762 values as 'to be deleted' values
763 @type use_none: boolean
764 @param use_none: whether to recognise C{None} values as 'to be
767 @return: the new parameter dictionary
770 params_copy = copy.deepcopy(old_params)
771 for key, val in update_dict.iteritems():
772 if ((use_default and val == constants.VALUE_DEFAULT) or
773 (use_none and val is None)):
779 params_copy[key] = val
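# Illustrative example of the semantics documented above (the parameter names
# are made up, not part of the original module):
#
#   old = {"vcpus": 2, "memory": 512}
#   _GetUpdatedParams(old, {"vcpus": constants.VALUE_DEFAULT, "memory": 1024})
#   # -> {"memory": 1024}   # "vcpus" falls back to the cluster default
#   _GetUpdatedParams(old, {"memory": None}, use_none=True)
#   # -> {"vcpus": 2}       # None deletes the key when use_none is set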
783 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
784 """Return the new version of a instance policy.
786 @param group_policy: whether this policy applies to a group and thus
787 we should support removal of policy entries
790 use_none = use_default = group_policy
791 ipolicy = copy.deepcopy(old_ipolicy)
792 for key, value in new_ipolicy.items():
793 if key not in constants.IPOLICY_ALL_KEYS:
794 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
796 if key in constants.IPOLICY_ISPECS:
797 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
798 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
800 use_default=use_default)
802 if (not value or value == [constants.VALUE_DEFAULT] or
803 value == constants.VALUE_DEFAULT):
807 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
808 " on the cluster'" % key,
811 if key in constants.IPOLICY_PARAMETERS:
812 # FIXME: we assume all such values are float
814 ipolicy[key] = float(value)
815 except (TypeError, ValueError), err:
816 raise errors.OpPrereqError("Invalid value for attribute"
817 " '%s': '%s', error: %s" %
818 (key, value, err), errors.ECODE_INVAL)
820 # FIXME: we assume all others are lists; this should be redone
822 ipolicy[key] = list(value)
824 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
825 except errors.ConfigurationError, err:
826 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
831 def _UpdateAndVerifySubDict(base, updates, type_check):
832 """Updates and verifies a dict with sub dicts of the same type.
834 @param base: The dict with the old data
835 @param updates: The dict with the new data
836 @param type_check: Dict suitable to ForceDictType to verify correct types
837 @returns: A new dict with updated and verified values
841 new = _GetUpdatedParams(old, value)
842 utils.ForceDictType(new, type_check)
845 ret = copy.deepcopy(base)
846 ret.update(dict((key, fn(base.get(key, {}), value))
847 for key, value in updates.items()))
851 def _MergeAndVerifyHvState(op_input, obj_input):
852 """Combines the hv state from an opcode with the one of the object
854 @param op_input: The input dict from the opcode
855 @param obj_input: The input dict from the objects
856 @return: The verified and updated dict
860 invalid_hvs = set(op_input) - constants.HYPER_TYPES
862 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
863 " %s" % utils.CommaJoin(invalid_hvs),
865 if obj_input is None:
867 type_check = constants.HVSTS_PARAMETER_TYPES
868 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
873 def _MergeAndVerifyDiskState(op_input, obj_input):
874 """Combines the disk state from an opcode with the one of the object
876 @param op_input: The input dict from the opcode
877 @param obj_input: The input dict from the objects
878 @return: The verified and updated dict
881 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
883 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
884 utils.CommaJoin(invalid_dst),
886 type_check = constants.DSS_PARAMETER_TYPES
887 if obj_input is None:
889 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
891 for key, value in op_input.items())
896 def _ReleaseLocks(lu, level, names=None, keep=None):
897 """Releases locks owned by an LU.
899 @type lu: L{LogicalUnit}
900 @param level: Lock level
901 @type names: list or None
902 @param names: Names of locks to release
903 @type keep: list or None
904 @param keep: Names of locks to retain
907 assert not (keep is not None and names is not None), \
908 "Only one of the 'names' and the 'keep' parameters can be given"
910 if names is not None:
911 should_release = names.__contains__
913 should_release = lambda name: name not in keep
915 should_release = None
917 owned = lu.owned_locks(level)
919 # Not owning any lock at this level, do nothing
926 # Determine which locks to release
928 if should_release(name):
933 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
935 # Release just some locks
936 lu.glm.release(level, names=release)
938 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
941 lu.glm.release(level)
943 assert not lu.glm.is_owned(level), "No locks should be owned"
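# Illustrative sketch (not in the original file): LUs typically use this to
# narrow their node locks once CheckPrereq has picked the relevant nodes,
# e.g. keeping only the primary node of the instance being worked on
# (self.instance is a hypothetical attribute here):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.instance.primary_node])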
946 def _MapInstanceDisksToNodes(instances):
947 """Creates a map from (node, volume) to instance name.
949 @type instances: list of L{objects.Instance}
950 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
953 return dict(((node, vol), inst.name)
954 for inst in instances
955 for (node, vols) in inst.MapLVsByNode().items()
959 def _RunPostHook(lu, node_name):
960 """Runs the post-hook for an opcode on a single node.
963 hm = lu.proc.BuildHooksManager(lu)
965 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
966 except Exception, err: # pylint: disable=W0703
967 lu.LogWarning("Errors occurred running hooks on %s: %s",
971 def _CheckOutputFields(static, dynamic, selected):
972 """Checks whether all selected fields are valid.
974 @type static: L{utils.FieldSet}
975 @param static: static fields set
976 @type dynamic: L{utils.FieldSet}
977 @param dynamic: dynamic fields set
984 delta = f.NonMatching(selected)
986 raise errors.OpPrereqError("Unknown output fields selected: %s"
987 % ",".join(delta), errors.ECODE_INVAL)
990 def _CheckGlobalHvParams(params):
991 """Validates that given hypervisor params are not global ones.
993 This will ensure that instances don't get customised versions of
997 used_globals = constants.HVC_GLOBALS.intersection(params)
999 msg = ("The following hypervisor parameters are global and cannot"
1000 " be customized at instance level, please modify them at"
1001 " cluster level: %s" % utils.CommaJoin(used_globals))
1002 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1005 def _CheckNodeOnline(lu, node, msg=None):
1006 """Ensure that a given node is online.
1008 @param lu: the LU on behalf of which we make the check
1009 @param node: the node to check
1010 @param msg: if passed, should be a message to replace the default one
1011 @raise errors.OpPrereqError: if the node is offline
1015 msg = "Can't use offline node"
1016 if lu.cfg.GetNodeInfo(node).offline:
1017 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1020 def _CheckNodeNotDrained(lu, node):
1021 """Ensure that a given node is not drained.
1023 @param lu: the LU on behalf of which we make the check
1024 @param node: the node to check
1025 @raise errors.OpPrereqError: if the node is drained
1028 if lu.cfg.GetNodeInfo(node).drained:
1029 raise errors.OpPrereqError("Can't use drained node %s" % node,
1033 def _CheckNodeVmCapable(lu, node):
1034 """Ensure that a given node is vm capable.
1036 @param lu: the LU on behalf of which we make the check
1037 @param node: the node to check
1038 @raise errors.OpPrereqError: if the node is not vm capable
1041 if not lu.cfg.GetNodeInfo(node).vm_capable:
1042 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1046 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1047 """Ensure that a node supports a given OS.
1049 @param lu: the LU on behalf of which we make the check
1050 @param node: the node to check
1051 @param os_name: the OS to query about
1052 @param force_variant: whether to ignore variant errors
1053 @raise errors.OpPrereqError: if the node is not supporting the OS
1056 result = lu.rpc.call_os_get(node, os_name)
1057 result.Raise("OS '%s' not in supported OS list for node %s" %
1059 prereq=True, ecode=errors.ECODE_INVAL)
1060 if not force_variant:
1061 _CheckOSVariant(result.payload, os_name)
1064 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1065 """Ensure that a node has the given secondary ip.
1067 @type lu: L{LogicalUnit}
1068 @param lu: the LU on behalf of which we make the check
1070 @param node: the node to check
1071 @type secondary_ip: string
1072 @param secondary_ip: the ip to check
1073 @type prereq: boolean
1074 @param prereq: whether to throw a prerequisite or an execute error
1075 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1076 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1079 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1080 result.Raise("Failure checking secondary ip on node %s" % node,
1081 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1082 if not result.payload:
1083 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1084 " please fix and re-run this command" % secondary_ip)
1086 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1088 raise errors.OpExecError(msg)
1091 def _GetClusterDomainSecret():
1092 """Reads the cluster domain secret.
1095 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1099 def _CheckInstanceState(lu, instance, req_states, msg=None):
1100 """Ensure that an instance is in one of the required states.
1102 @param lu: the LU on behalf of which we make the check
1103 @param instance: the instance to check
1104 @param msg: if passed, should be a message to replace the default one
1105 @raise errors.OpPrereqError: if the instance is not in the required state
1109 msg = ("can't use instance from outside %s states" %
1110 utils.CommaJoin(req_states))
1111 if instance.admin_state not in req_states:
1112 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1113 (instance.name, instance.admin_state, msg),
1116 if constants.ADMINST_UP not in req_states:
1117 pnode = instance.primary_node
1118 if not lu.cfg.GetNodeInfo(pnode).offline:
1119 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1120 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1121 prereq=True, ecode=errors.ECODE_ENVIRON)
1122 if instance.name in ins_l.payload:
1123 raise errors.OpPrereqError("Instance %s is running, %s" %
1124 (instance.name, msg), errors.ECODE_STATE)
1126 lu.LogWarning("Primary node offline, ignoring check that instance"
1130 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1131 """Computes if value is in the desired range.
1133 @param name: name of the parameter for which we perform the check
1134 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1136 @param ipolicy: dictionary containing min, max and std values
1137 @param value: actual value that we want to use
1138 @return: None or element not meeting the criteria
1142 if value in [None, constants.VALUE_AUTO]:
1144 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1145 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1146 if value > max_v or min_v > value:
1148 fqn = "%s/%s" % (name, qualifier)
1151 return ("%s value %s is not in range [%s, %s]" %
1152 (fqn, value, min_v, max_v))
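# Worked example (illustrative, values made up): with an ipolicy whose
# disk-size bounds are min=1024 and max=102400, a 512 MB disk at index 0
# would make this function return roughly
#   "disk-size/0 value 512 is not in range [1024, 102400]"
# (assuming constants.ISPEC_DISK_SIZE is the "disk-size" string), while a
# value inside the range, None or constants.VALUE_AUTO yields None.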
1156 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1157 nic_count, disk_sizes, spindle_use,
1158 _compute_fn=_ComputeMinMaxSpec):
1159 """Verifies ipolicy against provided specs.
1162 @param ipolicy: The ipolicy
1164 @param mem_size: The memory size
1165 @type cpu_count: int
1166 @param cpu_count: Used cpu cores
1167 @type disk_count: int
1168 @param disk_count: Number of disks used
1169 @type nic_count: int
1170 @param nic_count: Number of nics used
1171 @type disk_sizes: list of ints
1172 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1173 @type spindle_use: int
1174 @param spindle_use: The number of spindles this instance uses
1175 @param _compute_fn: The compute function (unittest only)
1176 @return: A list of violations, or an empty list if no violations are found
1179 assert disk_count == len(disk_sizes)
1182 (constants.ISPEC_MEM_SIZE, "", mem_size),
1183 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1184 (constants.ISPEC_DISK_COUNT, "", disk_count),
1185 (constants.ISPEC_NIC_COUNT, "", nic_count),
1186 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1187 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1188 for idx, d in enumerate(disk_sizes)]
1191 (_compute_fn(name, qualifier, ipolicy, value)
1192 for (name, qualifier, value) in test_settings))
1195 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1196 _compute_fn=_ComputeIPolicySpecViolation):
1197 """Compute if instance meets the specs of ipolicy.
1200 @param ipolicy: The ipolicy to verify against
1201 @type instance: L{objects.Instance}
1202 @param instance: The instance to verify
1203 @param _compute_fn: The function to verify ipolicy (unittest only)
1204 @see: L{_ComputeIPolicySpecViolation}
1207 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1208 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1209 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1210 disk_count = len(instance.disks)
1211 disk_sizes = [disk.size for disk in instance.disks]
1212 nic_count = len(instance.nics)
1214 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1215 disk_sizes, spindle_use)
1218 def _ComputeIPolicyInstanceSpecViolation(
1219 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1220 """Compute if instance specs meets the specs of ipolicy.
1223 @param ipolicy: The ipolicy to verify against
1224 @param instance_spec: dict
1225 @param instance_spec: The instance spec to verify
1226 @param _compute_fn: The function to verify ipolicy (unittest only)
1227 @see: L{_ComputeIPolicySpecViolation}
1230 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1231 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1232 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1233 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1234 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1235 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1237 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1238 disk_sizes, spindle_use)
1241 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1243 _compute_fn=_ComputeIPolicyInstanceViolation):
1244 """Compute if instance meets the specs of the new target group.
1246 @param ipolicy: The ipolicy to verify
1247 @param instance: The instance object to verify
1248 @param current_group: The current group of the instance
1249 @param target_group: The new group of the instance
1250 @param _compute_fn: The function to verify ipolicy (unittest only)
1251 @see: L{_ComputeIPolicySpecViolation}
1254 if current_group == target_group:
1257 return _compute_fn(ipolicy, instance)
1260 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1261 _compute_fn=_ComputeIPolicyNodeViolation):
1262 """Checks that the target node is correct in terms of instance policy.
1264 @param ipolicy: The ipolicy to verify
1265 @param instance: The instance object to verify
1266 @param node: The new node to relocate
1267 @param ignore: Ignore violations of the ipolicy
1268 @param _compute_fn: The function to verify ipolicy (unittest only)
1269 @see: L{_ComputeIPolicySpecViolation}
1272 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1273 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1276 msg = ("Instance does not meet target node group's (%s) instance"
1277 " policy: %s") % (node.group, utils.CommaJoin(res))
1281 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1284 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1285 """Computes a set of any instances that would violate the new ipolicy.
1287 @param old_ipolicy: The current (still in-place) ipolicy
1288 @param new_ipolicy: The new (to become) ipolicy
1289 @param instances: List of instances to verify
1290 @return: A list of instances which violate the new ipolicy but
1294 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1295 _ComputeViolatingInstances(old_ipolicy, instances))
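# In other words (illustrative): violations that already existed under the old
# policy are not reported again, only newly introduced ones, e.g.
#   new violations {"inst1", "inst2"} - old violations {"inst2"} -> {"inst1"}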
1298 def _ExpandItemName(fn, name, kind):
1299 """Expand an item name.
1301 @param fn: the function to use for expansion
1302 @param name: requested item name
1303 @param kind: text description ('Node' or 'Instance')
1304 @return: the resolved (full) name
1305 @raise errors.OpPrereqError: if the item is not found
1308 full_name = fn(name)
1309 if full_name is None:
1310 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1315 def _ExpandNodeName(cfg, name):
1316 """Wrapper over L{_ExpandItemName} for nodes."""
1317 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1320 def _ExpandInstanceName(cfg, name):
1321 """Wrapper over L{_ExpandItemName} for instance."""
1322 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1325 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1326 network_type, mac_prefix, tags):
1327 """Builds network related env variables for hooks
1329 This builds the hook environment from individual variables.
1332 @param name: the name of the network
1333 @type subnet: string
1334 @param subnet: the ipv4 subnet
1335 @type gateway: string
1336 @param gateway: the ipv4 gateway
1337 @type network6: string
1338 @param network6: the ipv6 subnet
1339 @type gateway6: string
1340 @param gateway6: the ipv6 gateway
1341 @type network_type: string
1342 @param network_type: the type of the network
1343 @type mac_prefix: string
1344 @param mac_prefix: the mac_prefix
1346 @param tags: the tags of the network
1351 env["NETWORK_NAME"] = name
1353 env["NETWORK_SUBNET"] = subnet
1355 env["NETWORK_GATEWAY"] = gateway
1357 env["NETWORK_SUBNET6"] = network6
1359 env["NETWORK_GATEWAY6"] = gateway6
1361 env["NETWORK_MAC_PREFIX"] = mac_prefix
1363 env["NETWORK_TYPE"] = network_type
1365 env["NETWORK_TAGS"] = " ".join(tags)
1370 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1371 minmem, maxmem, vcpus, nics, disk_template, disks,
1372 bep, hvp, hypervisor_name, tags):
1373 """Builds instance related env variables for hooks
1375 This builds the hook environment from individual variables.
1378 @param name: the name of the instance
1379 @type primary_node: string
1380 @param primary_node: the name of the instance's primary node
1381 @type secondary_nodes: list
1382 @param secondary_nodes: list of secondary nodes as strings
1383 @type os_type: string
1384 @param os_type: the name of the instance's OS
1385 @type status: string
1386 @param status: the desired status of the instance
1387 @type minmem: string
1388 @param minmem: the minimum memory size of the instance
1389 @type maxmem: string
1390 @param maxmem: the maximum memory size of the instance
1392 @param vcpus: the count of VCPUs the instance has
1394 @param nics: list of tuples (ip, mac, mode, link, network) representing
1395 the NICs the instance has
1396 @type disk_template: string
1397 @param disk_template: the disk template of the instance
1399 @param disks: the list of (size, mode) pairs
1401 @param bep: the backend parameters for the instance
1403 @param hvp: the hypervisor parameters for the instance
1404 @type hypervisor_name: string
1405 @param hypervisor_name: the hypervisor for the instance
1407 @param tags: list of instance tags as strings
1409 @return: the hook environment for this instance
1414 "INSTANCE_NAME": name,
1415 "INSTANCE_PRIMARY": primary_node,
1416 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1417 "INSTANCE_OS_TYPE": os_type,
1418 "INSTANCE_STATUS": status,
1419 "INSTANCE_MINMEM": minmem,
1420 "INSTANCE_MAXMEM": maxmem,
1421 # TODO(2.7) remove deprecated "memory" value
1422 "INSTANCE_MEMORY": maxmem,
1423 "INSTANCE_VCPUS": vcpus,
1424 "INSTANCE_DISK_TEMPLATE": disk_template,
1425 "INSTANCE_HYPERVISOR": hypervisor_name,
1428 nic_count = len(nics)
1429 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1432 env["INSTANCE_NIC%d_IP" % idx] = ip
1433 env["INSTANCE_NIC%d_MAC" % idx] = mac
1434 env["INSTANCE_NIC%d_MODE" % idx] = mode
1435 env["INSTANCE_NIC%d_LINK" % idx] = link
1437 env["INSTANCE_NIC%d_NETWORK" % idx] = net
1439 nobj = objects.Network.FromDict(netinfo)
1441 env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1443 env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1445 env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1447 env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1449 env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1450 if nobj.network_type:
1451 env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1453 env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1454 if mode == constants.NIC_MODE_BRIDGED:
1455 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1459 env["INSTANCE_NIC_COUNT"] = nic_count
1462 disk_count = len(disks)
1463 for idx, (size, mode) in enumerate(disks):
1464 env["INSTANCE_DISK%d_SIZE" % idx] = size
1465 env["INSTANCE_DISK%d_MODE" % idx] = mode
1469 env["INSTANCE_DISK_COUNT"] = disk_count
1474 env["INSTANCE_TAGS"] = " ".join(tags)
1476 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1477 for key, value in source.items():
1478 env["INSTANCE_%s_%s" % (kind, key)] = value
1483 def _NICToTuple(lu, nic):
1484 """Build a tupple of nic information.
1486 @type lu: L{LogicalUnit}
1487 @param lu: the logical unit on whose behalf we execute
1488 @type nic: L{objects.NIC}
1489 @param nic: nic to convert to hooks tuple
1494 cluster = lu.cfg.GetClusterInfo()
1495 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1496 mode = filled_params[constants.NIC_MODE]
1497 link = filled_params[constants.NIC_LINK]
1501 net_uuid = lu.cfg.LookupNetwork(net)
1503 nobj = lu.cfg.GetNetwork(net_uuid)
1504 netinfo = objects.Network.ToDict(nobj)
1505 return (ip, mac, mode, link, net, netinfo)
1508 def _NICListToTuple(lu, nics):
1509 """Build a list of nic information tuples.
1511 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1512 value in LUInstanceQueryData.
1514 @type lu: L{LogicalUnit}
1515 @param lu: the logical unit on whose behalf we execute
1516 @type nics: list of L{objects.NIC}
1517 @param nics: list of nics to convert to hooks tuples
1522 hooks_nics.append(_NICToTuple(lu, nic))
1526 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1527 """Builds instance related env variables for hooks from an object.
1529 @type lu: L{LogicalUnit}
1530 @param lu: the logical unit on whose behalf we execute
1531 @type instance: L{objects.Instance}
1532 @param instance: the instance for which we should build the
1534 @type override: dict
1535 @param override: dictionary with key/values that will override
1538 @return: the hook environment dictionary
1541 cluster = lu.cfg.GetClusterInfo()
1542 bep = cluster.FillBE(instance)
1543 hvp = cluster.FillHV(instance)
1545 "name": instance.name,
1546 "primary_node": instance.primary_node,
1547 "secondary_nodes": instance.secondary_nodes,
1548 "os_type": instance.os,
1549 "status": instance.admin_state,
1550 "maxmem": bep[constants.BE_MAXMEM],
1551 "minmem": bep[constants.BE_MINMEM],
1552 "vcpus": bep[constants.BE_VCPUS],
1553 "nics": _NICListToTuple(lu, instance.nics),
1554 "disk_template": instance.disk_template,
1555 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1558 "hypervisor_name": instance.hypervisor,
1559 "tags": instance.tags,
1562 args.update(override)
1563 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1566 def _AdjustCandidatePool(lu, exceptions):
1567 """Adjust the candidate pool after node operations.
1570 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1572 lu.LogInfo("Promoted nodes to master candidate role: %s",
1573 utils.CommaJoin(node.name for node in mod_list))
1574 for name in mod_list:
1575 lu.context.ReaddNode(name)
1576 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1578 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1582 def _DecideSelfPromotion(lu, exceptions=None):
1583 """Decide whether I should promote myself as a master candidate.
1586 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1587 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1588 # the new node will increase mc_max by one, so:
1589 mc_should = min(mc_should + 1, cp_size)
1590 return mc_now < mc_should
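# Worked example (illustrative): with candidate_pool_size = 10 and
# GetMasterCandidateStats reporting mc_now = 3, mc_should = 5, the new node
# bumps mc_should to min(5 + 1, 10) = 6; since 3 < 6 the node promotes itself.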
1593 def _ComputeViolatingInstances(ipolicy, instances):
1594 """Computes a set of instances who violates given ipolicy.
1596 @param ipolicy: The ipolicy to verify
1597 @type instances: object.Instance
1598 @param instances: List of instances to verify
1599 @return: A frozenset of instance names violating the ipolicy
1602 return frozenset([inst.name for inst in instances
1603 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1606 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1607 """Check that the brigdes needed by a list of nics exist.
1610 cluster = lu.cfg.GetClusterInfo()
1611 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1612 brlist = [params[constants.NIC_LINK] for params in paramslist
1613 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1615 result = lu.rpc.call_bridges_exist(target_node, brlist)
1616 result.Raise("Error checking bridges on destination node '%s'" %
1617 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1620 def _CheckInstanceBridgesExist(lu, instance, node=None):
1621 """Check that the brigdes needed by an instance exist.
1625 node = instance.primary_node
1626 _CheckNicsBridgesExist(lu, instance.nics, node)
1629 def _CheckOSVariant(os_obj, name):
1630 """Check whether an OS name conforms to the os variants specification.
1632 @type os_obj: L{objects.OS}
1633 @param os_obj: OS object to check
1635 @param name: OS name passed by the user, to check for validity
1638 variant = objects.OS.GetVariant(name)
1639 if not os_obj.supported_variants:
1641 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1642 " passed)" % (os_obj.name, variant),
1646 raise errors.OpPrereqError("OS name must include a variant",
1649 if variant not in os_obj.supported_variants:
1650 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1653 def _GetNodeInstancesInner(cfg, fn):
1654 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1657 def _GetNodeInstances(cfg, node_name):
1658 """Returns a list of all primary and secondary instances on a node.
1662 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1665 def _GetNodePrimaryInstances(cfg, node_name):
1666 """Returns primary instances on a node.
1669 return _GetNodeInstancesInner(cfg,
1670 lambda inst: node_name == inst.primary_node)
1673 def _GetNodeSecondaryInstances(cfg, node_name):
1674 """Returns secondary instances on a node.
1677 return _GetNodeInstancesInner(cfg,
1678 lambda inst: node_name in inst.secondary_nodes)
1681 def _GetStorageTypeArgs(cfg, storage_type):
1682 """Returns the arguments for a storage type.
1685 # Special case for file storage
1686 if storage_type == constants.ST_FILE:
1687 # storage.FileStorage wants a list of storage directories
1688 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1693 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1696 for dev in instance.disks:
1697 cfg.SetDiskID(dev, node_name)
1699 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1701 result.Raise("Failed to get disk status from node %s" % node_name,
1702 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1704 for idx, bdev_status in enumerate(result.payload):
1705 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1711 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1712 """Check the sanity of iallocator and node arguments and use the
1713 cluster-wide iallocator if appropriate.
1715 Check that at most one of (iallocator, node) is specified. If none is
1716 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1717 then the LU's opcode's iallocator slot is filled with the cluster-wide
1720 @type iallocator_slot: string
1721 @param iallocator_slot: the name of the opcode iallocator slot
1722 @type node_slot: string
1723 @param node_slot: the name of the opcode target node slot
1726 node = getattr(lu.op, node_slot, None)
1727 ialloc = getattr(lu.op, iallocator_slot, None)
1731 if node is not None and ialloc is not None:
1732 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1734 elif ((node is None and ialloc is None) or
1735 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1736 default_iallocator = lu.cfg.GetDefaultIAllocator()
1737 if default_iallocator:
1738 setattr(lu.op, iallocator_slot, default_iallocator)
1740 raise errors.OpPrereqError("No iallocator or node given and no"
1741 " cluster-wide default iallocator found;"
1742 " please specify either an iallocator or a"
1743 " node, or set a cluster-wide default"
1744 " iallocator", errors.ECODE_INVAL)
1747 def _GetDefaultIAllocator(cfg, ialloc):
1748 """Decides on which iallocator to use.
1750 @type cfg: L{config.ConfigWriter}
1751 @param cfg: Cluster configuration object
1752 @type ialloc: string or None
1753 @param ialloc: Iallocator specified in opcode
1755 @return: Iallocator name
1759 # Use default iallocator
1760 ialloc = cfg.GetDefaultIAllocator()
1763 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1764 " opcode nor as a cluster-wide default",
1770 def _CheckHostnameSane(lu, name):
1771 """Ensures that a given hostname resolves to a 'sane' name.
1773 The given name is required to be a prefix of the resolved hostname,
1774 to prevent accidental mismatches.
1776 @param lu: the logical unit on behalf of which we're checking
1777 @param name: the name we should resolve and check
1778 @return: the resolved hostname object
1781 hostname = netutils.GetHostname(name=name)
1782 if hostname.name != name:
1783 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1784 if not utils.MatchNameComponent(name, [hostname.name]):
1785 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1786 " same as given hostname '%s'") %
1787 (hostname.name, name), errors.ECODE_INVAL)
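# Illustrative example (names are made up): if "inst1" resolves to
# "inst1.example.com", the prefix check passes and the resolved hostname
# object is returned; if it resolved to "web7.example.com" instead, an
# OpPrereqError would be raised.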
1791 class LUClusterPostInit(LogicalUnit):
1792 """Logical unit for running hooks after cluster initialization.
1795 HPATH = "cluster-init"
1796 HTYPE = constants.HTYPE_CLUSTER
1798 def BuildHooksEnv(self):
1803 "OP_TARGET": self.cfg.GetClusterName(),
1806 def BuildHooksNodes(self):
1807 """Build hooks nodes.
1810 return ([], [self.cfg.GetMasterNode()])
1812 def Exec(self, feedback_fn):
1819 class LUClusterDestroy(LogicalUnit):
1820 """Logical unit for destroying the cluster.
1823 HPATH = "cluster-destroy"
1824 HTYPE = constants.HTYPE_CLUSTER
1826 def BuildHooksEnv(self):
1831 "OP_TARGET": self.cfg.GetClusterName(),
1834 def BuildHooksNodes(self):
1835 """Build hooks nodes.
1840 def CheckPrereq(self):
1841 """Check prerequisites.
1843 This checks whether the cluster is empty.
1845 Any errors are signaled by raising errors.OpPrereqError.
1848 master = self.cfg.GetMasterNode()
1850 nodelist = self.cfg.GetNodeList()
1851 if len(nodelist) != 1 or nodelist[0] != master:
1852 raise errors.OpPrereqError("There are still %d node(s) in"
1853 " this cluster." % (len(nodelist) - 1),
1855 instancelist = self.cfg.GetInstanceList()
1857 raise errors.OpPrereqError("There are still %d instance(s) in"
1858 " this cluster." % len(instancelist),
1861 def Exec(self, feedback_fn):
1862 """Destroys the cluster.
1865 master_params = self.cfg.GetMasterNetworkParameters()
1867 # Run post hooks on master node before it's removed
1868 _RunPostHook(self, master_params.name)
1870 ems = self.cfg.GetUseExternalMipScript()
1871 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1874 self.LogWarning("Error disabling the master IP address: %s",
1877 return master_params.name
1880 def _VerifyCertificate(filename):
1881 """Verifies a certificate for L{LUClusterVerifyConfig}.
1883 @type filename: string
1884 @param filename: Path to PEM file
1888 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1889 utils.ReadFile(filename))
1890 except Exception, err: # pylint: disable=W0703
1891 return (LUClusterVerifyConfig.ETYPE_ERROR,
1892 "Failed to load X509 certificate %s: %s" % (filename, err))
1895 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1896 constants.SSL_CERT_EXPIRATION_ERROR)
1899 fnamemsg = "While verifying %s: %s" % (filename, msg)
1904 return (None, fnamemsg)
1905 elif errcode == utils.CERT_WARNING:
1906 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1907 elif errcode == utils.CERT_ERROR:
1908 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1910 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1913 def _GetAllHypervisorParameters(cluster, instances):
1914 """Compute the set of all hypervisor parameters.
1916 @type cluster: L{objects.Cluster}
1917 @param cluster: the cluster object
1918 @type instances: list of L{objects.Instance}
1919 @param instances: additional instances from which to obtain parameters
1920 @rtype: list of (origin, hypervisor, parameters)
1921 @return: a list with all parameters found, indicating the hypervisor they
1922 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1927 for hv_name in cluster.enabled_hypervisors:
1928 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1930 for os_name, os_hvp in cluster.os_hvp.items():
1931 for hv_name, hv_params in os_hvp.items():
1933 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1934 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1936 # TODO: collapse identical parameter values in a single one
1937 for instance in instances:
1938 if instance.hvparams:
1939 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1940 cluster.FillHV(instance)))
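# Illustrative example (made-up names): the returned list mixes all three
# origins, e.g.
#   [("cluster", "xen-pvm", {...}),
#    ("os debian-etch", "xen-pvm", {...}),
#    ("instance inst1.example.com", "xen-pvm", {...})]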
1945 class _VerifyErrors(object):
1946 """Mix-in for cluster/group verify LUs.
1948 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1949 self.op and self._feedback_fn to be available.)
1953 ETYPE_FIELD = "code"
1954 ETYPE_ERROR = "ERROR"
1955 ETYPE_WARNING = "WARNING"
1957 def _Error(self, ecode, item, msg, *args, **kwargs):
1958 """Format an error message.
1960 Based on the opcode's error_codes parameter, either format a
1961 parseable error code, or a simpler error string.
1963 This must be called only from Exec and functions called from Exec.
1966 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1967 itype, etxt, _ = ecode
1968 # first complete the msg
1971 # then format the whole message
1972 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1973 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1979 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1980 # and finally report it via the feedback_fn
1981 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
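# Illustrative output (not in the original code): with op.error_codes set the
# reported line is machine-parseable, roughly "ERROR:ECLUSTERCFG:cluster:<item>:<msg>";
# otherwise it takes the human-readable "ERROR: cluster <item>: <msg>" form.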
1983 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1984 """Log an error message if the passed condition is True.
1988 or self.op.debug_simulate_errors) # pylint: disable=E1101
1990 # If the error code is in the list of ignored errors, demote the error to a
1992 (_, etxt, _) = ecode
1993 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1994 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1997 self._Error(ecode, *args, **kwargs)
1999 # only mark the operation as failed for ERROR-level results, not for warnings
2000 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2001 self.bad = self.bad or cond
2004 class LUClusterVerify(NoHooksLU):
2005 """Submits all jobs necessary to verify the cluster.
2010 def ExpandNames(self):
2011 self.needed_locks = {}
2013 def Exec(self, feedback_fn):
2016 if self.op.group_name:
2017 groups = [self.op.group_name]
2018 depends_fn = lambda: None
2020 groups = self.cfg.GetNodeGroupList()
2022 # Verify global configuration
2024 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2027 # Always depend on global verification
2028 depends_fn = lambda: [(-len(jobs), [])]
2031 [opcodes.OpClusterVerifyGroup(group_name=group,
2032 ignore_errors=self.op.ignore_errors,
2033 depends=depends_fn())]
2034 for group in groups)
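# Note (illustrative): each entry produced by depends_fn is a (job, statuses)
# pair; the negative job value is a relative reference to the
# OpClusterVerifyConfig job queued just above, and the empty status list
# presumably accepts any finalized job status.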
2036 # Fix up all parameters
2037 for op in itertools.chain(*jobs): # pylint: disable=W0142
2038 op.debug_simulate_errors = self.op.debug_simulate_errors
2039 op.verbose = self.op.verbose
2040 op.error_codes = self.op.error_codes
2042 op.skip_checks = self.op.skip_checks
2043 except AttributeError:
2044 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2046 return ResultWithJobs(jobs)
2049 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2050 """Verifies the cluster config.
2055 def _VerifyHVP(self, hvp_data):
2056 """Verifies locally the syntax of the hypervisor parameters.
2059 for item, hv_name, hv_params in hvp_data:
2060 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2063 hv_class = hypervisor.GetHypervisor(hv_name)
2064 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2065 hv_class.CheckParameterSyntax(hv_params)
2066 except errors.GenericError, err:
2067 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2069 def ExpandNames(self):
2070 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2071 self.share_locks = _ShareAll()
2073 def CheckPrereq(self):
2074 """Check prerequisites.
2077 # Retrieve all information
2078 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2079 self.all_node_info = self.cfg.GetAllNodesInfo()
2080 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2082 def Exec(self, feedback_fn):
2083 """Verify integrity of cluster, performing various tests on nodes.
2087 self._feedback_fn = feedback_fn
2089 feedback_fn("* Verifying cluster config")
2091 for msg in self.cfg.VerifyConfig():
2092 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2094 feedback_fn("* Verifying cluster certificate files")
2096 for cert_filename in pathutils.ALL_CERT_FILES:
2097 (errcode, msg) = _VerifyCertificate(cert_filename)
2098 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2100 feedback_fn("* Verifying hypervisor parameters")
2102 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2103 self.all_inst_info.values()))
2105 feedback_fn("* Verifying all nodes belong to an existing group")
2107 # We do this verification here because, should this bogus circumstance
2108 # occur, it would never be caught by VerifyGroup, which only acts on
2109 # nodes/instances reachable from existing node groups.
2111 dangling_nodes = set(node.name for node in self.all_node_info.values()
2112 if node.group not in self.all_group_info)
2114 dangling_instances = {}
2115 no_node_instances = []
2117 for inst in self.all_inst_info.values():
2118 if inst.primary_node in dangling_nodes:
2119 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2120 elif inst.primary_node not in self.all_node_info:
2121 no_node_instances.append(inst.name)
2126 utils.CommaJoin(dangling_instances.get(node.name,
2128 for node in dangling_nodes]
2130 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2132 "the following nodes (and their instances) belong to a non"
2133 " existing group: %s", utils.CommaJoin(pretty_dangling))
2135 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2137 "the following instances have a non-existing primary-node:"
2138 " %s", utils.CommaJoin(no_node_instances))
2143 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2144 """Verifies the status of a node group.
2147 HPATH = "cluster-verify"
2148 HTYPE = constants.HTYPE_CLUSTER
2151 _HOOKS_INDENT_RE = re.compile("^", re.M)
2153 class NodeImage(object):
2154 """A class representing the logical and physical status of a node.
2157 @ivar name: the node name to which this object refers
2158 @ivar volumes: a structure as returned from
2159 L{ganeti.backend.GetVolumeList} (runtime)
2160 @ivar instances: a list of running instances (runtime)
2161 @ivar pinst: list of configured primary instances (config)
2162 @ivar sinst: list of configured secondary instances (config)
2163 @ivar sbp: dictionary of {primary-node: list of instances} for all
2164 instances for which this node is secondary (config)
2165 @ivar mfree: free memory, as reported by hypervisor (runtime)
2166 @ivar dfree: free disk, as reported by the node (runtime)
2167 @ivar offline: the offline status (config)
2168 @type rpc_fail: boolean
2169 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2170 not whether the individual keys were correct) (runtime)
2171 @type lvm_fail: boolean
2172 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2173 @type hyp_fail: boolean
2174 @ivar hyp_fail: whether the RPC call didn't return the instance list
2175 @type ghost: boolean
2176 @ivar ghost: whether this node is unknown to the configuration, i.e. a ghost node (config)
2177 @type os_fail: boolean
2178 @ivar os_fail: whether the RPC call didn't return valid OS data
2180 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2181 @type vm_capable: boolean
2182 @ivar vm_capable: whether the node can host instances
2185 def __init__(self, offline=False, name=None, vm_capable=True):
2194 self.offline = offline
2195 self.vm_capable = vm_capable
2196 self.rpc_fail = False
2197 self.lvm_fail = False
2198 self.hyp_fail = False
2200 self.os_fail = False
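# Illustrative note (not in the original code): a freshly constructed NodeImage
# only carries the config-derived flags set above; the runtime fields described
# in the class docstring (volumes, instances, mfree, dfree, oslist, ...) are
# filled in later by the _UpdateNode* helpers further down.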
2203 def ExpandNames(self):
2204 # This raises errors.OpPrereqError on its own:
2205 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2207 # Get instances in node group; this is unsafe and needs verification later
2209 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2211 self.needed_locks = {
2212 locking.LEVEL_INSTANCE: inst_names,
2213 locking.LEVEL_NODEGROUP: [self.group_uuid],
2214 locking.LEVEL_NODE: [],
2216 # This opcode is run by watcher every five minutes and acquires all nodes
2217 # for a group. It doesn't run for a long time, so it's better to acquire
2218 # the node allocation lock as well.
2219 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2222 self.share_locks = _ShareAll()
2224 def DeclareLocks(self, level):
2225 if level == locking.LEVEL_NODE:
2226 # Get members of node group; this is unsafe and needs verification later
2227 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2229 all_inst_info = self.cfg.GetAllInstancesInfo()
2231 # In Exec(), we warn about mirrored instances that have primary and
2232 # secondary living in separate node groups. To fully verify that
2233 # volumes for these instances are healthy, we will need to do an
2234 # extra call to their secondaries. We ensure here those nodes will
2236 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2237 # Important: access only the instances whose lock is owned
2238 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2239 nodes.update(all_inst_info[inst].secondary_nodes)
2241 self.needed_locks[locking.LEVEL_NODE] = nodes
2243 def CheckPrereq(self):
2244 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2245 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2247 group_nodes = set(self.group_info.members)
2249 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2252 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2254 unlocked_instances = \
2255 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2258 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2259 utils.CommaJoin(unlocked_nodes),
2262 if unlocked_instances:
2263 raise errors.OpPrereqError("Missing lock for instances: %s" %
2264 utils.CommaJoin(unlocked_instances),
2267 self.all_node_info = self.cfg.GetAllNodesInfo()
2268 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2270 self.my_node_names = utils.NiceSort(group_nodes)
2271 self.my_inst_names = utils.NiceSort(group_instances)
2273 self.my_node_info = dict((name, self.all_node_info[name])
2274 for name in self.my_node_names)
2276 self.my_inst_info = dict((name, self.all_inst_info[name])
2277 for name in self.my_inst_names)
2279 # We detect here the nodes that will need the extra RPC calls for verifying
2280 # split LV volumes; they should be locked.
2281 extra_lv_nodes = set()
2283 for inst in self.my_inst_info.values():
2284 if inst.disk_template in constants.DTS_INT_MIRROR:
2285 for nname in inst.all_nodes:
2286 if self.all_node_info[nname].group != self.group_uuid:
2287 extra_lv_nodes.add(nname)
2289 unlocked_lv_nodes = \
2290 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2292 if unlocked_lv_nodes:
2293 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2294 utils.CommaJoin(unlocked_lv_nodes),
2296 self.extra_lv_nodes = list(extra_lv_nodes)
2298 def _VerifyNode(self, ninfo, nresult):
2299 """Perform some basic validation on data returned from a node.
2301 - check the result data structure is well formed and has all the
2303 - check ganeti version
2305 @type ninfo: L{objects.Node}
2306 @param ninfo: the node to check
2307 @param nresult: the results from the node
2309 @return: whether overall this call was successful (and we can expect
2310 reasonable values in the response)
2314 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2316 # main result, nresult should be a non-empty dict
2317 test = not nresult or not isinstance(nresult, dict)
2318 _ErrorIf(test, constants.CV_ENODERPC, node,
2319 "unable to verify node: no data returned")
2323 # compares ganeti version
2324 local_version = constants.PROTOCOL_VERSION
2325 remote_version = nresult.get("version", None)
2326 test = not (remote_version and
2327 isinstance(remote_version, (list, tuple)) and
2328 len(remote_version) == 2)
2329 _ErrorIf(test, constants.CV_ENODERPC, node,
2330 "connection to node returned invalid data")
2334 test = local_version != remote_version[0]
2335 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2336 "incompatible protocol versions: master %s,"
2337 " node %s", local_version, remote_version[0])
2341 # node seems compatible, we can actually try to look into its results
2343 # full package version
2344 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2345 constants.CV_ENODEVERSION, node,
2346 "software version mismatch: master %s, node %s",
2347 constants.RELEASE_VERSION, remote_version[1],
2348 code=self.ETYPE_WARNING)
2350 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2351 if ninfo.vm_capable and isinstance(hyp_result, dict):
2352 for hv_name, hv_result in hyp_result.iteritems():
2353 test = hv_result is not None
2354 _ErrorIf(test, constants.CV_ENODEHV, node,
2355 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2357 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2358 if ninfo.vm_capable and isinstance(hvp_result, list):
2359 for item, hv_name, hv_result in hvp_result:
2360 _ErrorIf(True, constants.CV_ENODEHV, node,
2361 "hypervisor %s parameter verify failure (source %s): %s",
2362 hv_name, item, hv_result)
2364 test = nresult.get(constants.NV_NODESETUP,
2365 ["Missing NODESETUP results"])
2366 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2371 def _VerifyNodeTime(self, ninfo, nresult,
2372 nvinfo_starttime, nvinfo_endtime):
2373 """Check the node time.
2375 @type ninfo: L{objects.Node}
2376 @param ninfo: the node to check
2377 @param nresult: the remote results for the node
2378 @param nvinfo_starttime: the start time of the RPC call
2379 @param nvinfo_endtime: the end time of the RPC call
2383 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2385 ntime = nresult.get(constants.NV_TIME, None)
2387 ntime_merged = utils.MergeTime(ntime)
2388 except (ValueError, TypeError):
2389 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2392 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2393 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2394 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2395 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2399 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2400 "Node time diverges by at least %s from master node time",
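# Worked example (illustrative, hypothetical skew limit): if the allowed clock
# skew were 150 seconds and a node's merged time was 200 seconds behind
# nvinfo_starttime, the node would be reported as diverging by roughly "200.0s".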
2403 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2404 """Check the node LVM results.
2406 @type ninfo: L{objects.Node}
2407 @param ninfo: the node to check
2408 @param nresult: the remote results for the node
2409 @param vg_name: the configured VG name
2416 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2418 # checks vg existence and size > 20G
2419 vglist = nresult.get(constants.NV_VGLIST, None)
2421 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2423 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2424 constants.MIN_VG_SIZE)
2425 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2428 pvlist = nresult.get(constants.NV_PVLIST, None)
2429 test = pvlist is None
2430 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2432 # check that ':' is not present in PV names, since it's a
2433 # special character for lvcreate (denotes the range of PEs to
2435 for _, pvname, owner_vg in pvlist:
2436 test = ":" in pvname
2437 _ErrorIf(test, constants.CV_ENODELVM, node,
2438 "Invalid character ':' in PV '%s' of VG '%s'",
2441 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2442 """Check the node bridges.
2444 @type ninfo: L{objects.Node}
2445 @param ninfo: the node to check
2446 @param nresult: the remote results for the node
2447 @param bridges: the expected list of bridges
2454 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2456 missing = nresult.get(constants.NV_BRIDGES, None)
2457 test = not isinstance(missing, list)
2458 _ErrorIf(test, constants.CV_ENODENET, node,
2459 "did not return valid bridge information")
2461 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2462 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2464 def _VerifyNodeUserScripts(self, ninfo, nresult):
2465 """Check the results of user script presence and executability on the node
2467 @type ninfo: L{objects.Node}
2468 @param ninfo: the node to check
2469 @param nresult: the remote results for the node
2474 test = constants.NV_USERSCRIPTS not in nresult
2475 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2476 "did not return user scripts information")
2478 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2480 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2481 "user scripts not present or not executable: %s" %
2482 utils.CommaJoin(sorted(broken_scripts)))
2484 def _VerifyNodeNetwork(self, ninfo, nresult):
2485 """Check the node network connectivity results.
2487 @type ninfo: L{objects.Node}
2488 @param ninfo: the node to check
2489 @param nresult: the remote results for the node
2493 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2495 test = constants.NV_NODELIST not in nresult
2496 _ErrorIf(test, constants.CV_ENODESSH, node,
2497 "node hasn't returned node ssh connectivity data")
2499 if nresult[constants.NV_NODELIST]:
2500 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2501 _ErrorIf(True, constants.CV_ENODESSH, node,
2502 "ssh communication with node '%s': %s", a_node, a_msg)
2504 test = constants.NV_NODENETTEST not in nresult
2505 _ErrorIf(test, constants.CV_ENODENET, node,
2506 "node hasn't returned node tcp connectivity data")
2508 if nresult[constants.NV_NODENETTEST]:
2509 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2511 _ErrorIf(True, constants.CV_ENODENET, node,
2512 "tcp communication with node '%s': %s",
2513 anode, nresult[constants.NV_NODENETTEST][anode])
2515 test = constants.NV_MASTERIP not in nresult
2516 _ErrorIf(test, constants.CV_ENODENET, node,
2517 "node hasn't returned node master IP reachability data")
2519 if not nresult[constants.NV_MASTERIP]:
2520 if node == self.master_node:
2521 msg = "the master node cannot reach the master IP (not configured?)"
2523 msg = "cannot reach the master IP"
2524 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2526 def _VerifyInstance(self, instance, instanceconfig, node_image,
2528 """Verify an instance.
2530 This function checks to see if the required block devices are
2531 available on the instance's node.
2534 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2535 node_current = instanceconfig.primary_node
2537 node_vol_should = {}
2538 instanceconfig.MapLVsByNode(node_vol_should)
2540 cluster = self.cfg.GetClusterInfo()
2541 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2543 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2544 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2545 code=self.ETYPE_WARNING)
2547 for node in node_vol_should:
2548 n_img = node_image[node]
2549 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2550 # ignore missing volumes on offline or broken nodes
2552 for volume in node_vol_should[node]:
2553 test = volume not in n_img.volumes
2554 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2555 "volume %s missing on node %s", volume, node)
2557 if instanceconfig.admin_state == constants.ADMINST_UP:
2558 pri_img = node_image[node_current]
2559 test = instance not in pri_img.instances and not pri_img.offline
2560 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2561 "instance not running on its primary node %s",
2564 diskdata = [(nname, success, status, idx)
2565 for (nname, disks) in diskstatus.items()
2566 for idx, (success, status) in enumerate(disks)]
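# Illustrative shape: diskstatus is {node: [(success, status), ...]} as built by
# _CollectDiskInfo; the comprehension above flattens it into
# (node, success, status, disk_index) tuples so each disk can be checked on its
# own below.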
2568 for nname, success, bdev_status, idx in diskdata:
2569 # the 'ghost node' construction in Exec() ensures that we have a
2571 snode = node_image[nname]
2572 bad_snode = snode.ghost or snode.offline
2573 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2574 not success and not bad_snode,
2575 constants.CV_EINSTANCEFAULTYDISK, instance,
2576 "couldn't retrieve status for disk/%s on %s: %s",
2577 idx, nname, bdev_status)
2578 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2579 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2580 constants.CV_EINSTANCEFAULTYDISK, instance,
2581 "disk/%s on %s is faulty", idx, nname)
2583 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2584 """Verify if there are any unknown volumes in the cluster.
2586 The .os, .swap and backup volumes are ignored. All other volumes are
2587 reported as unknown.
2589 @type reserved: L{ganeti.utils.FieldSet}
2590 @param reserved: a FieldSet of reserved volume names
2593 for node, n_img in node_image.items():
2594 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2595 self.all_node_info[node].group != self.group_uuid):
2596 # skip non-healthy nodes
2598 for volume in n_img.volumes:
2599 test = ((node not in node_vol_should or
2600 volume not in node_vol_should[node]) and
2601 not reserved.Matches(volume))
2602 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2603 "volume %s is unknown", volume)
2605 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2606 """Verify N+1 Memory Resilience.
2608 Check that if one single node dies we can still start all the
2609 instances it was primary for.
2612 cluster_info = self.cfg.GetClusterInfo()
2613 for node, n_img in node_image.items():
2614 # This code checks that every node which is now listed as
2615 # secondary has enough memory to host all instances it is
2616 # supposed to, should a single other node in the cluster fail.
2617 # FIXME: not ready for failover to an arbitrary node
2618 # FIXME: does not support file-backed instances
2619 # WARNING: we currently take into account down instances as well
2620 # as up ones, considering that even if they're down someone
2621 # might want to start them even in the event of a node failure.
2622 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2623 # we're skipping nodes marked offline and nodes in other groups from
2624 # the N+1 warning, since most likely we don't have good memory
2625 # information from them; we already list instances living on such
2626 # nodes, and that's enough warning
2628 #TODO(dynmem): also consider ballooning out other instances
2629 for prinode, instances in n_img.sbp.items():
2631 for instance in instances:
2632 bep = cluster_info.FillBE(instance_cfg[instance])
2633 if bep[constants.BE_AUTO_BALANCE]:
2634 needed_mem += bep[constants.BE_MINMEM]
2635 test = n_img.mfree < needed_mem
2636 self._ErrorIf(test, constants.CV_ENODEN1, node,
2637 "not enough memory to accommodate instance failovers"
2638 " should node %s fail (%dMiB needed, %dMiB available)",
2639 prinode, needed_mem, n_img.mfree)
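# Worked example (illustrative, hypothetical numbers): if this node is secondary
# for instances whose primary is node B and their auto-balanced BE_MINMEM values
# add up to 4096 MiB while the node reports only 2048 MiB free, an ENODEN1 error
# is raised here for a hypothetical failure of B.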
2642 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2643 (files_all, files_opt, files_mc, files_vm)):
2644 """Verifies file checksums collected from all nodes.
2646 @param errorif: Callback for reporting errors
2647 @param nodeinfo: List of L{objects.Node} objects
2648 @param master_node: Name of master node
2649 @param all_nvinfo: RPC results
2652 # Define functions determining which nodes to consider for a file
2655 (files_mc, lambda node: (node.master_candidate or
2656 node.name == master_node)),
2657 (files_vm, lambda node: node.vm_capable),
2660 # Build mapping from filename to list of nodes which should have the file
2662 for (files, fn) in files2nodefn:
2664 filenodes = nodeinfo
2666 filenodes = filter(fn, nodeinfo)
2667 nodefiles.update((filename,
2668 frozenset(map(operator.attrgetter("name"), filenodes)))
2669 for filename in files)
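# Illustrative result: nodefiles now maps every distributed file name to the
# frozenset of node names expected to hold it -- e.g. files from files_mc map
# only to master candidates (plus the master), files_vm only to vm_capable
# nodes.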
2671 assert set(nodefiles) == (files_all | files_mc | files_vm)
2673 fileinfo = dict((filename, {}) for filename in nodefiles)
2674 ignore_nodes = set()
2676 for node in nodeinfo:
2678 ignore_nodes.add(node.name)
2681 nresult = all_nvinfo[node.name]
2683 if nresult.fail_msg or not nresult.payload:
2686 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2687 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2688 for (key, value) in fingerprints.items())
2691 test = not (node_files and isinstance(node_files, dict))
2692 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2693 "Node did not return file checksum data")
2695 ignore_nodes.add(node.name)
2698 # Build per-checksum mapping from filename to nodes having it
2699 for (filename, checksum) in node_files.items():
2700 assert filename in nodefiles
2701 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2703 for (filename, checksums) in fileinfo.items():
2704 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2706 # Nodes having the file
2707 with_file = frozenset(node_name
2708 for nodes in fileinfo[filename].values()
2709 for node_name in nodes) - ignore_nodes
2711 expected_nodes = nodefiles[filename] - ignore_nodes
2713 # Nodes missing file
2714 missing_file = expected_nodes - with_file
2716 if filename in files_opt:
2718 errorif(missing_file and missing_file != expected_nodes,
2719 constants.CV_ECLUSTERFILECHECK, None,
2720 "File %s is optional, but it must exist on all or no"
2721 " nodes (not found on %s)",
2722 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2724 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2725 "File %s is missing from node(s) %s", filename,
2726 utils.CommaJoin(utils.NiceSort(missing_file)))
2728 # Warn if a node has a file it shouldn't
2729 unexpected = with_file - expected_nodes
2731 constants.CV_ECLUSTERFILECHECK, None,
2732 "File %s should not exist on node(s) %s",
2733 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2735 # See if there are multiple versions of the file
2736 test = len(checksums) > 1
2738 variants = ["variant %s on %s" %
2739 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2740 for (idx, (checksum, nodes)) in
2741 enumerate(sorted(checksums.items()))]
2745 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2746 "File %s found with %s different checksums (%s)",
2747 filename, len(checksums), "; ".join(variants))
2749 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2751 """Verifies the node DRBD status.
2753 @type ninfo: L{objects.Node}
2754 @param ninfo: the node to check
2755 @param nresult: the remote results for the node
2756 @param instanceinfo: the dict of instances
2757 @param drbd_helper: the configured DRBD usermode helper
2758 @param drbd_map: the DRBD map as returned by
2759 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2763 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2766 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2767 test = (helper_result is None)
2768 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2769 "no drbd usermode helper returned")
2771 status, payload = helper_result
2773 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2774 "drbd usermode helper check unsuccessful: %s", payload)
2775 test = status and (payload != drbd_helper)
2776 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2777 "wrong drbd usermode helper: %s", payload)
2779 # compute the DRBD minors
2781 for minor, instance in drbd_map[node].items():
2782 test = instance not in instanceinfo
2783 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2784 "ghost instance '%s' in temporary DRBD map", instance)
2785 # ghost instance should not be running, but otherwise we
2786 # don't give double warnings (both ghost instance and
2787 # unallocated minor in use)
2789 node_drbd[minor] = (instance, False)
2791 instance = instanceinfo[instance]
2792 node_drbd[minor] = (instance.name,
2793 instance.admin_state == constants.ADMINST_UP)
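# Illustrative note: node_drbd now maps every minor expected on this node to an
# (instance_name, should_be_running) pair; ghost instances are recorded with
# False so that the checks below do not warn about them twice.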
2795 # and now check them
2796 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2797 test = not isinstance(used_minors, (tuple, list))
2798 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2799 "cannot parse drbd status file: %s", str(used_minors))
2801 # we cannot check drbd status
2804 for minor, (iname, must_exist) in node_drbd.items():
2805 test = minor not in used_minors and must_exist
2806 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2807 "drbd minor %d of instance %s is not active", minor, iname)
2808 for minor in used_minors:
2809 test = minor not in node_drbd
2810 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2811 "unallocated drbd minor %d is in use", minor)
2813 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2814 """Builds the node OS structures.
2816 @type ninfo: L{objects.Node}
2817 @param ninfo: the node to check
2818 @param nresult: the remote results for the node
2819 @param nimg: the node image object
2823 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2825 remote_os = nresult.get(constants.NV_OSLIST, None)
2826 test = (not isinstance(remote_os, list) or
2827 not compat.all(isinstance(v, list) and len(v) == 7
2828 for v in remote_os))
2830 _ErrorIf(test, constants.CV_ENODEOS, node,
2831 "node hasn't returned valid OS data")
2840 for (name, os_path, status, diagnose,
2841 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2843 if name not in os_dict:
2846 # parameters is a list of lists instead of list of tuples due to
2847 # JSON lacking a real tuple type, fix it:
2848 parameters = [tuple(v) for v in parameters]
2849 os_dict[name].append((os_path, status, diagnose,
2850 set(variants), set(parameters), set(api_ver)))
2852 nimg.oslist = os_dict
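# Illustrative shape: nimg.oslist maps each OS name to a list of
# (path, status, diagnose, variants, parameters, api_versions) tuples, one per
# occurrence of that OS on the node; _VerifyNodeOS below compares the first
# entry against the reference node.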
2854 def _VerifyNodeOS(self, ninfo, nimg, base):
2855 """Verifies the node OS list.
2857 @type ninfo: L{objects.Node}
2858 @param ninfo: the node to check
2859 @param nimg: the node image object
2860 @param base: the 'template' node we match against (e.g. from the master)
2864 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2866 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2868 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2869 for os_name, os_data in nimg.oslist.items():
2870 assert os_data, "Empty OS status for OS %s?!" % os_name
2871 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2872 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2873 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2874 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2875 "OS '%s' has multiple entries (first one shadows the rest): %s",
2876 os_name, utils.CommaJoin([v[0] for v in os_data]))
2877 # comparisons with the 'base' image
2878 test = os_name not in base.oslist
2879 _ErrorIf(test, constants.CV_ENODEOS, node,
2880 "Extra OS %s not present on reference node (%s)",
2884 assert base.oslist[os_name], "Base node has empty OS status?"
2885 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2887 # base OS is invalid, skipping
2889 for kind, a, b in [("API version", f_api, b_api),
2890 ("variants list", f_var, b_var),
2891 ("parameters", beautify_params(f_param),
2892 beautify_params(b_param))]:
2893 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2894 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2895 kind, os_name, base.name,
2896 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2898 # check any missing OSes
2899 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2900 _ErrorIf(missing, constants.CV_ENODEOS, node,
2901 "OSes present on reference node %s but missing on this node: %s",
2902 base.name, utils.CommaJoin(missing))
2904 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2905 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2907 @type ninfo: L{objects.Node}
2908 @param ninfo: the node to check
2909 @param nresult: the remote results for the node
2910 @type is_master: bool
2911 @param is_master: Whether node is the master node
2917 (constants.ENABLE_FILE_STORAGE or
2918 constants.ENABLE_SHARED_FILE_STORAGE)):
2920 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2922 # This should never happen
2923 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2924 "Node did not return forbidden file storage paths")
2926 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2927 "Found forbidden file storage paths: %s",
2928 utils.CommaJoin(fspaths))
2930 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2931 constants.CV_ENODEFILESTORAGEPATHS, node,
2932 "Node should not have returned forbidden file storage"
2935 def _VerifyOob(self, ninfo, nresult):
2936 """Verifies out of band functionality of a node.
2938 @type ninfo: L{objects.Node}
2939 @param ninfo: the node to check
2940 @param nresult: the remote results for the node
2944 # We just have to verify the paths on master and/or master candidates
2945 # as the oob helper is invoked on the master
2946 if ((ninfo.master_candidate or ninfo.master_capable) and
2947 constants.NV_OOB_PATHS in nresult):
2948 for path_result in nresult[constants.NV_OOB_PATHS]:
2949 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2951 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2952 """Verifies and updates the node volume data.
2954 This function will update a L{NodeImage}'s internal structures
2955 with data from the remote call.
2957 @type ninfo: L{objects.Node}
2958 @param ninfo: the node to check
2959 @param nresult: the remote results for the node
2960 @param nimg: the node image object
2961 @param vg_name: the configured VG name
2965 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2967 nimg.lvm_fail = True
2968 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2971 elif isinstance(lvdata, basestring):
2972 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2973 utils.SafeEncode(lvdata))
2974 elif not isinstance(lvdata, dict):
2975 _ErrorIf(True, constants.CV_ENODELVM, node,
2976 "rpc call to node failed (lvlist)")
2978 nimg.volumes = lvdata
2979 nimg.lvm_fail = False
2981 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2982 """Verifies and updates the node instance list.
2984 If the listing was successful, then updates this node's instance
2985 list. Otherwise, it marks the RPC call as failed for the instance
2988 @type ninfo: L{objects.Node}
2989 @param ninfo: the node to check
2990 @param nresult: the remote results for the node
2991 @param nimg: the node image object
2994 idata = nresult.get(constants.NV_INSTANCELIST, None)
2995 test = not isinstance(idata, list)
2996 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2997 "rpc call to node failed (instancelist): %s",
2998 utils.SafeEncode(str(idata)))
3000 nimg.hyp_fail = True
3002 nimg.instances = idata
3004 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3005 """Verifies and computes a node information map
3007 @type ninfo: L{objects.Node}
3008 @param ninfo: the node to check
3009 @param nresult: the remote results for the node
3010 @param nimg: the node image object
3011 @param vg_name: the configured VG name
3015 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3017 # try to read free memory (from the hypervisor)
3018 hv_info = nresult.get(constants.NV_HVINFO, None)
3019 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3020 _ErrorIf(test, constants.CV_ENODEHV, node,
3021 "rpc call to node failed (hvinfo)")
3024 nimg.mfree = int(hv_info["memory_free"])
3025 except (ValueError, TypeError):
3026 _ErrorIf(True, constants.CV_ENODERPC, node,
3027 "node returned invalid nodeinfo, check hypervisor")
3029 # FIXME: devise a free space model for file based instances as well
3030 if vg_name is not None:
3031 test = (constants.NV_VGLIST not in nresult or
3032 vg_name not in nresult[constants.NV_VGLIST])
3033 _ErrorIf(test, constants.CV_ENODELVM, node,
3034 "node didn't return data for the volume group '%s'"
3035 " - it is either missing or broken", vg_name)
3038 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3039 except (ValueError, TypeError):
3040 _ErrorIf(True, constants.CV_ENODERPC, node,
3041 "node returned invalid LVM info, check LVM status")
3043 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3044 """Gets per-disk status information for all instances.
3046 @type nodelist: list of strings
3047 @param nodelist: Node names
3048 @type node_image: dict of (name, L{NodeImage})
3049 @param node_image: Node objects
3050 @type instanceinfo: dict of (name, L{objects.Instance})
3051 @param instanceinfo: Instance objects
3052 @rtype: {instance: {node: [(success, payload)]}}
3053 @return: a dictionary of per-instance dictionaries with nodes as
3054 keys and disk information as values; the disk information is a
3055 list of tuples (success, payload)
3058 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3061 node_disks_devonly = {}
3062 diskless_instances = set()
3063 diskless = constants.DT_DISKLESS
3065 for nname in nodelist:
3066 node_instances = list(itertools.chain(node_image[nname].pinst,
3067 node_image[nname].sinst))
3068 diskless_instances.update(inst for inst in node_instances
3069 if instanceinfo[inst].disk_template == diskless)
3070 disks = [(inst, disk)
3071 for inst in node_instances
3072 for disk in instanceinfo[inst].disks]
3075 # No need to collect data
3078 node_disks[nname] = disks
3080 # _AnnotateDiskParams already makes copies of the disks
3082 for (inst, dev) in disks:
3083 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3084 self.cfg.SetDiskID(anno_disk, nname)
3085 devonly.append(anno_disk)
3087 node_disks_devonly[nname] = devonly
3089 assert len(node_disks) == len(node_disks_devonly)
3091 # Collect data from all nodes with disks
3092 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3095 assert len(result) == len(node_disks)
3099 for (nname, nres) in result.items():
3100 disks = node_disks[nname]
3103 # No data from this node
3104 data = len(disks) * [(False, "node offline")]
3107 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3108 "while getting disk information: %s", msg)
3110 # No data from this node
3111 data = len(disks) * [(False, msg)]
3114 for idx, i in enumerate(nres.payload):
3115 if isinstance(i, (tuple, list)) and len(i) == 2:
3118 logging.warning("Invalid result from node %s, entry %d: %s",
3120 data.append((False, "Invalid result from the remote node"))
3122 for ((inst, _), status) in zip(disks, data):
3123 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3125 # Add empty entries for diskless instances.
3126 for inst in diskless_instances:
3127 assert inst not in instdisk
3130 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3131 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3132 compat.all(isinstance(s, (tuple, list)) and
3133 len(s) == 2 for s in statuses)
3134 for inst, nnames in instdisk.items()
3135 for nname, statuses in nnames.items())
3136 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3141 def _SshNodeSelector(group_uuid, all_nodes):
3142 """Create endless iterators for all potential SSH check hosts.
3145 nodes = [node for node in all_nodes
3146 if (node.group != group_uuid and
3148 keyfunc = operator.attrgetter("group")
3150 return map(itertools.cycle,
3151 [sorted(map(operator.attrgetter("name"), names))
3152 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3156 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3157 """Choose which nodes should talk to which other nodes.
3159 We will make nodes contact all nodes in their group, and one node from
3162 @warning: This algorithm has a known issue if one node group is much
3163 smaller than others (e.g. just one node). In such a case all other
3164 nodes will talk to the single node.
3167 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3168 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3170 return (online_nodes,
3171 dict((name, sorted([i.next() for i in sel]))
3172 for name in online_nodes))
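# Illustrative result (hypothetical node names): for a verified group with
# online nodes "nodeA" and "nodeB" and one other group, this returns something
# like (["nodeA", "nodeB"], {"nodeA": ["other1"], "nodeB": ["other2"]}) -- each
# online node is paired with one peer from every other group, picked
# round-robin from that group's members.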
3174 def BuildHooksEnv(self):
3177 Cluster-Verify hooks run only in the post phase; if they fail, their output
3178 is logged in the verify output and the verification fails.
3182 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3185 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3186 for node in self.my_node_info.values())
3190 def BuildHooksNodes(self):
3191 """Build hooks nodes.
3194 return ([], self.my_node_names)
3196 def Exec(self, feedback_fn):
3197 """Verify integrity of the node group, performing various tests on nodes.
3200 # This method has too many local variables. pylint: disable=R0914
3201 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3203 if not self.my_node_names:
3205 feedback_fn("* Empty node group, skipping verification")
3209 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3210 verbose = self.op.verbose
3211 self._feedback_fn = feedback_fn
3213 vg_name = self.cfg.GetVGName()
3214 drbd_helper = self.cfg.GetDRBDHelper()
3215 cluster = self.cfg.GetClusterInfo()
3216 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3217 hypervisors = cluster.enabled_hypervisors
3218 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3220 i_non_redundant = [] # Non redundant instances
3221 i_non_a_balanced = [] # Non auto-balanced instances
3222 i_offline = 0 # Count of offline instances
3223 n_offline = 0 # Count of offline nodes
3224 n_drained = 0 # Count of nodes being drained
3225 node_vol_should = {}
3227 # FIXME: verify OS list
3230 filemap = _ComputeAncillaryFiles(cluster, False)
3232 # do local checksums
3233 master_node = self.master_node = self.cfg.GetMasterNode()
3234 master_ip = self.cfg.GetMasterIP()
3236 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3239 if self.cfg.GetUseExternalMipScript():
3240 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3242 node_verify_param = {
3243 constants.NV_FILELIST:
3244 map(vcluster.MakeVirtualPath,
3245 utils.UniqueSequence(filename
3246 for files in filemap
3247 for filename in files)),
3248 constants.NV_NODELIST:
3249 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3250 self.all_node_info.values()),
3251 constants.NV_HYPERVISOR: hypervisors,
3252 constants.NV_HVPARAMS:
3253 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3254 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3255 for node in node_data_list
3256 if not node.offline],
3257 constants.NV_INSTANCELIST: hypervisors,
3258 constants.NV_VERSION: None,
3259 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3260 constants.NV_NODESETUP: None,
3261 constants.NV_TIME: None,
3262 constants.NV_MASTERIP: (master_node, master_ip),
3263 constants.NV_OSLIST: None,
3264 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3265 constants.NV_USERSCRIPTS: user_scripts,
3268 if vg_name is not None:
3269 node_verify_param[constants.NV_VGLIST] = None
3270 node_verify_param[constants.NV_LVLIST] = vg_name
3271 node_verify_param[constants.NV_PVLIST] = [vg_name]
3274 node_verify_param[constants.NV_DRBDLIST] = None
3275 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3277 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3278 # Load file storage paths only from master node
3279 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3282 # FIXME: this needs to be changed per node-group, not cluster-wide
3284 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3285 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3286 bridges.add(default_nicpp[constants.NIC_LINK])
3287 for instance in self.my_inst_info.values():
3288 for nic in instance.nics:
3289 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3290 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3291 bridges.add(full_nic[constants.NIC_LINK])
3294 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3296 # Build our expected cluster state
3297 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3299 vm_capable=node.vm_capable))
3300 for node in node_data_list)
3304 for node in self.all_node_info.values():
3305 path = _SupportsOob(self.cfg, node)
3306 if path and path not in oob_paths:
3307 oob_paths.append(path)
3310 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3312 for instance in self.my_inst_names:
3313 inst_config = self.my_inst_info[instance]
3314 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3317 for nname in inst_config.all_nodes:
3318 if nname not in node_image:
3319 gnode = self.NodeImage(name=nname)
3320 gnode.ghost = (nname not in self.all_node_info)
3321 node_image[nname] = gnode
3323 inst_config.MapLVsByNode(node_vol_should)
3325 pnode = inst_config.primary_node
3326 node_image[pnode].pinst.append(instance)
3328 for snode in inst_config.secondary_nodes:
3329 nimg = node_image[snode]
3330 nimg.sinst.append(instance)
3331 if pnode not in nimg.sbp:
3332 nimg.sbp[pnode] = []
3333 nimg.sbp[pnode].append(instance)
3335 # At this point, we have the in-memory data structures complete,
3336 # except for the runtime information, which we'll gather next
3338 # Due to the way our RPC system works, exact response times cannot be
3339 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3340 # time before and after executing the request, we can at least have a time
3342 nvinfo_starttime = time.time()
3343 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3345 self.cfg.GetClusterName())
3346 nvinfo_endtime = time.time()
3348 if self.extra_lv_nodes and vg_name is not None:
3350 self.rpc.call_node_verify(self.extra_lv_nodes,
3351 {constants.NV_LVLIST: vg_name},
3352 self.cfg.GetClusterName())
3354 extra_lv_nvinfo = {}
3356 all_drbd_map = self.cfg.ComputeDRBDMap()
3358 feedback_fn("* Gathering disk information (%s nodes)" %
3359 len(self.my_node_names))
3360 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3363 feedback_fn("* Verifying configuration file consistency")
3365 # If not all nodes are being checked, we need to make sure the master node
3366 # and a non-checked vm_capable node are in the list.
3367 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3369 vf_nvinfo = all_nvinfo.copy()
3370 vf_node_info = list(self.my_node_info.values())
3371 additional_nodes = []
3372 if master_node not in self.my_node_info:
3373 additional_nodes.append(master_node)
3374 vf_node_info.append(self.all_node_info[master_node])
3375 # Add the first vm_capable node we find which is not included,
3376 # excluding the master node (which we already have)
3377 for node in absent_nodes:
3378 nodeinfo = self.all_node_info[node]
3379 if (nodeinfo.vm_capable and not nodeinfo.offline and
3380 node != master_node):
3381 additional_nodes.append(node)
3382 vf_node_info.append(self.all_node_info[node])
3384 key = constants.NV_FILELIST
3385 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3386 {key: node_verify_param[key]},
3387 self.cfg.GetClusterName()))
3389 vf_nvinfo = all_nvinfo
3390 vf_node_info = self.my_node_info.values()
3392 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3394 feedback_fn("* Verifying node status")
3398 for node_i in node_data_list:
3400 nimg = node_image[node]
3404 feedback_fn("* Skipping offline node %s" % (node,))
3408 if node == master_node:
3410 elif node_i.master_candidate:
3411 ntype = "master candidate"
3412 elif node_i.drained:
3418 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3420 msg = all_nvinfo[node].fail_msg
3421 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3424 nimg.rpc_fail = True
3427 nresult = all_nvinfo[node].payload
3429 nimg.call_ok = self._VerifyNode(node_i, nresult)
3430 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3431 self._VerifyNodeNetwork(node_i, nresult)
3432 self._VerifyNodeUserScripts(node_i, nresult)
3433 self._VerifyOob(node_i, nresult)
3434 self._VerifyFileStoragePaths(node_i, nresult,
3435 node == master_node)
3438 self._VerifyNodeLVM(node_i, nresult, vg_name)
3439 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3442 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3443 self._UpdateNodeInstances(node_i, nresult, nimg)
3444 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3445 self._UpdateNodeOS(node_i, nresult, nimg)
3447 if not nimg.os_fail:
3448 if refos_img is None:
3450 self._VerifyNodeOS(node_i, nimg, refos_img)
3451 self._VerifyNodeBridges(node_i, nresult, bridges)
3453 # Check whether all running instances are primary for the node. (This
3454 # can no longer be done from _VerifyInstance below, since some of the
3455 # wrong instances could be from other node groups.)
3456 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3458 for inst in non_primary_inst:
3459 test = inst in self.all_inst_info
3460 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3461 "instance should not run on node %s", node_i.name)
3462 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3463 "node is running unknown instance %s", inst)
3465 for node, result in extra_lv_nvinfo.items():
3466 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3467 node_image[node], vg_name)
3469 feedback_fn("* Verifying instance status")
3470 for instance in self.my_inst_names:
3472 feedback_fn("* Verifying instance %s" % instance)
3473 inst_config = self.my_inst_info[instance]
3474 self._VerifyInstance(instance, inst_config, node_image,
3476 inst_nodes_offline = []
3478 pnode = inst_config.primary_node
3479 pnode_img = node_image[pnode]
3480 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3481 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3482 " primary node failed", instance)
3484 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3486 constants.CV_EINSTANCEBADNODE, instance,
3487 "instance is marked as running and lives on offline node %s",
3488 inst_config.primary_node)
3490 # If the instance is non-redundant we cannot survive losing its primary
3491 # node, so we are not N+1 compliant.
3492 if inst_config.disk_template not in constants.DTS_MIRRORED:
3493 i_non_redundant.append(instance)
3495 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3496 constants.CV_EINSTANCELAYOUT,
3497 instance, "instance has multiple secondary nodes: %s",
3498 utils.CommaJoin(inst_config.secondary_nodes),
3499 code=self.ETYPE_WARNING)
3501 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3502 pnode = inst_config.primary_node
3503 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3504 instance_groups = {}
3506 for node in instance_nodes:
3507 instance_groups.setdefault(self.all_node_info[node].group,
3511 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3512 # Sort so that we always list the primary node first.
3513 for group, nodes in sorted(instance_groups.items(),
3514 key=lambda (_, nodes): pnode in nodes,
3517 self._ErrorIf(len(instance_groups) > 1,
3518 constants.CV_EINSTANCESPLITGROUPS,
3519 instance, "instance has primary and secondary nodes in"
3520 " different groups: %s", utils.CommaJoin(pretty_list),
3521 code=self.ETYPE_WARNING)
3523 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3524 i_non_a_balanced.append(instance)
3526 for snode in inst_config.secondary_nodes:
3527 s_img = node_image[snode]
3528 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3529 snode, "instance %s, connection to secondary node failed",
3533 inst_nodes_offline.append(snode)
3535 # warn that the instance lives on offline nodes
3536 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3537 "instance has offline secondary node(s) %s",
3538 utils.CommaJoin(inst_nodes_offline))
3539 # ... or ghost/non-vm_capable nodes
3540 for node in inst_config.all_nodes:
3541 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3542 instance, "instance lives on ghost node %s", node)
3543 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3544 instance, "instance lives on non-vm_capable node %s", node)
3546 feedback_fn("* Verifying orphan volumes")
3547 reserved = utils.FieldSet(*cluster.reserved_lvs)
3549 # We will get spurious "unknown volume" warnings if any node of this group
3550 # is secondary for an instance whose primary is in another group. To avoid
3551 # them, we find these instances and add their volumes to node_vol_should.
3552 for inst in self.all_inst_info.values():
3553 for secondary in inst.secondary_nodes:
3554 if (secondary in self.my_node_info
3555 and inst.name not in self.my_inst_info):
3556 inst.MapLVsByNode(node_vol_should)
3559 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3561 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3562 feedback_fn("* Verifying N+1 Memory redundancy")
3563 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3565 feedback_fn("* Other Notes")
3567 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3568 % len(i_non_redundant))
3570 if i_non_a_balanced:
3571 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3572 % len(i_non_a_balanced))
3575 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3578 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3581 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3585 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3586 """Analyze the post-hooks' result
3588 This method analyses the hook result, handles it, and sends some
3589 nicely-formatted feedback back to the user.
3591 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3592 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3593 @param hooks_results: the results of the multi-node hooks rpc call
3594 @param feedback_fn: function used to send feedback back to the caller
3595 @param lu_result: previous Exec result
3596 @return: the new Exec result, based on the previous result
3600 # We only really run POST phase hooks, only for non-empty groups,
3601 # and are only interested in their results
3602 if not self.my_node_names:
3605 elif phase == constants.HOOKS_PHASE_POST:
3606 # Used to change hooks' output to proper indentation
3607 feedback_fn("* Hooks Results")
3608 assert hooks_results, "invalid result from hooks"
3610 for node_name in hooks_results:
3611 res = hooks_results[node_name]
3613 test = msg and not res.offline
3614 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3615 "Communication failure in hooks execution: %s", msg)
3616 if res.offline or msg:
3617 # No need to investigate payload if node is offline or gave
3620 for script, hkr, output in res.payload:
3621 test = hkr == constants.HKR_FAIL
3622 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3623 "Script %s failed, output:", script)
3625 output = self._HOOKS_INDENT_RE.sub(" ", output)
3626 feedback_fn("%s" % output)
3632 class LUClusterVerifyDisks(NoHooksLU):
3633 """Verifies the cluster disks status.
3638 def ExpandNames(self):
3639 self.share_locks = _ShareAll()
3640 self.needed_locks = {
3641 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3644 def Exec(self, feedback_fn):
3645 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3647 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3648 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3649 for group in group_names])
3652 class LUGroupVerifyDisks(NoHooksLU):
3653 """Verifies the status of all disks in a node group.
3658 def ExpandNames(self):
3659 # Raises errors.OpPrereqError on its own if group can't be found
3660 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3662 self.share_locks = _ShareAll()
3663 self.needed_locks = {
3664 locking.LEVEL_INSTANCE: [],
3665 locking.LEVEL_NODEGROUP: [],
3666 locking.LEVEL_NODE: [],
3668 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3669 # starts one instance of this opcode for every group, which means all
3670 # nodes will be locked for a short amount of time, so it's better to
3671 # acquire the node allocation lock as well.
3672 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3675 def DeclareLocks(self, level):
3676 if level == locking.LEVEL_INSTANCE:
3677 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3679 # Lock instances optimistically, needs verification once node and group
3680 # locks have been acquired
3681 self.needed_locks[locking.LEVEL_INSTANCE] = \
3682 self.cfg.GetNodeGroupInstances(self.group_uuid)
3684 elif level == locking.LEVEL_NODEGROUP:
3685 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3687 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3688 set([self.group_uuid] +
3689 # Lock all groups used by instances optimistically; this requires
3690 # going via the node before it's locked, requiring verification
3693 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3694 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3696 elif level == locking.LEVEL_NODE:
3697 # This will only lock the nodes in the group to be verified which contain
3699 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3700 self._LockInstancesNodes()
3702 # Lock all nodes in group to be verified
3703 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3704 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3705 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3707 def CheckPrereq(self):
3708 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3709 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3710 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3712 assert self.group_uuid in owned_groups
3714 # Check if locked instances are still correct
3715 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3717 # Get instance information
3718 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3720 # Check if node groups for locked instances are still correct
3721 _CheckInstancesNodeGroups(self.cfg, self.instances,
3722 owned_groups, owned_nodes, self.group_uuid)
3724 def Exec(self, feedback_fn):
3725 """Verify integrity of cluster disks.
3727 @rtype: tuple of three items
3728 @return: a tuple of (dict of node-to-node_error, list of instances
3729 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3734 res_instances = set()
3737 nv_dict = _MapInstanceDisksToNodes(
3738 [inst for inst in self.instances.values()
3739 if inst.admin_state == constants.ADMINST_UP])
3742 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3743 set(self.cfg.GetVmCapableNodeList()))
3745 node_lvs = self.rpc.call_lv_list(nodes, [])
3747 for (node, node_res) in node_lvs.items():
3748 if node_res.offline:
3751 msg = node_res.fail_msg
3753 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3754 res_nodes[node] = msg
3757 for lv_name, (_, _, lv_online) in node_res.payload.items():
3758 inst = nv_dict.pop((node, lv_name), None)
3759 if not (lv_online or inst is None):
3760 res_instances.add(inst)
3762 # any leftover items in nv_dict are missing LVs, let's arrange the data
3764 for key, inst in nv_dict.iteritems():
3765 res_missing.setdefault(inst, []).append(list(key))
3767 return (res_nodes, list(res_instances), res_missing)
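# Illustrative sketch only: a hypothetical consumer of the result returned
# above, assuming the documented (node errors, instances needing
# activate-disks, missing LVs) layout.
def _ExampleFormatVerifyDisksResult(result):
  """Turn an LUGroupVerifyDisks result into human-readable lines."""
  (node_errors, offline_disk_instances, missing_lvs) = result
  lines = []
  for (node, msg) in node_errors.items():
    lines.append("node %s: cannot enumerate LVs: %s" % (node, msg))
  for inst in offline_disk_instances:
    lines.append("instance %s: needs activate-disks" % inst)
  for (inst, keys) in missing_lvs.items():
    lines.append("instance %s: missing LVs %s" % (inst, keys))
  return lines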
3770 class LUClusterRepairDiskSizes(NoHooksLU):
3771 """Verifies the cluster disks sizes.
3776 def ExpandNames(self):
3777 if self.op.instances:
3778 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3779 # Not getting the node allocation lock as only a specific set of
3780 # instances (and their nodes) is going to be acquired
3781 self.needed_locks = {
3782 locking.LEVEL_NODE_RES: [],
3783 locking.LEVEL_INSTANCE: self.wanted_names,
3785 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3787 self.wanted_names = None
3788 self.needed_locks = {
3789 locking.LEVEL_NODE_RES: locking.ALL_SET,
3790 locking.LEVEL_INSTANCE: locking.ALL_SET,
3792 # This opcode acquires the node locks for all instances
3793 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3796 self.share_locks = {
3797 locking.LEVEL_NODE_RES: 1,
3798 locking.LEVEL_INSTANCE: 0,
3799 locking.LEVEL_NODE_ALLOC: 1,
3802 def DeclareLocks(self, level):
3803 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3804 self._LockInstancesNodes(primary_only=True, level=level)
3806 def CheckPrereq(self):
3807 """Check prerequisites.
3809 This only checks the optional instance list against the existing names.
3812 if self.wanted_names is None:
3813 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3815 self.wanted_instances = \
3816 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3818 def _EnsureChildSizes(self, disk):
3819 """Ensure children of the disk have the needed disk size.
3821 This is valid mainly for DRBD8 and fixes an issue where the
3822 children have smaller disk size.
3824 @param disk: an L{ganeti.objects.Disk} object
3827 if disk.dev_type == constants.LD_DRBD8:
3828 assert disk.children, "Empty children for DRBD8?"
3829 fchild = disk.children[0]
3830 mismatch = fchild.size < disk.size
3832 self.LogInfo("Child disk has size %d, parent %d, fixing",
3833 fchild.size, disk.size)
3834 fchild.size = disk.size
3836 # and we recurse on this child only, not on the metadev
3837 return self._EnsureChildSizes(fchild) or mismatch
3841 def Exec(self, feedback_fn):
3842 """Verify the size of cluster disks.
3845 # TODO: check child disks too
3846 # TODO: check differences in size between primary/secondary nodes
3848 for instance in self.wanted_instances:
3849 pnode = instance.primary_node
3850 if pnode not in per_node_disks:
3851 per_node_disks[pnode] = []
3852 for idx, disk in enumerate(instance.disks):
3853 per_node_disks[pnode].append((instance, idx, disk))
3855 assert not (frozenset(per_node_disks.keys()) -
3856 self.owned_locks(locking.LEVEL_NODE_RES)), \
3857 "Not owning correct locks"
3858 assert not self.owned_locks(locking.LEVEL_NODE)
3861 for node, dskl in per_node_disks.items():
3862 newl = [v[2].Copy() for v in dskl]
3864 self.cfg.SetDiskID(dsk, node)
3865 result = self.rpc.call_blockdev_getsize(node, newl)
3867 self.LogWarning("Failure in blockdev_getsize call to node"
3868 " %s, ignoring", node)
3870 if len(result.payload) != len(dskl):
3871 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3872 " result.payload=%s", node, len(dskl), result.payload)
3873 self.LogWarning("Invalid result from node %s, ignoring node results",
3876 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3878 self.LogWarning("Disk %d of instance %s did not return size"
3879 " information, ignoring", idx, instance.name)
3881 if not isinstance(size, (int, long)):
3882 self.LogWarning("Disk %d of instance %s did not return valid"
3883 " size information, ignoring", idx, instance.name)
3886 if size != disk.size:
3887 self.LogInfo("Disk %d of instance %s has mismatched size,"
3888 " correcting: recorded %d, actual %d", idx,
3889 instance.name, disk.size, size)
3891 self.cfg.Update(instance, feedback_fn)
3892 changed.append((instance.name, idx, size))
3893 if self._EnsureChildSizes(disk):
3894 self.cfg.Update(instance, feedback_fn)
3895 changed.append((instance.name, idx, disk.size))
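# Illustrative sketch only: the invariant enforced by _EnsureChildSizes above
# is that a DRBD8 device's data child (children[0]) is at least as large as
# the parent. The helper below restates that check without modifying
# anything; its name is hypothetical.
def _ExampleDrbdChildTooSmall(disk):
  """Return True if a DRBD8 disk's data child is smaller than its parent."""
  return (disk.dev_type == constants.LD_DRBD8 and
          bool(disk.children) and
          disk.children[0].size < disk.size)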
3899 class LUClusterRename(LogicalUnit):
3900 """Rename the cluster.
3903 HPATH = "cluster-rename"
3904 HTYPE = constants.HTYPE_CLUSTER
3906 def BuildHooksEnv(self):
3911 "OP_TARGET": self.cfg.GetClusterName(),
3912 "NEW_NAME": self.op.name,
3915 def BuildHooksNodes(self):
3916 """Build hooks nodes.
3919 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3921 def CheckPrereq(self):
3922 """Verify that the passed name is a valid one.
3925 hostname = netutils.GetHostname(name=self.op.name,
3926 family=self.cfg.GetPrimaryIPFamily())
3928 new_name = hostname.name
3929 self.ip = new_ip = hostname.ip
3930 old_name = self.cfg.GetClusterName()
3931 old_ip = self.cfg.GetMasterIP()
3932 if new_name == old_name and new_ip == old_ip:
3933 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3934 " cluster has changed",
3936 if new_ip != old_ip:
3937 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3938 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3939 " reachable on the network" %
3940 new_ip, errors.ECODE_NOTUNIQUE)
3942 self.op.name = new_name
3944 def Exec(self, feedback_fn):
3945 """Rename the cluster.
3948 clustername = self.op.name
3951 # shutdown the master IP
3952 master_params = self.cfg.GetMasterNetworkParameters()
3953 ems = self.cfg.GetUseExternalMipScript()
3954 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3956 result.Raise("Could not disable the master role")
3959 cluster = self.cfg.GetClusterInfo()
3960 cluster.cluster_name = clustername
3961 cluster.master_ip = new_ip
3962 self.cfg.Update(cluster, feedback_fn)
3964 # update the known hosts file
3965 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3966 node_list = self.cfg.GetOnlineNodeList()
3968 node_list.remove(master_params.name)
3971 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3973 master_params.ip = new_ip
3974 result = self.rpc.call_node_activate_master_ip(master_params.name,
3976 msg = result.fail_msg
3978 self.LogWarning("Could not re-enable the master role on"
3979 " the master, please restart manually: %s", msg)
3984 def _ValidateNetmask(cfg, netmask):
3985 """Checks if a netmask is valid.
3987 @type cfg: L{config.ConfigWriter}
3988 @param cfg: The cluster configuration
3990 @param netmask: the netmask to be verified
3991 @raise errors.OpPrereqError: if the validation fails
3994 ip_family = cfg.GetPrimaryIPFamily()
3996 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3997 except errors.ProgrammerError:
3998 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3999 ip_family, errors.ECODE_INVAL)
4000 if not ipcls.ValidateNetmask(netmask):
4001 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4002 (netmask), errors.ECODE_INVAL)
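# Illustrative sketch only: the netmask passed to _ValidateNetmask is a CIDR
# prefix length and is checked against the cluster's primary IP family. The
# wrapper below (hypothetical) just converts the exception into a boolean.
def _ExampleNetmaskIsValid(cfg, netmask):
  """Return True if C{netmask} is acceptable for the cluster's IP family."""
  try:
    _ValidateNetmask(cfg, netmask)
  except errors.OpPrereqError:
    return False
  return True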
4005 class LUClusterSetParams(LogicalUnit):
4006 """Change the parameters of the cluster.
4009 HPATH = "cluster-modify"
4010 HTYPE = constants.HTYPE_CLUSTER
4013 def CheckArguments(self):
4017 if self.op.uid_pool:
4018 uidpool.CheckUidPool(self.op.uid_pool)
4020 if self.op.add_uids:
4021 uidpool.CheckUidPool(self.op.add_uids)
4023 if self.op.remove_uids:
4024 uidpool.CheckUidPool(self.op.remove_uids)
4026 if self.op.master_netmask is not None:
4027 _ValidateNetmask(self.cfg, self.op.master_netmask)
4029 if self.op.diskparams:
4030 for dt_params in self.op.diskparams.values():
4031 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4033 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4034 except errors.OpPrereqError, err:
4035 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4038 def ExpandNames(self):
4039 # FIXME: in the future maybe other cluster params won't require checking on
4040 # all nodes to be modified.
4041 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4042 # resource locks the right thing, shouldn't it be the BGL instead?
4043 self.needed_locks = {
4044 locking.LEVEL_NODE: locking.ALL_SET,
4045 locking.LEVEL_INSTANCE: locking.ALL_SET,
4046 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4047 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4049 self.share_locks = _ShareAll()
4051 def BuildHooksEnv(self):
4056 "OP_TARGET": self.cfg.GetClusterName(),
4057 "NEW_VG_NAME": self.op.vg_name,
4060 def BuildHooksNodes(self):
4061 """Build hooks nodes.
4064 mn = self.cfg.GetMasterNode()
4067 def CheckPrereq(self):
4068 """Check prerequisites.
4070 This checks whether the given params don't conflict and
4071 if the given volume group is valid.
4074 if self.op.vg_name is not None and not self.op.vg_name:
4075 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4076 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4077 " instances exist", errors.ECODE_INVAL)
4079 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4080 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4081 raise errors.OpPrereqError("Cannot disable drbd helper while"
4082 " drbd-based instances exist",
4085 node_list = self.owned_locks(locking.LEVEL_NODE)
4087 # if vg_name is not None, check the given volume group on all nodes
4089 vglist = self.rpc.call_vg_list(node_list)
4090 for node in node_list:
4091 msg = vglist[node].fail_msg
4093 # ignoring down node
4094 self.LogWarning("Error while gathering data on node %s"
4095 " (ignoring node): %s", node, msg)
4097 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4099 constants.MIN_VG_SIZE)
4101 raise errors.OpPrereqError("Error on node '%s': %s" %
4102 (node, vgstatus), errors.ECODE_ENVIRON)
4104 if self.op.drbd_helper:
4105 # checks given drbd helper on all nodes
4106 helpers = self.rpc.call_drbd_helper(node_list)
4107 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4109 self.LogInfo("Not checking drbd helper on offline node %s", node)
4111 msg = helpers[node].fail_msg
4113 raise errors.OpPrereqError("Error checking drbd helper on node"
4114 " '%s': %s" % (node, msg),
4115 errors.ECODE_ENVIRON)
4116 node_helper = helpers[node].payload
4117 if node_helper != self.op.drbd_helper:
4118 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4119 (node, node_helper), errors.ECODE_ENVIRON)
4121 self.cluster = cluster = self.cfg.GetClusterInfo()
4122 # validate params changes
4123 if self.op.beparams:
4124 objects.UpgradeBeParams(self.op.beparams)
4125 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4126 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4128 if self.op.ndparams:
4129 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4130 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4132 # TODO: we need a more general way to handle resetting
4133 # cluster-level parameters to default values
4134 if self.new_ndparams["oob_program"] == "":
4135 self.new_ndparams["oob_program"] = \
4136 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4138 if self.op.hv_state:
4139 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4140 self.cluster.hv_state_static)
4141 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4142 for hv, values in new_hv_state.items())
4144 if self.op.disk_state:
4145 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4146 self.cluster.disk_state_static)
4147 self.new_disk_state = \
4148 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4149 for name, values in svalues.items()))
4150 for storage, svalues in new_disk_state.items())
4153 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4156 all_instances = self.cfg.GetAllInstancesInfo().values()
4158 for group in self.cfg.GetAllNodeGroupsInfo().values():
4159 instances = frozenset([inst for inst in all_instances
4160 if compat.any(node in group.members
4161 for node in inst.all_nodes)])
4162 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4163 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4164 new = _ComputeNewInstanceViolations(ipol,
4165 new_ipolicy, instances)
4167 violations.update(new)
4170 self.LogWarning("After the ipolicy change the following instances"
4171 " violate them: %s",
4172 utils.CommaJoin(utils.NiceSort(violations)))
4174 if self.op.nicparams:
4175 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4176 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4177 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4180 # check all instances for consistency
4181 for instance in self.cfg.GetAllInstancesInfo().values():
4182 for nic_idx, nic in enumerate(instance.nics):
4183 params_copy = copy.deepcopy(nic.nicparams)
4184 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4186 # check parameter syntax
4188 objects.NIC.CheckParameterSyntax(params_filled)
4189 except errors.ConfigurationError, err:
4190 nic_errors.append("Instance %s, nic/%d: %s" %
4191 (instance.name, nic_idx, err))
4193 # if we're moving instances to routed, check that they have an ip
4194 target_mode = params_filled[constants.NIC_MODE]
4195 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4196 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4197 " address" % (instance.name, nic_idx))
4199 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4200 "\n".join(nic_errors), errors.ECODE_INVAL)
4202 # hypervisor list/parameters
4203 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4204 if self.op.hvparams:
4205 for hv_name, hv_dict in self.op.hvparams.items():
4206 if hv_name not in self.new_hvparams:
4207 self.new_hvparams[hv_name] = hv_dict
4209 self.new_hvparams[hv_name].update(hv_dict)
4211 # disk template parameters
4212 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4213 if self.op.diskparams:
4214 for dt_name, dt_params in self.op.diskparams.items():
4215 if dt_name not in self.new_diskparams:
4216 self.new_diskparams[dt_name] = dt_params
4218 self.new_diskparams[dt_name].update(dt_params)
4220 # os hypervisor parameters
4221 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4223 for os_name, hvs in self.op.os_hvp.items():
4224 if os_name not in self.new_os_hvp:
4225 self.new_os_hvp[os_name] = hvs
4227 for hv_name, hv_dict in hvs.items():
4228 if hv_name not in self.new_os_hvp[os_name]:
4229 self.new_os_hvp[os_name][hv_name] = hv_dict
4231 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4234 self.new_osp = objects.FillDict(cluster.osparams, {})
4235 if self.op.osparams:
4236 for os_name, osp in self.op.osparams.items():
4237 if os_name not in self.new_osp:
4238 self.new_osp[os_name] = {}
4240 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4243 if not self.new_osp[os_name]:
4244 # we removed all parameters
4245 del self.new_osp[os_name]
4247 # check the parameter validity (remote check)
4248 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4249 os_name, self.new_osp[os_name])
4251 # changes to the hypervisor list
4252 if self.op.enabled_hypervisors is not None:
4253 self.hv_list = self.op.enabled_hypervisors
4254 for hv in self.hv_list:
4255 # if the hypervisor doesn't already exist in the cluster
4256 # hvparams, we initialize it to empty, and then (in both
4257 # cases) we make sure to fill the defaults, as we might not
4258 # have a complete defaults list if the hypervisor wasn't enabled before
4260 if hv not in new_hvp:
4261 new_hvp[hv] = {}
4262 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4263 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4265 self.hv_list = cluster.enabled_hypervisors
4267 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4268 # either the enabled list has changed, or the parameters have, validate
4269 for hv_name, hv_params in self.new_hvparams.items():
4270 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4271 (self.op.enabled_hypervisors and
4272 hv_name in self.op.enabled_hypervisors)):
4273 # either this is a new hypervisor, or its parameters have changed
4274 hv_class = hypervisor.GetHypervisor(hv_name)
4275 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4276 hv_class.CheckParameterSyntax(hv_params)
4277 _CheckHVParams(self, node_list, hv_name, hv_params)
4280 # no need to check any newly-enabled hypervisors, since the
4281 # defaults have already been checked in the above code-block
4282 for os_name, os_hvp in self.new_os_hvp.items():
4283 for hv_name, hv_params in os_hvp.items():
4284 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4285 # we need to fill in the new os_hvp on top of the actual hv_p
4286 cluster_defaults = self.new_hvparams.get(hv_name, {})
4287 new_osp = objects.FillDict(cluster_defaults, hv_params)
4288 hv_class = hypervisor.GetHypervisor(hv_name)
4289 hv_class.CheckParameterSyntax(new_osp)
4290 _CheckHVParams(self, node_list, hv_name, new_osp)
4292 if self.op.default_iallocator:
4293 alloc_script = utils.FindFile(self.op.default_iallocator,
4294 constants.IALLOCATOR_SEARCH_PATH,
4296 if alloc_script is None:
4297 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4298 " specified" % self.op.default_iallocator,
4301 def Exec(self, feedback_fn):
4302 """Change the parameters of the cluster.
4305 if self.op.vg_name is not None:
4306 new_volume = self.op.vg_name
4309 if new_volume != self.cfg.GetVGName():
4310 self.cfg.SetVGName(new_volume)
4312 feedback_fn("Cluster LVM configuration already in desired"
4313 " state, not changing")
4314 if self.op.drbd_helper is not None:
4315 new_helper = self.op.drbd_helper
4318 if new_helper != self.cfg.GetDRBDHelper():
4319 self.cfg.SetDRBDHelper(new_helper)
4321 feedback_fn("Cluster DRBD helper already in desired state,"
4323 if self.op.hvparams:
4324 self.cluster.hvparams = self.new_hvparams
4326 self.cluster.os_hvp = self.new_os_hvp
4327 if self.op.enabled_hypervisors is not None:
4328 self.cluster.hvparams = self.new_hvparams
4329 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4330 if self.op.beparams:
4331 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4332 if self.op.nicparams:
4333 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4335 self.cluster.ipolicy = self.new_ipolicy
4336 if self.op.osparams:
4337 self.cluster.osparams = self.new_osp
4338 if self.op.ndparams:
4339 self.cluster.ndparams = self.new_ndparams
4340 if self.op.diskparams:
4341 self.cluster.diskparams = self.new_diskparams
4342 if self.op.hv_state:
4343 self.cluster.hv_state_static = self.new_hv_state
4344 if self.op.disk_state:
4345 self.cluster.disk_state_static = self.new_disk_state
4347 if self.op.candidate_pool_size is not None:
4348 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4349 # we need to update the pool size here, otherwise the save will fail
4350 _AdjustCandidatePool(self, [])
4352 if self.op.maintain_node_health is not None:
4353 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4354 feedback_fn("Note: CONFD was disabled at build time, node health"
4355 " maintenance is not useful (still enabling it)")
4356 self.cluster.maintain_node_health = self.op.maintain_node_health
4358 if self.op.prealloc_wipe_disks is not None:
4359 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4361 if self.op.add_uids is not None:
4362 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4364 if self.op.remove_uids is not None:
4365 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4367 if self.op.uid_pool is not None:
4368 self.cluster.uid_pool = self.op.uid_pool
4370 if self.op.default_iallocator is not None:
4371 self.cluster.default_iallocator = self.op.default_iallocator
4373 if self.op.reserved_lvs is not None:
4374 self.cluster.reserved_lvs = self.op.reserved_lvs
4376 if self.op.use_external_mip_script is not None:
4377 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4379 def helper_os(aname, mods, desc):
4381 lst = getattr(self.cluster, aname)
4382 for key, val in mods:
4383 if key == constants.DDM_ADD:
4385 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4388 elif key == constants.DDM_REMOVE:
4392 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4394 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4396 if self.op.hidden_os:
4397 helper_os("hidden_os", self.op.hidden_os, "hidden")
4399 if self.op.blacklisted_os:
4400 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4402 if self.op.master_netdev:
4403 master_params = self.cfg.GetMasterNetworkParameters()
4404 ems = self.cfg.GetUseExternalMipScript()
4405 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4406 self.cluster.master_netdev)
4407 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4409 result.Raise("Could not disable the master ip")
4410 feedback_fn("Changing master_netdev from %s to %s" %
4411 (master_params.netdev, self.op.master_netdev))
4412 self.cluster.master_netdev = self.op.master_netdev
4414 if self.op.master_netmask:
4415 master_params = self.cfg.GetMasterNetworkParameters()
4416 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4417 result = self.rpc.call_node_change_master_netmask(master_params.name,
4418 master_params.netmask,
4419 self.op.master_netmask,
4421 master_params.netdev)
4423 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4426 self.cluster.master_netmask = self.op.master_netmask
4428 self.cfg.Update(self.cluster, feedback_fn)
4430 if self.op.master_netdev:
4431 master_params = self.cfg.GetMasterNetworkParameters()
4432 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4433 self.op.master_netdev)
4434 ems = self.cfg.GetUseExternalMipScript()
4435 result = self.rpc.call_node_activate_master_ip(master_params.name,
4438 self.LogWarning("Could not re-enable the master ip on"
4439 " the master, please restart manually: %s",
4443 def _UploadHelper(lu, nodes, fname):
4444 """Helper for uploading a file and showing warnings.
4447 if os.path.exists(fname):
4448 result = lu.rpc.call_upload_file(nodes, fname)
4449 for to_node, to_result in result.items():
4450 msg = to_result.fail_msg
4452 msg = ("Copy of file %s to node %s failed: %s" %
4453 (fname, to_node, msg))
4457 def _ComputeAncillaryFiles(cluster, redist):
4458 """Compute files external to Ganeti which need to be consistent.
4460 @type redist: boolean
4461 @param redist: Whether to include files which need to be redistributed
4464 # Compute files for all nodes
4466 pathutils.SSH_KNOWN_HOSTS_FILE,
4467 pathutils.CONFD_HMAC_KEY,
4468 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4469 pathutils.SPICE_CERT_FILE,
4470 pathutils.SPICE_CACERT_FILE,
4471 pathutils.RAPI_USERS_FILE,
4475 # we need to ship at least the RAPI certificate
4476 files_all.add(pathutils.RAPI_CERT_FILE)
4478 files_all.update(pathutils.ALL_CERT_FILES)
4479 files_all.update(ssconf.SimpleStore().GetFileList())
4481 if cluster.modify_etc_hosts:
4482 files_all.add(pathutils.ETC_HOSTS)
4484 if cluster.use_external_mip_script:
4485 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4487 # Files which are optional, these must:
4488 # - be present in one other category as well
4489 # - either exist or not exist on all nodes of that category (mc, vm all)
4491 pathutils.RAPI_USERS_FILE,
4494 # Files which should only be on master candidates
4498 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4502 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4503 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4504 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4506 # Files which should only be on VM-capable nodes
4509 for hv_name in cluster.enabled_hypervisors
4510 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4514 for hv_name in cluster.enabled_hypervisors
4515 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4517 # Filenames in each category must be unique
4518 all_files_set = files_all | files_mc | files_vm
4519 assert (len(all_files_set) ==
4520 sum(map(len, [files_all, files_mc, files_vm]))), \
4521 "Found file listed in more than one file list"
4523 # Optional files must be present in one other category
4524 assert all_files_set.issuperset(files_opt), \
4525 "Optional file not in a different required list"
4527 # This one file should never ever be re-distributed via RPC
4528 assert not (redist and
4529 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4531 return (files_all, files_opt, files_mc, files_vm)
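# Illustrative sketch only: the assertions above encode two invariants of the
# ancillary file categories, namely that no file appears in more than one of
# (files_all, files_mc, files_vm) and that every optional file is also listed
# in one of those categories. The helper below restates those checks.
def _ExampleFileCategoriesConsistent(files_all, files_opt, files_mc, files_vm):
  """Return True iff the ancillary file categories are consistent."""
  combined = files_all | files_mc | files_vm
  disjoint = (len(combined) ==
              len(files_all) + len(files_mc) + len(files_vm))
  return disjoint and combined.issuperset(files_opt)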
4534 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4535 """Distribute additional files which are part of the cluster configuration.
4537 ConfigWriter takes care of distributing the config and ssconf files, but
4538 there are more files which should be distributed to all nodes. This function
4539 makes sure those are copied.
4541 @param lu: calling logical unit
4542 @param additional_nodes: list of nodes not in the config to distribute to
4543 @type additional_vm: boolean
4544 @param additional_vm: whether the additional nodes are vm-capable or not
4547 # Gather target nodes
4548 cluster = lu.cfg.GetClusterInfo()
4549 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4551 online_nodes = lu.cfg.GetOnlineNodeList()
4552 online_set = frozenset(online_nodes)
4553 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4555 if additional_nodes is not None:
4556 online_nodes.extend(additional_nodes)
4558 vm_nodes.extend(additional_nodes)
4560 # Never distribute to master node
4561 for nodelist in [online_nodes, vm_nodes]:
4562 if master_info.name in nodelist:
4563 nodelist.remove(master_info.name)
4566 (files_all, _, files_mc, files_vm) = \
4567 _ComputeAncillaryFiles(cluster, True)
4569 # Never re-distribute configuration file from here
4570 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4571 pathutils.CLUSTER_CONF_FILE in files_vm)
4572 assert not files_mc, "Master candidates not handled in this function"
4575 (online_nodes, files_all),
4576 (vm_nodes, files_vm),
4580 for (node_list, files) in filemap:
4582 _UploadHelper(lu, node_list, fname)
4585 class LUClusterRedistConf(NoHooksLU):
4586 """Force the redistribution of cluster configuration.
4588 This is a very simple LU.
4593 def ExpandNames(self):
4594 self.needed_locks = {
4595 locking.LEVEL_NODE: locking.ALL_SET,
4596 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4598 self.share_locks = _ShareAll()
4600 def Exec(self, feedback_fn):
4601 """Redistribute the configuration.
4604 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4605 _RedistributeAncillaryFiles(self)
4608 class LUClusterActivateMasterIp(NoHooksLU):
4609 """Activate the master IP on the master node.
4612 def Exec(self, feedback_fn):
4613 """Activate the master IP.
4616 master_params = self.cfg.GetMasterNetworkParameters()
4617 ems = self.cfg.GetUseExternalMipScript()
4618 result = self.rpc.call_node_activate_master_ip(master_params.name,
4620 result.Raise("Could not activate the master IP")
4623 class LUClusterDeactivateMasterIp(NoHooksLU):
4624 """Deactivate the master IP on the master node.
4627 def Exec(self, feedback_fn):
4628 """Deactivate the master IP.
4631 master_params = self.cfg.GetMasterNetworkParameters()
4632 ems = self.cfg.GetUseExternalMipScript()
4633 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4635 result.Raise("Could not deactivate the master IP")
4638 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4639 """Sleep and poll for an instance's disk to sync.
4642 if not instance.disks or disks is not None and not disks:
4645 disks = _ExpandCheckDisks(instance, disks)
4648 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4650 node = instance.primary_node
4653 lu.cfg.SetDiskID(dev, node)
4655 # TODO: Convert to utils.Retry
4658 degr_retries = 10 # in seconds, as we sleep 1 second each time
4662 cumul_degraded = False
4663 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4664 msg = rstats.fail_msg
4666 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4669 raise errors.RemoteError("Can't contact node %s for mirror data,"
4670 " aborting." % node)
4673 rstats = rstats.payload
4675 for i, mstat in enumerate(rstats):
4677 lu.LogWarning("Can't compute data for node %s/%s",
4678 node, disks[i].iv_name)
4681 cumul_degraded = (cumul_degraded or
4682 (mstat.is_degraded and mstat.sync_percent is None))
4683 if mstat.sync_percent is not None:
4685 if mstat.estimated_time is not None:
4686 rem_time = ("%s remaining (estimated)" %
4687 utils.FormatSeconds(mstat.estimated_time))
4688 max_time = mstat.estimated_time
4690 rem_time = "no time estimate"
4691 lu.LogInfo("- device %s: %5.2f%% done, %s",
4692 disks[i].iv_name, mstat.sync_percent, rem_time)
4694 # if we're done but degraded, let's do a few small retries, to
4695 # make sure we see a stable and not transient situation; therefore
4696 # we force restart of the loop
4697 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4698 logging.info("Degraded disks found, %d retries left", degr_retries)
4706 time.sleep(min(60, max_time))
4709 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4711 return not cumul_degraded
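# Illustrative sketch only: per polling round, _WaitForSync above treats the
# sync as complete once no mirror reports a sync percentage, and as degraded
# if a mirror is degraded while reporting no progress. The helper below
# (hypothetical) expresses that per-round decision.
def _ExampleSyncPollState(mirror_stats):
  """Return (all_done, any_degraded) for a list of mirror status objects."""
  all_done = compat.all(mstat.sync_percent is None for mstat in mirror_stats)
  any_degraded = compat.any(mstat.is_degraded and mstat.sync_percent is None
                            for mstat in mirror_stats)
  return (all_done, any_degraded)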
4714 def _BlockdevFind(lu, node, dev, instance):
4715 """Wrapper around call_blockdev_find to annotate diskparams.
4717 @param lu: A reference to the lu object
4718 @param node: The node to call out to
4719 @param dev: The device to find
4720 @param instance: The instance object the device belongs to
4721 @return: The result of the RPC call
4724 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4725 return lu.rpc.call_blockdev_find(node, disk)
4728 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4729 """Wrapper around L{_CheckDiskConsistencyInner}.
4732 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4733 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4737 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4739 """Check that mirrors are not degraded.
4741 @attention: The device has to be annotated already.
4743 The ldisk parameter, if True, will change the test from the
4744 is_degraded attribute (which represents overall non-ok status for
4745 the device(s)) to the ldisk (representing the local storage status).
4748 lu.cfg.SetDiskID(dev, node)
4752 if on_primary or dev.AssembleOnSecondary():
4753 rstats = lu.rpc.call_blockdev_find(node, dev)
4754 msg = rstats.fail_msg
4756 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4758 elif not rstats.payload:
4759 lu.LogWarning("Can't find disk on node %s", node)
4763 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4765 result = result and not rstats.payload.is_degraded
4768 for child in dev.children:
4769 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
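# Illustrative sketch only: as documented above, the consistency check looks
# either at the overall mirror state (is_degraded) or, with ldisk=True, only
# at the local storage (ldisk_status). The helper below (hypothetical) makes
# that choice explicit for a single blockdev_find payload.
def _ExampleMirrorHealthy(payload, ldisk=False):
  """Interpret a blockdev_find payload like the consistency check above."""
  if ldisk:
    return payload.ldisk_status == constants.LDS_OKAY
  return not payload.is_degraded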
4775 class LUOobCommand(NoHooksLU):
4776 """Logical unit for OOB handling.
4780 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4782 def ExpandNames(self):
4783 """Gather locks we need.
4786 if self.op.node_names:
4787 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4788 lock_names = self.op.node_names
4790 lock_names = locking.ALL_SET
4792 self.needed_locks = {
4793 locking.LEVEL_NODE: lock_names,
4796 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4798 if not self.op.node_names:
4799 # Acquire node allocation lock only if all nodes are affected
4800 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4802 def CheckPrereq(self):
4803 """Check prerequisites.
4806 - the node exists in the configuration
4809 Any errors are signaled by raising errors.OpPrereqError.
4813 self.master_node = self.cfg.GetMasterNode()
4815 assert self.op.power_delay >= 0.0
4817 if self.op.node_names:
4818 if (self.op.command in self._SKIP_MASTER and
4819 self.master_node in self.op.node_names):
4820 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4821 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4823 if master_oob_handler:
4824 additional_text = ("run '%s %s %s' if you want to operate on the"
4825 " master regardless") % (master_oob_handler,
4829 additional_text = "it does not support out-of-band operations"
4831 raise errors.OpPrereqError(("Operating on the master node %s is not"
4832 " allowed for %s; %s") %
4833 (self.master_node, self.op.command,
4834 additional_text), errors.ECODE_INVAL)
4836 self.op.node_names = self.cfg.GetNodeList()
4837 if self.op.command in self._SKIP_MASTER:
4838 self.op.node_names.remove(self.master_node)
4840 if self.op.command in self._SKIP_MASTER:
4841 assert self.master_node not in self.op.node_names
4843 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4845 raise errors.OpPrereqError("Node %s not found" % node_name,
4848 self.nodes.append(node)
4850 if (not self.op.ignore_status and
4851 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4852 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4853 " not marked offline") % node_name,
4856 def Exec(self, feedback_fn):
4857 """Execute OOB and return result if we expect any.
4860 master_node = self.master_node
4863 for idx, node in enumerate(utils.NiceSort(self.nodes,
4864 key=lambda node: node.name)):
4865 node_entry = [(constants.RS_NORMAL, node.name)]
4866 ret.append(node_entry)
4868 oob_program = _SupportsOob(self.cfg, node)
4871 node_entry.append((constants.RS_UNAVAIL, None))
4874 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4875 self.op.command, oob_program, node.name)
4876 result = self.rpc.call_run_oob(master_node, oob_program,
4877 self.op.command, node.name,
4881 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4882 node.name, result.fail_msg)
4883 node_entry.append((constants.RS_NODATA, None))
4886 self._CheckPayload(result)
4887 except errors.OpExecError, err:
4888 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4890 node_entry.append((constants.RS_NODATA, None))
4892 if self.op.command == constants.OOB_HEALTH:
4893 # For health we should log important events
4894 for item, status in result.payload:
4895 if status in [constants.OOB_STATUS_WARNING,
4896 constants.OOB_STATUS_CRITICAL]:
4897 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4898 item, node.name, status)
4900 if self.op.command == constants.OOB_POWER_ON:
4902 elif self.op.command == constants.OOB_POWER_OFF:
4903 node.powered = False
4904 elif self.op.command == constants.OOB_POWER_STATUS:
4905 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4906 if powered != node.powered:
4907 logging.warning(("Recorded power state (%s) of node '%s' does not"
4908 " match actual power state (%s)"), node.powered,
4911 # For configuration changing commands we should update the node
4912 if self.op.command in (constants.OOB_POWER_ON,
4913 constants.OOB_POWER_OFF):
4914 self.cfg.Update(node, feedback_fn)
4916 node_entry.append((constants.RS_NORMAL, result.payload))
4918 if (self.op.command == constants.OOB_POWER_ON and
4919 idx < len(self.nodes) - 1):
4920 time.sleep(self.op.power_delay)
4924 def _CheckPayload(self, result):
4925 """Checks if the payload is valid.
4927 @param result: RPC result
4928 @raises errors.OpExecError: If payload is not valid
4932 if self.op.command == constants.OOB_HEALTH:
4933 if not isinstance(result.payload, list):
4934 errs.append("command 'health' is expected to return a list but got %s" %
4935 type(result.payload))
4937 for item, status in result.payload:
4938 if status not in constants.OOB_STATUSES:
4939 errs.append("health item '%s' has invalid status '%s'" %
4942 if self.op.command == constants.OOB_POWER_STATUS:
4943 if not isinstance(result.payload, dict):
4944 errs.append("power-status is expected to return a dict but got %s" %
4945 type(result.payload))
4947 if self.op.command in [
4948 constants.OOB_POWER_ON,
4949 constants.OOB_POWER_OFF,
4950 constants.OOB_POWER_CYCLE,
4952 if result.payload is not None:
4953 errs.append("%s is expected to not return payload but got '%s'" %
4954 (self.op.command, result.payload))
4957 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4958 utils.CommaJoin(errs))
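# Illustrative sketch only: _CheckPayload above expects a different payload
# shape per out-of-band command; the mapping below merely summarises those
# expectations and is not used by the code.
_EXAMPLE_OOB_PAYLOAD_SHAPES = {
  constants.OOB_HEALTH: "list of (item, status) pairs",
  constants.OOB_POWER_STATUS: "dict with a '%s' entry" %
                              constants.OOB_POWER_STATUS_POWERED,
  constants.OOB_POWER_ON: "no payload (None)",
  constants.OOB_POWER_OFF: "no payload (None)",
  constants.OOB_POWER_CYCLE: "no payload (None)",
  }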
4961 class _OsQuery(_QueryBase):
4962 FIELDS = query.OS_FIELDS
4964 def ExpandNames(self, lu):
4965 # Lock all nodes in shared mode
4966 # Temporary removal of locks, should be reverted later
4967 # TODO: reintroduce locks when they are lighter-weight
4968 lu.needed_locks = {}
4969 #self.share_locks[locking.LEVEL_NODE] = 1
4970 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4972 # The following variables interact with _QueryBase._GetNames
4974 self.wanted = self.names
4976 self.wanted = locking.ALL_SET
4978 self.do_locking = self.use_locking
4980 def DeclareLocks(self, lu, level):
4984 def _DiagnoseByOS(rlist):
4985 """Remaps a per-node return list into an a per-os per-node dictionary
4987 @param rlist: a map with node names as keys and OS objects as values
4990 @return: a dictionary with osnames as keys and as value another
4991 map, with nodes as keys and tuples of (path, status, diagnose,
4992 variants, parameters, api_versions) as values, eg::
4994 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4995 (/srv/..., False, "invalid api")],
4996 "node2": [(/srv/..., True, "", [], [])]}
5001 # we build here the list of nodes that didn't fail the RPC (at RPC
5002 # level), so that nodes with a non-responding node daemon don't
5003 # make all OSes invalid
5004 good_nodes = [node_name for node_name in rlist
5005 if not rlist[node_name].fail_msg]
5006 for node_name, nr in rlist.items():
5007 if nr.fail_msg or not nr.payload:
5009 for (name, path, status, diagnose, variants,
5010 params, api_versions) in nr.payload:
5011 if name not in all_os:
5012 # build a list of nodes for this os containing empty lists
5013 # for each node in node_list
5015 for nname in good_nodes:
5016 all_os[name][nname] = []
5017 # convert params from [name, help] to (name, help)
5018 params = [tuple(v) for v in params]
5019 all_os[name][node_name].append((path, status, diagnose,
5020 variants, params, api_versions))
5023 def _GetQueryData(self, lu):
5024 """Computes the list of nodes and their attributes.
5027 # Locking is not used
5028 assert not (compat.any(lu.glm.is_owned(level)
5029 for level in locking.LEVELS
5030 if level != locking.LEVEL_CLUSTER) or
5031 self.do_locking or self.use_locking)
5033 valid_nodes = [node.name
5034 for node in lu.cfg.GetAllNodesInfo().values()
5035 if not node.offline and node.vm_capable]
5036 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5037 cluster = lu.cfg.GetClusterInfo()
5041 for (os_name, os_data) in pol.items():
5042 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5043 hidden=(os_name in cluster.hidden_os),
5044 blacklisted=(os_name in cluster.blacklisted_os))
5048 api_versions = set()
5050 for idx, osl in enumerate(os_data.values()):
5051 info.valid = bool(info.valid and osl and osl[0][1])
5055 (node_variants, node_params, node_api) = osl[0][3:6]
5058 variants.update(node_variants)
5059 parameters.update(node_params)
5060 api_versions.update(node_api)
5062 # Filter out inconsistent values
5063 variants.intersection_update(node_variants)
5064 parameters.intersection_update(node_params)
5065 api_versions.intersection_update(node_api)
5067 info.variants = list(variants)
5068 info.parameters = list(parameters)
5069 info.api_versions = list(api_versions)
5071 data[os_name] = info
5073 # Prepare data in requested order
5074 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5075 if name in data]
5078 class LUOsDiagnose(NoHooksLU):
5079 """Logical unit for OS diagnose/query.
5085 def _BuildFilter(fields, names):
5086 """Builds a filter for querying OSes.
5089 name_filter = qlang.MakeSimpleFilter("name", names)
5091 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5092 # respective field is not requested
5093 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5094 for fname in ["hidden", "blacklisted"]
5095 if fname not in fields]
5096 if "valid" not in fields:
5097 status_filter.append([qlang.OP_TRUE, "valid"])
5100 status_filter.insert(0, qlang.OP_AND)
5102 status_filter = None
5104 if name_filter and status_filter:
5105 return [qlang.OP_AND, name_filter, status_filter]
5109 return status_filter
5111 def CheckArguments(self):
5112 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5113 self.op.output_fields, False)
5115 def ExpandNames(self):
5116 self.oq.ExpandNames(self)
5118 def Exec(self, feedback_fn):
5119 return self.oq.OldStyleQuery(self)
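# Illustrative sketch only: with no status fields requested, the filter built
# by LUOsDiagnose._BuildFilter combines the name filter with the legacy
# status filter, so the query roughly has this shape (the exact name filter
# produced by qlang.MakeSimpleFilter is assumed here):
#
#   [qlang.OP_AND,
#    <name filter for self.op.names>,
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]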
5122 class LUNodeRemove(LogicalUnit):
5123 """Logical unit for removing a node.
5126 HPATH = "node-remove"
5127 HTYPE = constants.HTYPE_NODE
5129 def BuildHooksEnv(self):
5134 "OP_TARGET": self.op.node_name,
5135 "NODE_NAME": self.op.node_name,
5138 def BuildHooksNodes(self):
5139 """Build hooks nodes.
5141 This doesn't run on the target node in the pre phase as a failed
5142 node would then be impossible to remove.
5145 all_nodes = self.cfg.GetNodeList()
5147 all_nodes.remove(self.op.node_name)
5150 return (all_nodes, all_nodes)
5152 def CheckPrereq(self):
5153 """Check prerequisites.
5156 - the node exists in the configuration
5157 - it does not have primary or secondary instances
5158 - it's not the master
5160 Any errors are signaled by raising errors.OpPrereqError.
5163 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5164 node = self.cfg.GetNodeInfo(self.op.node_name)
5165 assert node is not None
5167 masternode = self.cfg.GetMasterNode()
5168 if node.name == masternode:
5169 raise errors.OpPrereqError("Node is the master node, failover to another"
5170 " node is required", errors.ECODE_INVAL)
5172 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5173 if node.name in instance.all_nodes:
5174 raise errors.OpPrereqError("Instance %s is still running on the node,"
5175 " please remove first" % instance_name,
5177 self.op.node_name = node.name
5180 def Exec(self, feedback_fn):
5181 """Removes the node from the cluster.
5185 logging.info("Stopping the node daemon and removing configs from node %s",
5188 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5190 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5193 # Promote nodes to master candidate as needed
5194 _AdjustCandidatePool(self, exceptions=[node.name])
5195 self.context.RemoveNode(node.name)
5197 # Run post hooks on the node before it's removed
5198 _RunPostHook(self, node.name)
5200 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5201 msg = result.fail_msg
5203 self.LogWarning("Errors encountered on the remote node while leaving"
5204 " the cluster: %s", msg)
5206 # Remove node from our /etc/hosts
5207 if self.cfg.GetClusterInfo().modify_etc_hosts:
5208 master_node = self.cfg.GetMasterNode()
5209 result = self.rpc.call_etc_hosts_modify(master_node,
5210 constants.ETC_HOSTS_REMOVE,
5212 result.Raise("Can't update hosts file with new host data")
5213 _RedistributeAncillaryFiles(self)
5216 class _NodeQuery(_QueryBase):
5217 FIELDS = query.NODE_FIELDS
5219 def ExpandNames(self, lu):
5220 lu.needed_locks = {}
5221 lu.share_locks = _ShareAll()
5224 self.wanted = _GetWantedNodes(lu, self.names)
5226 self.wanted = locking.ALL_SET
5228 self.do_locking = (self.use_locking and
5229 query.NQ_LIVE in self.requested_data)
5232 # If any non-static field is requested we need to lock the nodes
5233 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5234 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5236 def DeclareLocks(self, lu, level):
5239 def _GetQueryData(self, lu):
5240 """Computes the list of nodes and their attributes.
5243 all_info = lu.cfg.GetAllNodesInfo()
5245 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5247 # Gather data as requested
5248 if query.NQ_LIVE in self.requested_data:
5249 # filter out non-vm_capable nodes
5250 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5252 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5253 [lu.cfg.GetHypervisorType()])
5254 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5255 for (name, nresult) in node_data.items()
5256 if not nresult.fail_msg and nresult.payload)
5260 if query.NQ_INST in self.requested_data:
5261 node_to_primary = dict([(name, set()) for name in nodenames])
5262 node_to_secondary = dict([(name, set()) for name in nodenames])
5264 inst_data = lu.cfg.GetAllInstancesInfo()
5266 for inst in inst_data.values():
5267 if inst.primary_node in node_to_primary:
5268 node_to_primary[inst.primary_node].add(inst.name)
5269 for secnode in inst.secondary_nodes:
5270 if secnode in node_to_secondary:
5271 node_to_secondary[secnode].add(inst.name)
5273 node_to_primary = None
5274 node_to_secondary = None
5276 if query.NQ_OOB in self.requested_data:
5277 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5278 for name, node in all_info.iteritems())
5282 if query.NQ_GROUP in self.requested_data:
5283 groups = lu.cfg.GetAllNodeGroupsInfo()
5287 return query.NodeQueryData([all_info[name] for name in nodenames],
5288 live_data, lu.cfg.GetMasterNode(),
5289 node_to_primary, node_to_secondary, groups,
5290 oob_support, lu.cfg.GetClusterInfo())
5293 class LUNodeQuery(NoHooksLU):
5294 """Logical unit for querying nodes.
5297 # pylint: disable=W0142
5300 def CheckArguments(self):
5301 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5302 self.op.output_fields, self.op.use_locking)
5304 def ExpandNames(self):
5305 self.nq.ExpandNames(self)
5307 def DeclareLocks(self, level):
5308 self.nq.DeclareLocks(self, level)
5310 def Exec(self, feedback_fn):
5311 return self.nq.OldStyleQuery(self)
5314 class LUNodeQueryvols(NoHooksLU):
5315 """Logical unit for getting volumes on node(s).
5319 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5320 _FIELDS_STATIC = utils.FieldSet("node")
5322 def CheckArguments(self):
5323 _CheckOutputFields(static=self._FIELDS_STATIC,
5324 dynamic=self._FIELDS_DYNAMIC,
5325 selected=self.op.output_fields)
5327 def ExpandNames(self):
5328 self.share_locks = _ShareAll()
5331 self.needed_locks = {
5332 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5335 self.needed_locks = {
5336 locking.LEVEL_NODE: locking.ALL_SET,
5337 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5340 def Exec(self, feedback_fn):
5341 """Computes the list of nodes and their attributes.
5344 nodenames = self.owned_locks(locking.LEVEL_NODE)
5345 volumes = self.rpc.call_node_volumes(nodenames)
5347 ilist = self.cfg.GetAllInstancesInfo()
5348 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5351 for node in nodenames:
5352 nresult = volumes[node]
5355 msg = nresult.fail_msg
5357 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5360 node_vols = sorted(nresult.payload,
5361 key=operator.itemgetter("dev"))
5363 for vol in node_vols:
5365 for field in self.op.output_fields:
5368 elif field == "phys":
5372 elif field == "name":
5374 elif field == "size":
5375 val = int(float(vol["size"]))
5376 elif field == "instance":
5377 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5379 raise errors.ParameterError(field)
5380 node_output.append(str(val))
5382 output.append(node_output)
5387 class LUNodeQueryStorage(NoHooksLU):
5388 """Logical unit for getting information on storage units on node(s).
5391 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5394 def CheckArguments(self):
5395 _CheckOutputFields(static=self._FIELDS_STATIC,
5396 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5397 selected=self.op.output_fields)
5399 def ExpandNames(self):
5400 self.share_locks = _ShareAll()
5403 self.needed_locks = {
5404 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5407 self.needed_locks = {
5408 locking.LEVEL_NODE: locking.ALL_SET,
5409 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5412 def Exec(self, feedback_fn):
5413 """Computes the list of nodes and their attributes.
5416 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5418 # Always get name to sort by
5419 if constants.SF_NAME in self.op.output_fields:
5420 fields = self.op.output_fields[:]
5422 fields = [constants.SF_NAME] + self.op.output_fields
5424 # Never ask for node or type as it's only known to the LU
5425 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5426 while extra in fields:
5427 fields.remove(extra)
5429 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5430 name_idx = field_idx[constants.SF_NAME]
5432 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5433 data = self.rpc.call_storage_list(self.nodes,
5434 self.op.storage_type, st_args,
5435 self.op.name, fields)
5439 for node in utils.NiceSort(self.nodes):
5440 nresult = data[node]
5444 msg = nresult.fail_msg
5446 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5449 rows = dict([(row[name_idx], row) for row in nresult.payload])
5451 for name in utils.NiceSort(rows.keys()):
5456 for field in self.op.output_fields:
5457 if field == constants.SF_NODE:
5459 elif field == constants.SF_TYPE:
5460 val = self.op.storage_type
5461 elif field in field_idx:
5462 val = row[field_idx[field]]
5464 raise errors.ParameterError(field)
5473 class _InstanceQuery(_QueryBase):
5474 FIELDS = query.INSTANCE_FIELDS
5476 def ExpandNames(self, lu):
5477 lu.needed_locks = {}
5478 lu.share_locks = _ShareAll()
5481 self.wanted = _GetWantedInstances(lu, self.names)
5483 self.wanted = locking.ALL_SET
5485 self.do_locking = (self.use_locking and
5486 query.IQ_LIVE in self.requested_data)
5488 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5489 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5490 lu.needed_locks[locking.LEVEL_NODE] = []
5491 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5493 self.do_grouplocks = (self.do_locking and
5494 query.IQ_NODES in self.requested_data)
5496 def DeclareLocks(self, lu, level):
5498 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5499 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5501 # Lock all groups used by instances optimistically; this requires going
5502 # via the node before it's locked, requiring verification later on
5503 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5504 set(group_uuid
5505 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5506 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5507 elif level == locking.LEVEL_NODE:
5508 lu._LockInstancesNodes() # pylint: disable=W0212
5511 def _CheckGroupLocks(lu):
5512 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5513 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5515 # Check if node groups for locked instances are still correct
5516 for instance_name in owned_instances:
5517 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5519 def _GetQueryData(self, lu):
5520 """Computes the list of instances and their attributes.
5523 if self.do_grouplocks:
5524 self._CheckGroupLocks(lu)
5526 cluster = lu.cfg.GetClusterInfo()
5527 all_info = lu.cfg.GetAllInstancesInfo()
5529 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5531 instance_list = [all_info[name] for name in instance_names]
5532 nodes = frozenset(itertools.chain(*(inst.all_nodes
5533 for inst in instance_list)))
5534 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5537 wrongnode_inst = set()
5539 # Gather data as requested
5540 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5542 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5544 result = node_data[name]
5546 # offline nodes will be in both lists
5547 assert result.fail_msg
5548 offline_nodes.append(name)
5550 bad_nodes.append(name)
5551 elif result.payload:
5552 for inst in result.payload:
5553 if inst in all_info:
5554 if all_info[inst].primary_node == name:
5555 live_data.update(result.payload)
5557 wrongnode_inst.add(inst)
5559 # orphan instance; we don't list it here as we don't
5560 # handle this case yet in the output of instance listing
5561 logging.warning("Orphan instance '%s' found on node %s",
5563 # else no instance is alive
5567 if query.IQ_DISKUSAGE in self.requested_data:
5568 gmi = ganeti.masterd.instance
5569 disk_usage = dict((inst.name,
5570 gmi.ComputeDiskSize(inst.disk_template,
5571 [{constants.IDISK_SIZE: disk.size}
5572 for disk in inst.disks]))
5573 for inst in instance_list)
5577 if query.IQ_CONSOLE in self.requested_data:
5579 for inst in instance_list:
5580 if inst.name in live_data:
5581 # Instance is running
5582 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5584 consinfo[inst.name] = None
5585 assert set(consinfo.keys()) == set(instance_names)
5589 if query.IQ_NODES in self.requested_data:
5590 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5592 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5593 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5594 for uuid in set(map(operator.attrgetter("group"),
5600 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5601 disk_usage, offline_nodes, bad_nodes,
5602 live_data, wrongnode_inst, consinfo,
5606 class LUQuery(NoHooksLU):
5607 """Query for resources/items of a certain kind.
5610 # pylint: disable=W0142
5613 def CheckArguments(self):
5614 qcls = _GetQueryImplementation(self.op.what)
5616 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5618 def ExpandNames(self):
5619 self.impl.ExpandNames(self)
5621 def DeclareLocks(self, level):
5622 self.impl.DeclareLocks(self, level)
5624 def Exec(self, feedback_fn):
5625 return self.impl.NewStyleQuery(self)
5628 class LUQueryFields(NoHooksLU):
5629 """Query for resources/items of a certain kind.
5632 # pylint: disable=W0142
5635 def CheckArguments(self):
5636 self.qcls = _GetQueryImplementation(self.op.what)
5638 def ExpandNames(self):
5639 self.needed_locks = {}
5641 def Exec(self, feedback_fn):
5642 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5645 class LUNodeModifyStorage(NoHooksLU):
5646 """Logical unit for modifying a storage volume on a node.
5651 def CheckArguments(self):
5652 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5654 storage_type = self.op.storage_type
5657 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5659 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5660 " modified" % storage_type,
5663 diff = set(self.op.changes.keys()) - modifiable
5665 raise errors.OpPrereqError("The following fields can not be modified for"
5666 " storage units of type '%s': %r" %
5667 (storage_type, list(diff)),
5670 def ExpandNames(self):
5671 self.needed_locks = {
5672 locking.LEVEL_NODE: self.op.node_name,
5675 def Exec(self, feedback_fn):
5676 """Computes the list of nodes and their attributes.
5679 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5680 result = self.rpc.call_storage_modify(self.op.node_name,
5681 self.op.storage_type, st_args,
5682 self.op.name, self.op.changes)
5683 result.Raise("Failed to modify storage unit '%s' on %s" %
5684 (self.op.name, self.op.node_name))
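# Example (assuming constants.MODIFIABLE_STORAGE_FIELDS only lists
# "allocatable" for LVM physical volumes): a request with
#   changes = {"allocatable": False, "size": 1024}
# would be rejected in CheckArguments because "size" is not modifiable, while
#   changes = {"allocatable": False}
# would be forwarded to the node via call_storage_modify as above.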
5687 class LUNodeAdd(LogicalUnit):
5688 """Logical unit for adding node to the cluster.
5692 HTYPE = constants.HTYPE_NODE
5693 _NFLAGS = ["master_capable", "vm_capable"]
5695 def CheckArguments(self):
5696 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5697 # validate/normalize the node name
5698 self.hostname = netutils.GetHostname(name=self.op.node_name,
5699 family=self.primary_ip_family)
5700 self.op.node_name = self.hostname.name
5702 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5703 raise errors.OpPrereqError("Cannot readd the master node",
5706 if self.op.readd and self.op.group:
5707 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5708 " being readded", errors.ECODE_INVAL)
5710 def BuildHooksEnv(self):
5713 This will run on all nodes before, and on all nodes + the new node after.
5717 "OP_TARGET": self.op.node_name,
5718 "NODE_NAME": self.op.node_name,
5719 "NODE_PIP": self.op.primary_ip,
5720 "NODE_SIP": self.op.secondary_ip,
5721 "MASTER_CAPABLE": str(self.op.master_capable),
5722 "VM_CAPABLE": str(self.op.vm_capable),
5725 def BuildHooksNodes(self):
5726 """Build hooks nodes.
5729 # Exclude added node
5730 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5731 post_nodes = pre_nodes + [self.op.node_name, ]
5733 return (pre_nodes, post_nodes)
5735 def CheckPrereq(self):
5736 """Check prerequisites.
5739 - the new node is not already in the config
5741 - its parameters (single/dual homed) match the cluster
5743 Any errors are signaled by raising errors.OpPrereqError.
5747 hostname = self.hostname
5748 node = hostname.name
5749 primary_ip = self.op.primary_ip = hostname.ip
5750 if self.op.secondary_ip is None:
5751 if self.primary_ip_family == netutils.IP6Address.family:
5752 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5753 " IPv4 address must be given as secondary",
5755 self.op.secondary_ip = primary_ip
5757 secondary_ip = self.op.secondary_ip
5758 if not netutils.IP4Address.IsValid(secondary_ip):
5759 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5760 " address" % secondary_ip, errors.ECODE_INVAL)
5762 node_list = cfg.GetNodeList()
5763 if not self.op.readd and node in node_list:
5764 raise errors.OpPrereqError("Node %s is already in the configuration" %
5765 node, errors.ECODE_EXISTS)
5766 elif self.op.readd and node not in node_list:
5767 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5770 self.changed_primary_ip = False
5772 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5773 if self.op.readd and node == existing_node_name:
5774 if existing_node.secondary_ip != secondary_ip:
5775 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5776 " address configuration as before",
5778 if existing_node.primary_ip != primary_ip:
5779 self.changed_primary_ip = True
5783 if (existing_node.primary_ip == primary_ip or
5784 existing_node.secondary_ip == primary_ip or
5785 existing_node.primary_ip == secondary_ip or
5786 existing_node.secondary_ip == secondary_ip):
5787 raise errors.OpPrereqError("New node ip address(es) conflict with"
5788 " existing node %s" % existing_node.name,
5789 errors.ECODE_NOTUNIQUE)
5791 # After this 'if' block, None is no longer a valid value for the
5792 # _capable op attributes
5794 old_node = self.cfg.GetNodeInfo(node)
5795 assert old_node is not None, "Can't retrieve locked node %s" % node
5796 for attr in self._NFLAGS:
5797 if getattr(self.op, attr) is None:
5798 setattr(self.op, attr, getattr(old_node, attr))
5800 for attr in self._NFLAGS:
5801 if getattr(self.op, attr) is None:
5802 setattr(self.op, attr, True)
5804 if self.op.readd and not self.op.vm_capable:
5805 pri, sec = cfg.GetNodeInstances(node)
5807 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5808 " flag set to false, but it already holds"
5809 " instances" % node,
5812 # check that the type of the node (single versus dual homed) is the
5813 # same as for the master
5814 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5815 master_singlehomed = myself.secondary_ip == myself.primary_ip
5816 newbie_singlehomed = secondary_ip == primary_ip
5817 if master_singlehomed != newbie_singlehomed:
5818 if master_singlehomed:
5819 raise errors.OpPrereqError("The master has no secondary ip but the"
5820 " new node has one",
5823 raise errors.OpPrereqError("The master has a secondary ip but the"
5824 " new node doesn't have one",
5827 # checks reachability
5828 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5829 raise errors.OpPrereqError("Node not reachable by ping",
5830 errors.ECODE_ENVIRON)
5832 if not newbie_singlehomed:
5833 # check reachability from my secondary ip to newbie's secondary ip
5834 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5835 source=myself.secondary_ip):
5836 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5837 " based ping to node daemon port",
5838 errors.ECODE_ENVIRON)
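# Example (made-up documentation addresses): "single-homed" means
# secondary_ip == primary_ip, and the new node must match the master's setup,
# e.g. with a dual-homed master at 192.0.2.10/198.51.100.10:
#   new node 192.0.2.11/198.51.100.11  -> accepted
#   new node 192.0.2.12/192.0.2.12     -> rejected (no secondary ip)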
5845 if self.op.master_capable:
5846 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5848 self.master_candidate = False
5851 self.new_node = old_node
5853 node_group = cfg.LookupNodeGroup(self.op.group)
5854 self.new_node = objects.Node(name=node,
5855 primary_ip=primary_ip,
5856 secondary_ip=secondary_ip,
5857 master_candidate=self.master_candidate,
5858 offline=False, drained=False,
5861 if self.op.ndparams:
5862 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5864 if self.op.hv_state:
5865 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5867 if self.op.disk_state:
5868 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5870 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5871 # it a property on the base class.
5872 result = rpc.DnsOnlyRunner().call_version([node])[node]
5873 result.Raise("Can't get version information from node %s" % node)
5874 if constants.PROTOCOL_VERSION == result.payload:
5875 logging.info("Communication to node %s fine, sw version %s match",
5876 node, result.payload)
5878 raise errors.OpPrereqError("Version mismatch master version %s,"
5879 " node version %s" %
5880 (constants.PROTOCOL_VERSION, result.payload),
5881 errors.ECODE_ENVIRON)
5883 def Exec(self, feedback_fn):
5884 """Adds the new node to the cluster.
5887 new_node = self.new_node
5888 node = new_node.name
5890 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5893 # We are adding a new node, so we assume it's powered
5894 new_node.powered = True
5896 # for re-adds, reset the offline/drained/master-candidate flags;
5897 # we need to reset here, otherwise offline would prevent RPC calls
5898 # later in the procedure; this also means that if the re-add
5899 # fails, we are left with a non-offlined, broken node
5901 new_node.drained = new_node.offline = False # pylint: disable=W0201
5902 self.LogInfo("Readding a node, the offline/drained flags were reset")
5903 # if we demote the node, we do cleanup later in the procedure
5904 new_node.master_candidate = self.master_candidate
5905 if self.changed_primary_ip:
5906 new_node.primary_ip = self.op.primary_ip
5908 # copy the master/vm_capable flags
5909 for attr in self._NFLAGS:
5910 setattr(new_node, attr, getattr(self.op, attr))
5912 # notify the user about any possible mc promotion
5913 if new_node.master_candidate:
5914 self.LogInfo("Node will be a master candidate")
5916 if self.op.ndparams:
5917 new_node.ndparams = self.op.ndparams
5919 new_node.ndparams = {}
5921 if self.op.hv_state:
5922 new_node.hv_state_static = self.new_hv_state
5924 if self.op.disk_state:
5925 new_node.disk_state_static = self.new_disk_state
5927 # Add node to our /etc/hosts, and add key to known_hosts
5928 if self.cfg.GetClusterInfo().modify_etc_hosts:
5929 master_node = self.cfg.GetMasterNode()
5930 result = self.rpc.call_etc_hosts_modify(master_node,
5931 constants.ETC_HOSTS_ADD,
5934 result.Raise("Can't update hosts file with new host data")
5936 if new_node.secondary_ip != new_node.primary_ip:
5937 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5940 node_verify_list = [self.cfg.GetMasterNode()]
5941 node_verify_param = {
5942 constants.NV_NODELIST: ([node], {}),
5943 # TODO: do a node-net-test as well?
5946 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5947 self.cfg.GetClusterName())
5948 for verifier in node_verify_list:
5949 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5950 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5952 for failed in nl_payload:
5953 feedback_fn("ssh/hostname verification failed"
5954 " (checking from %s): %s" %
5955 (verifier, nl_payload[failed]))
5956 raise errors.OpExecError("ssh/hostname verification failed")
5959 _RedistributeAncillaryFiles(self)
5960 self.context.ReaddNode(new_node)
5961 # make sure we redistribute the config
5962 self.cfg.Update(new_node, feedback_fn)
5963 # and make sure the new node will not have old files around
5964 if not new_node.master_candidate:
5965 result = self.rpc.call_node_demote_from_mc(new_node.name)
5966 msg = result.fail_msg
5968 self.LogWarning("Node failed to demote itself from master"
5969 " candidate status: %s" % msg)
5971 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5972 additional_vm=self.op.vm_capable)
5973 self.context.AddNode(new_node, self.proc.GetECId())
5976 class LUNodeSetParams(LogicalUnit):
5977 """Modifies the parameters of a node.
5979 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5980 to the node role (as _ROLE_*)
5981 @cvar _R2F: a dictionary from node role to tuples of flags
5982 @cvar _FLAGS: a list of attribute names corresponding to the flags
5985 HPATH = "node-modify"
5986 HTYPE = constants.HTYPE_NODE
5988 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5990 (True, False, False): _ROLE_CANDIDATE,
5991 (False, True, False): _ROLE_DRAINED,
5992 (False, False, True): _ROLE_OFFLINE,
5993 (False, False, False): _ROLE_REGULAR,
5995 _R2F = dict((v, k) for k, v in _F2R.items())
5996 _FLAGS = ["master_candidate", "drained", "offline"]
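# Example: the mapping above translates the (master_candidate, drained,
# offline) flag tuple into a role and back, e.g.
#   _F2R[(True, False, False)]  == _ROLE_CANDIDATE
#   _F2R[(False, False, False)] == _ROLE_REGULAR
#   _R2F[_ROLE_DRAINED]         == (False, True, False)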
5998 def CheckArguments(self):
5999 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6000 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6001 self.op.master_capable, self.op.vm_capable,
6002 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6004 if all_mods.count(None) == len(all_mods):
6005 raise errors.OpPrereqError("Please pass at least one modification",
6007 if all_mods.count(True) > 1:
6008 raise errors.OpPrereqError("Can't set the node into more than one"
6009 " state at the same time",
6012 # Boolean value that tells us whether we might be demoting from MC
6013 self.might_demote = (self.op.master_candidate is False or
6014 self.op.offline is True or
6015 self.op.drained is True or
6016 self.op.master_capable is False)
6018 if self.op.secondary_ip:
6019 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6020 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6021 " address" % self.op.secondary_ip,
6024 self.lock_all = self.op.auto_promote and self.might_demote
6025 self.lock_instances = self.op.secondary_ip is not None
6027 def _InstanceFilter(self, instance):
6028 """Filter for getting affected instances.
6031 return (instance.disk_template in constants.DTS_INT_MIRROR and
6032 self.op.node_name in instance.all_nodes)
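# Example (hypothetical instances): with node_name "node1", a DRBD instance
# whose nodes include "node1" matches the filter and will be locked, while a
# plain-LVM instance on "node1" (not internally mirrored) does not match.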
6034 def ExpandNames(self):
6036 self.needed_locks = {
6037 locking.LEVEL_NODE: locking.ALL_SET,
6039 # Block allocations when all nodes are locked
6040 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6043 self.needed_locks = {
6044 locking.LEVEL_NODE: self.op.node_name,
6047 # Since modifying a node can have severe effects on currently running
6048 # operations, the resource lock is at least acquired in shared mode
6049 self.needed_locks[locking.LEVEL_NODE_RES] = \
6050 self.needed_locks[locking.LEVEL_NODE]
6052 # Get all locks except nodes in shared mode; they are not used for anything
6053 # but read-only access
6054 self.share_locks = _ShareAll()
6055 self.share_locks[locking.LEVEL_NODE] = 0
6056 self.share_locks[locking.LEVEL_NODE_RES] = 0
6057 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6059 if self.lock_instances:
6060 self.needed_locks[locking.LEVEL_INSTANCE] = \
6061 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6063 def BuildHooksEnv(self):
6066 This runs on the master node.
6070 "OP_TARGET": self.op.node_name,
6071 "MASTER_CANDIDATE": str(self.op.master_candidate),
6072 "OFFLINE": str(self.op.offline),
6073 "DRAINED": str(self.op.drained),
6074 "MASTER_CAPABLE": str(self.op.master_capable),
6075 "VM_CAPABLE": str(self.op.vm_capable),
6078 def BuildHooksNodes(self):
6079 """Build hooks nodes.
6082 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6085 def CheckPrereq(self):
6086 """Check prerequisites.
6088 This only checks the instance list against the existing names.
6091 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6093 if self.lock_instances:
6094 affected_instances = \
6095 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6097 # Verify instance locks
6098 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6099 wanted_instances = frozenset(affected_instances.keys())
6100 if wanted_instances - owned_instances:
6101 raise errors.OpPrereqError("Instances affected by changing node %s's"
6102 " secondary IP address have changed since"
6103 " locks were acquired, wanted '%s', have"
6104 " '%s'; retry the operation" %
6106 utils.CommaJoin(wanted_instances),
6107 utils.CommaJoin(owned_instances)),
6110 affected_instances = None
6112 if (self.op.master_candidate is not None or
6113 self.op.drained is not None or
6114 self.op.offline is not None):
6115 # we can't change the master's node flags
6116 if self.op.node_name == self.cfg.GetMasterNode():
6117 raise errors.OpPrereqError("The master role can be changed"
6118 " only via master-failover",
6121 if self.op.master_candidate and not node.master_capable:
6122 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6123 " it a master candidate" % node.name,
6126 if self.op.vm_capable is False:
6127 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6129 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6130 " the vm_capable flag" % node.name,
6133 if node.master_candidate and self.might_demote and not self.lock_all:
6134 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6135 # check if after removing the current node, we're missing master
6137 (mc_remaining, mc_should, _) = \
6138 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6139 if mc_remaining < mc_should:
6140 raise errors.OpPrereqError("Not enough master candidates, please"
6141 " pass auto promote option to allow"
6142 " promotion (--auto-promote or RAPI"
6143 " auto_promote=True)", errors.ECODE_STATE)
6145 self.old_flags = old_flags = (node.master_candidate,
6146 node.drained, node.offline)
6147 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6148 self.old_role = old_role = self._F2R[old_flags]
6150 # Check for ineffective changes
6151 for attr in self._FLAGS:
6152 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6153 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6154 setattr(self.op, attr, None)
6156 # Past this point, any flag change to False means a transition
6157 # away from the respective state, as only real changes are kept
6159 # TODO: We might query the real power state if it supports OOB
6160 if _SupportsOob(self.cfg, node):
6161 if self.op.offline is False and not (node.powered or
6162 self.op.powered is True):
6163 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6164 " offline status can be reset") %
6165 self.op.node_name, errors.ECODE_STATE)
6166 elif self.op.powered is not None:
6167 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6168 " as it does not support out-of-band"
6169 " handling") % self.op.node_name,
6172 # If we're being deofflined/drained, we'll MC ourself if needed
6173 if (self.op.drained is False or self.op.offline is False or
6174 (self.op.master_capable and not node.master_capable)):
6175 if _DecideSelfPromotion(self):
6176 self.op.master_candidate = True
6177 self.LogInfo("Auto-promoting node to master candidate")
6179 # If we're no longer master capable, we'll demote ourselves from MC
6180 if self.op.master_capable is False and node.master_candidate:
6181 self.LogInfo("Demoting from master candidate")
6182 self.op.master_candidate = False
6185 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6186 if self.op.master_candidate:
6187 new_role = self._ROLE_CANDIDATE
6188 elif self.op.drained:
6189 new_role = self._ROLE_DRAINED
6190 elif self.op.offline:
6191 new_role = self._ROLE_OFFLINE
6192 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6193 # False is still in new flags, which means we're un-setting (the
6195 new_role = self._ROLE_REGULAR
6196 else: # no new flags, nothing, keep old role
6199 self.new_role = new_role
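# Example: starting from old_role == _ROLE_CANDIDATE, an opcode passing
# offline=True yields new_role == _ROLE_OFFLINE here; Exec() will then ask
# the node to demote itself from master candidate and set the flag tuple to
# (False, False, True).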
6201 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6202 # Trying to transition out of offline status
6203 result = self.rpc.call_version([node.name])[node.name]
6205 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6206 " to report its version: %s" %
6207 (node.name, result.fail_msg),
6210 self.LogWarning("Transitioning node from offline to online state"
6211 " without using re-add. Please make sure the node"
6214 # When changing the secondary ip, verify if this is a single-homed to
6215 # multi-homed transition or vice versa, and apply the relevant
6217 if self.op.secondary_ip:
6218 # Ok even without locking, because this can't be changed by any LU
6219 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6220 master_singlehomed = master.secondary_ip == master.primary_ip
6221 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6222 if self.op.force and node.name == master.name:
6223 self.LogWarning("Transitioning from single-homed to multi-homed"
6224 " cluster; all nodes will require a secondary IP"
6227 raise errors.OpPrereqError("Changing the secondary ip on a"
6228 " single-homed cluster requires the"
6229 " --force option to be passed, and the"
6230 " target node to be the master",
6232 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6233 if self.op.force and node.name == master.name:
6234 self.LogWarning("Transitioning from multi-homed to single-homed"
6235 " cluster; secondary IP addresses will have to be"
6238 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6239 " same as the primary IP on a multi-homed"
6240 " cluster, unless the --force option is"
6241 " passed, and the target node is the"
6242 " master", errors.ECODE_INVAL)
6244 assert not (frozenset(affected_instances) -
6245 self.owned_locks(locking.LEVEL_INSTANCE))
6248 if affected_instances:
6249 msg = ("Cannot change secondary IP address: offline node has"
6250 " instances (%s) configured to use it" %
6251 utils.CommaJoin(affected_instances.keys()))
6252 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6254 # On online nodes, check that no instances are running, and that
6255 # the node has the new ip and we can reach it.
6256 for instance in affected_instances.values():
6257 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6258 msg="cannot change secondary ip")
6260 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6261 if master.name != node.name:
6262 # check reachability from master secondary ip to new secondary ip
6263 if not netutils.TcpPing(self.op.secondary_ip,
6264 constants.DEFAULT_NODED_PORT,
6265 source=master.secondary_ip):
6266 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6267 " based ping to node daemon port",
6268 errors.ECODE_ENVIRON)
6270 if self.op.ndparams:
6271 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6272 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6273 self.new_ndparams = new_ndparams
6275 if self.op.hv_state:
6276 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6277 self.node.hv_state_static)
6279 if self.op.disk_state:
6280 self.new_disk_state = \
6281 _MergeAndVerifyDiskState(self.op.disk_state,
6282 self.node.disk_state_static)
6284 def Exec(self, feedback_fn):
6289 old_role = self.old_role
6290 new_role = self.new_role
6294 if self.op.ndparams:
6295 node.ndparams = self.new_ndparams
6297 if self.op.powered is not None:
6298 node.powered = self.op.powered
6300 if self.op.hv_state:
6301 node.hv_state_static = self.new_hv_state
6303 if self.op.disk_state:
6304 node.disk_state_static = self.new_disk_state
6306 for attr in ["master_capable", "vm_capable"]:
6307 val = getattr(self.op, attr)
6309 setattr(node, attr, val)
6310 result.append((attr, str(val)))
6312 if new_role != old_role:
6313 # Tell the node to demote itself, if no longer MC and not offline
6314 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6315 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6317 self.LogWarning("Node failed to demote itself: %s", msg)
6319 new_flags = self._R2F[new_role]
6320 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6322 result.append((desc, str(nf)))
6323 (node.master_candidate, node.drained, node.offline) = new_flags
6325 # we locked all nodes, we adjust the CP before updating this node
6327 _AdjustCandidatePool(self, [node.name])
6329 if self.op.secondary_ip:
6330 node.secondary_ip = self.op.secondary_ip
6331 result.append(("secondary_ip", self.op.secondary_ip))
6333 # this will trigger configuration file update, if needed
6334 self.cfg.Update(node, feedback_fn)
6336 # this will trigger job queue propagation or cleanup if the mc
6338 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6339 self.context.ReaddNode(node)
6344 class LUNodePowercycle(NoHooksLU):
6345 """Powercycles a node.
6350 def CheckArguments(self):
6351 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6352 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6353 raise errors.OpPrereqError("The node is the master and the force"
6354 " parameter was not set",
6357 def ExpandNames(self):
6358 """Locking for PowercycleNode.
6360 This is a last-resort option and shouldn't block on other
6361 jobs. Therefore, we grab no locks.
6364 self.needed_locks = {}
6366 def Exec(self, feedback_fn):
6370 result = self.rpc.call_node_powercycle(self.op.node_name,
6371 self.cfg.GetHypervisorType())
6372 result.Raise("Failed to schedule the reboot")
6373 return result.payload
6376 class LUClusterQuery(NoHooksLU):
6377 """Query cluster configuration.
6382 def ExpandNames(self):
6383 self.needed_locks = {}
6385 def Exec(self, feedback_fn):
6386 """Return cluster config.
6389 cluster = self.cfg.GetClusterInfo()
6392 # Filter just for enabled hypervisors
6393 for os_name, hv_dict in cluster.os_hvp.items():
6394 os_hvp[os_name] = {}
6395 for hv_name, hv_params in hv_dict.items():
6396 if hv_name in cluster.enabled_hypervisors:
6397 os_hvp[os_name][hv_name] = hv_params
6399 # Convert ip_family to ip_version
6400 primary_ip_version = constants.IP4_VERSION
6401 if cluster.primary_ip_family == netutils.IP6Address.family:
6402 primary_ip_version = constants.IP6_VERSION
6405 "software_version": constants.RELEASE_VERSION,
6406 "protocol_version": constants.PROTOCOL_VERSION,
6407 "config_version": constants.CONFIG_VERSION,
6408 "os_api_version": max(constants.OS_API_VERSIONS),
6409 "export_version": constants.EXPORT_VERSION,
6410 "architecture": runtime.GetArchInfo(),
6411 "name": cluster.cluster_name,
6412 "master": cluster.master_node,
6413 "default_hypervisor": cluster.primary_hypervisor,
6414 "enabled_hypervisors": cluster.enabled_hypervisors,
6415 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6416 for hypervisor_name in cluster.enabled_hypervisors]),
6418 "beparams": cluster.beparams,
6419 "osparams": cluster.osparams,
6420 "ipolicy": cluster.ipolicy,
6421 "nicparams": cluster.nicparams,
6422 "ndparams": cluster.ndparams,
6423 "diskparams": cluster.diskparams,
6424 "candidate_pool_size": cluster.candidate_pool_size,
6425 "master_netdev": cluster.master_netdev,
6426 "master_netmask": cluster.master_netmask,
6427 "use_external_mip_script": cluster.use_external_mip_script,
6428 "volume_group_name": cluster.volume_group_name,
6429 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6430 "file_storage_dir": cluster.file_storage_dir,
6431 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6432 "maintain_node_health": cluster.maintain_node_health,
6433 "ctime": cluster.ctime,
6434 "mtime": cluster.mtime,
6435 "uuid": cluster.uuid,
6436 "tags": list(cluster.GetTags()),
6437 "uid_pool": cluster.uid_pool,
6438 "default_iallocator": cluster.default_iallocator,
6439 "reserved_lvs": cluster.reserved_lvs,
6440 "primary_ip_version": primary_ip_version,
6441 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6442 "hidden_os": cluster.hidden_os,
6443 "blacklisted_os": cluster.blacklisted_os,
6449 class LUClusterConfigQuery(NoHooksLU):
6450 """Return configuration values.
6455 def CheckArguments(self):
6456 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6458 def ExpandNames(self):
6459 self.cq.ExpandNames(self)
6461 def DeclareLocks(self, level):
6462 self.cq.DeclareLocks(self, level)
6464 def Exec(self, feedback_fn):
6465 result = self.cq.OldStyleQuery(self)
6467 assert len(result) == 1
6472 class _ClusterQuery(_QueryBase):
6473 FIELDS = query.CLUSTER_FIELDS
6475 #: Do not sort (there is only one item)
6478 def ExpandNames(self, lu):
6479 lu.needed_locks = {}
6481 # The following variables interact with _QueryBase._GetNames
6482 self.wanted = locking.ALL_SET
6483 self.do_locking = self.use_locking
6486 raise errors.OpPrereqError("Can not use locking for cluster queries",
6489 def DeclareLocks(self, lu, level):
6492 def _GetQueryData(self, lu):
6493 """Computes the list of nodes and their attributes.
6496 # Locking is not used
6497 assert not (compat.any(lu.glm.is_owned(level)
6498 for level in locking.LEVELS
6499 if level != locking.LEVEL_CLUSTER) or
6500 self.do_locking or self.use_locking)
6502 if query.CQ_CONFIG in self.requested_data:
6503 cluster = lu.cfg.GetClusterInfo()
6505 cluster = NotImplemented
6507 if query.CQ_QUEUE_DRAINED in self.requested_data:
6508 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6510 drain_flag = NotImplemented
6512 if query.CQ_WATCHER_PAUSE in self.requested_data:
6513 master_name = lu.cfg.GetMasterNode()
6515 result = lu.rpc.call_get_watcher_pause(master_name)
6516 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6519 watcher_pause = result.payload
6521 watcher_pause = NotImplemented
6523 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6526 class LUInstanceActivateDisks(NoHooksLU):
6527 """Bring up an instance's disks.
6532 def ExpandNames(self):
6533 self._ExpandAndLockInstance()
6534 self.needed_locks[locking.LEVEL_NODE] = []
6535 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6537 def DeclareLocks(self, level):
6538 if level == locking.LEVEL_NODE:
6539 self._LockInstancesNodes()
6541 def CheckPrereq(self):
6542 """Check prerequisites.
6544 This checks that the instance is in the cluster.
6547 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6548 assert self.instance is not None, \
6549 "Cannot retrieve locked instance %s" % self.op.instance_name
6550 _CheckNodeOnline(self, self.instance.primary_node)
6552 def Exec(self, feedback_fn):
6553 """Activate the disks.
6556 disks_ok, disks_info = \
6557 _AssembleInstanceDisks(self, self.instance,
6558 ignore_size=self.op.ignore_size)
6560 raise errors.OpExecError("Cannot activate block devices")
6562 if self.op.wait_for_sync:
6563 if not _WaitForSync(self, self.instance):
6564 raise errors.OpExecError("Some disks of the instance are degraded!")
6569 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6571 """Prepare the block devices for an instance.
6573 This sets up the block devices on all nodes.
6575 @type lu: L{LogicalUnit}
6576 @param lu: the logical unit on whose behalf we execute
6577 @type instance: L{objects.Instance}
6578 @param instance: the instance for whose disks we assemble
6579 @type disks: list of L{objects.Disk} or None
6580 @param disks: which disks to assemble (or all, if None)
6581 @type ignore_secondaries: boolean
6582 @param ignore_secondaries: if true, errors on secondary nodes
6583 won't result in an error return from the function
6584 @type ignore_size: boolean
6585 @param ignore_size: if true, the current known size of the disk
6586 will not be used during the disk activation, useful for cases
6587 when the size is wrong
6588 @return: False if the operation failed, otherwise a list of
6589 (host, instance_visible_name, node_visible_name)
6590 with the mapping from node devices to instance devices
6595 iname = instance.name
6596 disks = _ExpandCheckDisks(instance, disks)
6598 # With the two passes mechanism we try to reduce the window of
6599 # opportunity for the race condition of switching DRBD to primary
6600 # before handshaking occurred, but we do not eliminate it
6602 # The proper fix would be to wait (with some limits) until the
6603 # connection has been made and drbd transitions from WFConnection
6604 # into any other network-connected state (Connected, SyncTarget,
6607 # 1st pass, assemble on all nodes in secondary mode
6608 for idx, inst_disk in enumerate(disks):
6609 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6611 node_disk = node_disk.Copy()
6612 node_disk.UnsetSize()
6613 lu.cfg.SetDiskID(node_disk, node)
6614 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6616 msg = result.fail_msg
6618 is_offline_secondary = (node in instance.secondary_nodes and
6620 lu.LogWarning("Could not prepare block device %s on node %s"
6621 " (is_primary=False, pass=1): %s",
6622 inst_disk.iv_name, node, msg)
6623 if not (ignore_secondaries or is_offline_secondary):
6626 # FIXME: race condition on drbd migration to primary
6628 # 2nd pass, do only the primary node
6629 for idx, inst_disk in enumerate(disks):
6632 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6633 if node != instance.primary_node:
6636 node_disk = node_disk.Copy()
6637 node_disk.UnsetSize()
6638 lu.cfg.SetDiskID(node_disk, node)
6639 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6641 msg = result.fail_msg
6643 lu.LogWarning("Could not prepare block device %s on node %s"
6644 " (is_primary=True, pass=2): %s",
6645 inst_disk.iv_name, node, msg)
6648 dev_path = result.payload
6650 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6652 # leave the disks configured for the primary node
6653 # this is a workaround that would be fixed better by
6654 # improving the logical/physical id handling
6656 lu.cfg.SetDiskID(disk, instance.primary_node)
6658 return disks_ok, device_info
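# Example (made-up names and paths): on success the function returns roughly
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0"),
#           ("node1.example.com", "disk/1", "/dev/drbd1")])
# i.e. one (primary node, instance-visible name, node device path) entry per
# disk; callers like _StartInstanceDisks mostly only check the boolean.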
6661 def _StartInstanceDisks(lu, instance, force):
6662 """Start the disks of an instance.
6665 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6666 ignore_secondaries=force)
6668 _ShutdownInstanceDisks(lu, instance)
6669 if force is not None and not force:
6671 hint=("If the message above refers to a secondary node,"
6672 " you can retry the operation using '--force'"))
6673 raise errors.OpExecError("Disk consistency error")
6676 class LUInstanceDeactivateDisks(NoHooksLU):
6677 """Shutdown an instance's disks.
6682 def ExpandNames(self):
6683 self._ExpandAndLockInstance()
6684 self.needed_locks[locking.LEVEL_NODE] = []
6685 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6687 def DeclareLocks(self, level):
6688 if level == locking.LEVEL_NODE:
6689 self._LockInstancesNodes()
6691 def CheckPrereq(self):
6692 """Check prerequisites.
6694 This checks that the instance is in the cluster.
6697 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6698 assert self.instance is not None, \
6699 "Cannot retrieve locked instance %s" % self.op.instance_name
6701 def Exec(self, feedback_fn):
6702 """Deactivate the disks
6705 instance = self.instance
6707 _ShutdownInstanceDisks(self, instance)
6709 _SafeShutdownInstanceDisks(self, instance)
6712 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6713 """Shutdown block devices of an instance.
6715 This function checks if an instance is running, before calling
6716 _ShutdownInstanceDisks.
6719 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6720 _ShutdownInstanceDisks(lu, instance, disks=disks)
6723 def _ExpandCheckDisks(instance, disks):
6724 """Return the instance disks selected by the disks list
6726 @type disks: list of L{objects.Disk} or None
6727 @param disks: selected disks
6728 @rtype: list of L{objects.Disk}
6729 @return: selected instance disks to act on
6733 return instance.disks
6735 if not set(disks).issubset(instance.disks):
6736 raise errors.ProgrammerError("Can only act on disks belonging to the"
6741 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6742 """Shutdown block devices of an instance.
6744 This does the shutdown on all nodes of the instance.
6746 If the ignore_primary is false, errors on the primary node are
6751 disks = _ExpandCheckDisks(instance, disks)
6754 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6755 lu.cfg.SetDiskID(top_disk, node)
6756 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6757 msg = result.fail_msg
6759 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6760 disk.iv_name, node, msg)
6761 if ((node == instance.primary_node and not ignore_primary) or
6762 (node != instance.primary_node and not result.offline)):
6767 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6768 """Checks if a node has enough free memory.
6770 This function checks if a given node has the needed amount of free
6771 memory. In case the node has less memory or we cannot get the
6772 information from the node, this function raises an OpPrereqError
6775 @type lu: C{LogicalUnit}
6776 @param lu: a logical unit from which we get configuration data
6778 @param node: the node to check
6779 @type reason: C{str}
6780 @param reason: string to use in the error message
6781 @type requested: C{int}
6782 @param requested: the amount of memory in MiB to check for
6783 @type hypervisor_name: C{str}
6784 @param hypervisor_name: the hypervisor to ask for memory stats
6786 @return: node current free memory
6787 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6788 we cannot check the node
6791 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6792 nodeinfo[node].Raise("Can't get data from node %s" % node,
6793 prereq=True, ecode=errors.ECODE_ENVIRON)
6794 (_, _, (hv_info, )) = nodeinfo[node].payload
6796 free_mem = hv_info.get("memory_free", None)
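# Example (assumed payload layout, made-up numbers): the node_info payload
# unpacked above is roughly (bootid, [vg_info, ...], [hv_info, ...]), and a
# typical hv_info dict could look like
#   {"memory_total": 16384, "memory_free": 11264, "memory_dom0": 1024}
# with all values in MiB.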
6797 if not isinstance(free_mem, int):
6798 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6799 " was '%s'" % (node, free_mem),
6800 errors.ECODE_ENVIRON)
6801 if requested > free_mem:
6802 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6803 " needed %s MiB, available %s MiB" %
6804 (node, reason, requested, free_mem),
6809 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6810 """Checks if nodes have enough free disk space in all the VGs.
6812 This function checks if all given nodes have the needed amount of
6813 free disk. In case any node has less disk or we cannot get the
6814 information from the node, this function raises an OpPrereqError
6817 @type lu: C{LogicalUnit}
6818 @param lu: a logical unit from which we get configuration data
6819 @type nodenames: C{list}
6820 @param nodenames: the list of node names to check
6821 @type req_sizes: C{dict}
6822 @param req_sizes: the hash of vg and corresponding amount of disk in
6824 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6825 or we cannot check the node
6828 for vg, req_size in req_sizes.items():
6829 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
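# Example (made-up VG names, sizes in MiB): req_sizes is a per-VG requirement
# dict such as {"xenvg": 10240, "altvg": 2048}; the loop above issues one
# _CheckNodesFreeDiskOnVG call per volume group.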
6832 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6833 """Checks if nodes have enough free disk space in the specified VG.
6835 This function checks if all given nodes have the needed amount of
6836 free disk. In case any node has less disk or we cannot get the
6837 information from the node, this function raises an OpPrereqError
6840 @type lu: C{LogicalUnit}
6841 @param lu: a logical unit from which we get configuration data
6842 @type nodenames: C{list}
6843 @param nodenames: the list of node names to check
6845 @param vg: the volume group to check
6846 @type requested: C{int}
6847 @param requested: the amount of disk in MiB to check for
6848 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6849 or we cannot check the node
6852 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6853 for node in nodenames:
6854 info = nodeinfo[node]
6855 info.Raise("Cannot get current information from node %s" % node,
6856 prereq=True, ecode=errors.ECODE_ENVIRON)
6857 (_, (vg_info, ), _) = info.payload
6858 vg_free = vg_info.get("vg_free", None)
6859 if not isinstance(vg_free, int):
6860 raise errors.OpPrereqError("Can't compute free disk space on node"
6861 " %s for vg %s, result was '%s'" %
6862 (node, vg, vg_free), errors.ECODE_ENVIRON)
6863 if requested > vg_free:
6864 raise errors.OpPrereqError("Not enough disk space on target node %s"
6865 " vg %s: required %d MiB, available %d MiB" %
6866 (node, vg, requested, vg_free),
6870 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6871 """Checks if nodes have enough physical CPUs
6873 This function checks if all given nodes have the needed number of
6874 physical CPUs. In case any node has less CPUs or we cannot get the
6875 information from the node, this function raises an OpPrereqError
6878 @type lu: C{LogicalUnit}
6879 @param lu: a logical unit from which we get configuration data
6880 @type nodenames: C{list}
6881 @param nodenames: the list of node names to check
6882 @type requested: C{int}
6883 @param requested: the minimum acceptable number of physical CPUs
6884 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6885 or we cannot check the node
6888 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6889 for node in nodenames:
6890 info = nodeinfo[node]
6891 info.Raise("Cannot get current information from node %s" % node,
6892 prereq=True, ecode=errors.ECODE_ENVIRON)
6893 (_, _, (hv_info, )) = info.payload
6894 num_cpus = hv_info.get("cpu_total", None)
6895 if not isinstance(num_cpus, int):
6896 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6897 " on node %s, result was '%s'" %
6898 (node, num_cpus), errors.ECODE_ENVIRON)
6899 if requested > num_cpus:
6900 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6901 "required" % (node, num_cpus, requested),
6905 class LUInstanceStartup(LogicalUnit):
6906 """Starts an instance.
6909 HPATH = "instance-start"
6910 HTYPE = constants.HTYPE_INSTANCE
6913 def CheckArguments(self):
6915 if self.op.beparams:
6916 # fill the beparams dict
6917 objects.UpgradeBeParams(self.op.beparams)
6918 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6920 def ExpandNames(self):
6921 self._ExpandAndLockInstance()
6922 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6924 def DeclareLocks(self, level):
6925 if level == locking.LEVEL_NODE_RES:
6926 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6928 def BuildHooksEnv(self):
6931 This runs on master, primary and secondary nodes of the instance.
6935 "FORCE": self.op.force,
6938 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6942 def BuildHooksNodes(self):
6943 """Build hooks nodes.
6946 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6949 def CheckPrereq(self):
6950 """Check prerequisites.
6952 This checks that the instance is in the cluster.
6955 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6956 assert self.instance is not None, \
6957 "Cannot retrieve locked instance %s" % self.op.instance_name
6960 if self.op.hvparams:
6961 # check hypervisor parameter syntax (locally)
6962 cluster = self.cfg.GetClusterInfo()
6963 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6964 filled_hvp = cluster.FillHV(instance)
6965 filled_hvp.update(self.op.hvparams)
6966 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6967 hv_type.CheckParameterSyntax(filled_hvp)
6968 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
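# Example (hypothetical override): the start-time hvparams are only merged on
# top of the cluster/instance defaults for validation and for this start, e.g.
#   filled_hvp = cluster.FillHV(instance)        # full defaults
#   filled_hvp.update({"boot_order": "cdrom"})   # one-off override
# the instance configuration itself is not modified.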
6970 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6972 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6974 if self.primary_offline and self.op.ignore_offline_nodes:
6975 self.LogWarning("Ignoring offline primary node")
6977 if self.op.hvparams or self.op.beparams:
6978 self.LogWarning("Overridden parameters are ignored")
6980 _CheckNodeOnline(self, instance.primary_node)
6982 bep = self.cfg.GetClusterInfo().FillBE(instance)
6983 bep.update(self.op.beparams)
6985 # check bridges existence
6986 _CheckInstanceBridgesExist(self, instance)
6988 remote_info = self.rpc.call_instance_info(instance.primary_node,
6990 instance.hypervisor)
6991 remote_info.Raise("Error checking node %s" % instance.primary_node,
6992 prereq=True, ecode=errors.ECODE_ENVIRON)
6993 if not remote_info.payload: # not running already
6994 _CheckNodeFreeMemory(self, instance.primary_node,
6995 "starting instance %s" % instance.name,
6996 bep[constants.BE_MINMEM], instance.hypervisor)
6998 def Exec(self, feedback_fn):
6999 """Start the instance.
7002 instance = self.instance
7003 force = self.op.force
7005 if not self.op.no_remember:
7006 self.cfg.MarkInstanceUp(instance.name)
7008 if self.primary_offline:
7009 assert self.op.ignore_offline_nodes
7010 self.LogInfo("Primary node offline, marked instance as started")
7012 node_current = instance.primary_node
7014 _StartInstanceDisks(self, instance, force)
7017 self.rpc.call_instance_start(node_current,
7018 (instance, self.op.hvparams,
7020 self.op.startup_paused)
7021 msg = result.fail_msg
7023 _ShutdownInstanceDisks(self, instance)
7024 raise errors.OpExecError("Could not start instance: %s" % msg)
7027 class LUInstanceReboot(LogicalUnit):
7028 """Reboot an instance.
7031 HPATH = "instance-reboot"
7032 HTYPE = constants.HTYPE_INSTANCE
7035 def ExpandNames(self):
7036 self._ExpandAndLockInstance()
7038 def BuildHooksEnv(self):
7041 This runs on master, primary and secondary nodes of the instance.
7045 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7046 "REBOOT_TYPE": self.op.reboot_type,
7047 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7050 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7054 def BuildHooksNodes(self):
7055 """Build hooks nodes.
7058 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7061 def CheckPrereq(self):
7062 """Check prerequisites.
7064 This checks that the instance is in the cluster.
7067 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7068 assert self.instance is not None, \
7069 "Cannot retrieve locked instance %s" % self.op.instance_name
7070 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7071 _CheckNodeOnline(self, instance.primary_node)
7073 # check bridges existence
7074 _CheckInstanceBridgesExist(self, instance)
7076 def Exec(self, feedback_fn):
7077 """Reboot the instance.
7080 instance = self.instance
7081 ignore_secondaries = self.op.ignore_secondaries
7082 reboot_type = self.op.reboot_type
7084 remote_info = self.rpc.call_instance_info(instance.primary_node,
7086 instance.hypervisor)
7087 remote_info.Raise("Error checking node %s" % instance.primary_node)
7088 instance_running = bool(remote_info.payload)
7090 node_current = instance.primary_node
7092 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7093 constants.INSTANCE_REBOOT_HARD]:
7094 for disk in instance.disks:
7095 self.cfg.SetDiskID(disk, node_current)
7096 result = self.rpc.call_instance_reboot(node_current, instance,
7098 self.op.shutdown_timeout)
7099 result.Raise("Could not reboot instance")
7101 if instance_running:
7102 result = self.rpc.call_instance_shutdown(node_current, instance,
7103 self.op.shutdown_timeout)
7104 result.Raise("Could not shutdown instance for full reboot")
7105 _ShutdownInstanceDisks(self, instance)
7107 self.LogInfo("Instance %s was already stopped, starting now",
7109 _StartInstanceDisks(self, instance, ignore_secondaries)
7110 result = self.rpc.call_instance_start(node_current,
7111 (instance, None, None), False)
7112 msg = result.fail_msg
7114 _ShutdownInstanceDisks(self, instance)
7115 raise errors.OpExecError("Could not start instance for"
7116 " full reboot: %s" % msg)
7118 self.cfg.MarkInstanceUp(instance.name)
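# Summary of the branches above: a soft or hard reboot of a running instance
# is delegated to the node daemon via call_instance_reboot; any other case
# (full reboot, or the instance was found stopped) shuts down the instance and
# its disks, then starts both again from the master.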
7121 class LUInstanceShutdown(LogicalUnit):
7122 """Shutdown an instance.
7125 HPATH = "instance-stop"
7126 HTYPE = constants.HTYPE_INSTANCE
7129 def ExpandNames(self):
7130 self._ExpandAndLockInstance()
7132 def BuildHooksEnv(self):
7135 This runs on master, primary and secondary nodes of the instance.
7138 env = _BuildInstanceHookEnvByObject(self, self.instance)
7139 env["TIMEOUT"] = self.op.timeout
7142 def BuildHooksNodes(self):
7143 """Build hooks nodes.
7146 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7149 def CheckPrereq(self):
7150 """Check prerequisites.
7152 This checks that the instance is in the cluster.
7155 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7156 assert self.instance is not None, \
7157 "Cannot retrieve locked instance %s" % self.op.instance_name
7159 if not self.op.force:
7160 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7162 self.LogWarning("Ignoring offline instance check")
7164 self.primary_offline = \
7165 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7167 if self.primary_offline and self.op.ignore_offline_nodes:
7168 self.LogWarning("Ignoring offline primary node")
7170 _CheckNodeOnline(self, self.instance.primary_node)
7172 def Exec(self, feedback_fn):
7173 """Shutdown the instance.
7176 instance = self.instance
7177 node_current = instance.primary_node
7178 timeout = self.op.timeout
7180 if not self.op.no_remember:
7181 self.cfg.MarkInstanceDown(instance.name)
7183 if self.primary_offline:
7184 assert self.op.ignore_offline_nodes
7185 self.LogInfo("Primary node offline, marked instance as stopped")
7187 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7188 msg = result.fail_msg
7190 self.LogWarning("Could not shutdown instance: %s", msg)
7192 _ShutdownInstanceDisks(self, instance)
7195 class LUInstanceReinstall(LogicalUnit):
7196 """Reinstall an instance.
7199 HPATH = "instance-reinstall"
7200 HTYPE = constants.HTYPE_INSTANCE
7203 def ExpandNames(self):
7204 self._ExpandAndLockInstance()
7206 def BuildHooksEnv(self):
7209 This runs on master, primary and secondary nodes of the instance.
7212 return _BuildInstanceHookEnvByObject(self, self.instance)
7214 def BuildHooksNodes(self):
7215 """Build hooks nodes.
7218 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7221 def CheckPrereq(self):
7222 """Check prerequisites.
7224 This checks that the instance is in the cluster and is not running.
7227 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7228 assert instance is not None, \
7229 "Cannot retrieve locked instance %s" % self.op.instance_name
7230 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7231 " offline, cannot reinstall")
7233 if instance.disk_template == constants.DT_DISKLESS:
7234 raise errors.OpPrereqError("Instance '%s' has no disks" %
7235 self.op.instance_name,
7237 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7239 if self.op.os_type is not None:
7241 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7242 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7243 instance_os = self.op.os_type
7245 instance_os = instance.os
7247 nodelist = list(instance.all_nodes)
7249 if self.op.osparams:
7250 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7251 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7252 self.os_inst = i_osdict # the new dict (without defaults)
7256 self.instance = instance
7258 def Exec(self, feedback_fn):
7259 """Reinstall the instance.
7262 inst = self.instance
7264 if self.op.os_type is not None:
7265 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7266 inst.os = self.op.os_type
7267 # Write to configuration
7268 self.cfg.Update(inst, feedback_fn)
7270 _StartInstanceDisks(self, inst, None)
7272 feedback_fn("Running the instance OS create scripts...")
7273 # FIXME: pass debug option from opcode to backend
7274 result = self.rpc.call_instance_os_add(inst.primary_node,
7275 (inst, self.os_inst), True,
7276 self.op.debug_level)
7277 result.Raise("Could not install OS for instance %s on node %s" %
7278 (inst.name, inst.primary_node))
7280 _ShutdownInstanceDisks(self, inst)
7283 class LUInstanceRecreateDisks(LogicalUnit):
7284 """Recreate an instance's missing disks.
7287 HPATH = "instance-recreate-disks"
7288 HTYPE = constants.HTYPE_INSTANCE
7291 _MODIFYABLE = compat.UniqueFrozenset([
7292 constants.IDISK_SIZE,
7293 constants.IDISK_MODE,
7296 # New or changed disk parameters may have different semantics
7297 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7298 constants.IDISK_ADOPT,
7300 # TODO: Implement support changing VG while recreating
7302 constants.IDISK_METAVG,
7305 def _RunAllocator(self):
7306 """Run the allocator based on input opcode.
7309 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7312 # The allocator should actually run in "relocate" mode, but current
7313 # allocators don't support relocating all the nodes of an instance at
7314 # the same time. As a workaround we use "allocate" mode, but this is
7315 # suboptimal for two reasons:
7316 # - The instance name passed to the allocator is present in the list of
7317 # existing instances, so there could be a conflict within the
7318 # internal structures of the allocator. This doesn't happen with the
7319 # current allocators, but it's a liability.
7320 # - The allocator counts the resources used by the instance twice: once
7321 # because the instance exists already, and once because it tries to
7322 # allocate a new instance.
7323 # The allocator could choose some of the nodes on which the instance is
7324 # running, but that's not a problem. If the instance nodes are broken,
7325 # they should already be marked as drained or offline, and hence
7326 # skipped by the allocator. If instance disks have been lost for other
7327 # reasons, then recreating the disks on the same nodes should be fine.
7328 disk_template = self.instance.disk_template
7329 spindle_use = be_full[constants.BE_SPINDLE_USE]
7330 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7331 disk_template=disk_template,
7332 tags=list(self.instance.GetTags()),
7333 os=self.instance.os,
7335 vcpus=be_full[constants.BE_VCPUS],
7336 memory=be_full[constants.BE_MAXMEM],
7337 spindle_use=spindle_use,
7338 disks=[{constants.IDISK_SIZE: d.size,
7339 constants.IDISK_MODE: d.mode}
7340 for d in self.instance.disks],
7341 hypervisor=self.instance.hypervisor)
7342 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7344 ial.Run(self.op.iallocator)
7346 assert req.RequiredNodes() == len(self.instance.all_nodes)
7349 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7350 " %s" % (self.op.iallocator, ial.info),
7353 self.op.nodes = ial.result
7354 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7355 self.op.instance_name, self.op.iallocator,
7356 utils.CommaJoin(ial.result))
7358 def CheckArguments(self):
7359 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7360 # Normalize and convert deprecated list of disk indices
7361 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
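# Example: a deprecated index-only request such as disks=[2, 0] is normalized
# above to [(0, {}), (2, {})], i.e. sorted unique indices paired with empty
# parameter dicts.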
7363 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7365 raise errors.OpPrereqError("Some disks have been specified more than"
7366 " once: %s" % utils.CommaJoin(duplicates),
7369 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7370 # when neither iallocator nor nodes are specified
7371 if self.op.iallocator or self.op.nodes:
7372 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7374 for (idx, params) in self.op.disks:
7375 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7376 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7378 raise errors.OpPrereqError("Parameters for disk %s try to change"
7379 " unmodifyable parameter(s): %s" %
7380 (idx, utils.CommaJoin(unsupported)),
7383 def ExpandNames(self):
7384 self._ExpandAndLockInstance()
7385 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7388 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7389 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7391 self.needed_locks[locking.LEVEL_NODE] = []
7392 if self.op.iallocator:
7393 # iallocator will select a new node in the same group
7394 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7395 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7397 self.needed_locks[locking.LEVEL_NODE_RES] = []
7399 def DeclareLocks(self, level):
7400 if level == locking.LEVEL_NODEGROUP:
7401 assert self.op.iallocator is not None
7402 assert not self.op.nodes
7403 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7404 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7405 # Lock the primary group used by the instance optimistically; this
7406 # requires going via the node before it's locked, requiring
7407 # verification later on
7408 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7409 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7411 elif level == locking.LEVEL_NODE:
7412 # If an allocator is used, then we lock all the nodes in the current
7413 # instance group, as we don't know yet which ones will be selected;
7414 # if we replace the nodes without using an allocator, locks are
7415 # already declared in ExpandNames; otherwise, we need to lock all the
7416 # instance nodes for disk re-creation
7417 if self.op.iallocator:
7418 assert not self.op.nodes
7419 assert not self.needed_locks[locking.LEVEL_NODE]
7420 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7422 # Lock member nodes of the group of the primary node
7423 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7424 self.needed_locks[locking.LEVEL_NODE].extend(
7425 self.cfg.GetNodeGroup(group_uuid).members)
7427 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7428 elif not self.op.nodes:
7429 self._LockInstancesNodes(primary_only=False)
7430 elif level == locking.LEVEL_NODE_RES:
7432 self.needed_locks[locking.LEVEL_NODE_RES] = \
7433 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7435 def BuildHooksEnv(self):
7438 This runs on master, primary and secondary nodes of the instance.
7441 return _BuildInstanceHookEnvByObject(self, self.instance)
7443 def BuildHooksNodes(self):
7444 """Build hooks nodes.
7447 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7450 def CheckPrereq(self):
7451 """Check prerequisites.
7453 This checks that the instance is in the cluster and is not running.
7456 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7457 assert instance is not None, \
7458 "Cannot retrieve locked instance %s" % self.op.instance_name
7460 if len(self.op.nodes) != len(instance.all_nodes):
7461 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7462 " %d replacement nodes were specified" %
7463 (instance.name, len(instance.all_nodes),
7464 len(self.op.nodes)),
7466 assert instance.disk_template != constants.DT_DRBD8 or \
7467 len(self.op.nodes) == 2
7468 assert instance.disk_template != constants.DT_PLAIN or \
7469 len(self.op.nodes) == 1
7470 primary_node = self.op.nodes[0]
7472 primary_node = instance.primary_node
7473 if not self.op.iallocator:
7474 _CheckNodeOnline(self, primary_node)
7476 if instance.disk_template == constants.DT_DISKLESS:
7477 raise errors.OpPrereqError("Instance '%s' has no disks" %
7478 self.op.instance_name, errors.ECODE_INVAL)
7480 # Verify if node group locks are still correct
7481 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7483 # Node group locks are acquired only for the primary node (and only
7484 # when the allocator is used)
7485 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7488 # if we replace nodes *and* the old primary is offline, we don't
7489 # check the instance state
7490 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7491 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7492 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7493 msg="cannot recreate disks")
7496 self.disks = dict(self.op.disks)
7498 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7500 maxidx = max(self.disks.keys())
7501 if maxidx >= len(instance.disks):
7502 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7505 if ((self.op.nodes or self.op.iallocator) and
7506 sorted(self.disks.keys()) != range(len(instance.disks))):
7507 raise errors.OpPrereqError("Can't recreate disks partially and"
7508 " change the nodes at the same time",
7511 self.instance = instance
7513 if self.op.iallocator:
7514 self._RunAllocator()
7515 # Release unneeded node and node resource locks
7516 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7517 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7518 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7520 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7522 def Exec(self, feedback_fn):
7523 """Recreate the disks.
7526 instance = self.instance
7528 assert (self.owned_locks(locking.LEVEL_NODE) ==
7529 self.owned_locks(locking.LEVEL_NODE_RES))
7532 mods = [] # keeps track of needed changes
7534 for idx, disk in enumerate(instance.disks):
7536 changes = self.disks[idx]
7538 # Disk should not be recreated
7542 # update secondaries for disks, if needed
7543 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7544 # need to update the nodes and minors
7545 assert len(self.op.nodes) == 2
7546 assert len(disk.logical_id) == 6 # otherwise disk internals
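# The logical_id of a DRBD8 disk is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, shared_secret); only the
# nodes and minors are replaced here, the port and secret are kept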
7548 (_, _, old_port, _, _, old_secret) = disk.logical_id
7549 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7550 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7551 new_minors[0], new_minors[1], old_secret)
7552 assert len(disk.logical_id) == len(new_id)
7556 mods.append((idx, new_id, changes))
7558 # now that we have passed all asserts above, we can apply the mods
7559 # in a single run (to avoid partial changes)
7560 for idx, new_id, changes in mods:
7561 disk = instance.disks[idx]
7562 if new_id is not None:
7563 assert disk.dev_type == constants.LD_DRBD8
7564 disk.logical_id = new_id
7566 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7567 mode=changes.get(constants.IDISK_MODE, None))
7569 # change primary node, if needed
7571 instance.primary_node = self.op.nodes[0]
7572 self.LogWarning("Changing the instance's nodes, you will have to"
7573 " remove any disks left on the older nodes manually")
7576 self.cfg.Update(instance, feedback_fn)
7578 # All touched nodes must be locked
7579 mylocks = self.owned_locks(locking.LEVEL_NODE)
7580 assert mylocks.issuperset(frozenset(instance.all_nodes))
7581 _CreateDisks(self, instance, to_skip=to_skip)
7584 class LUInstanceRename(LogicalUnit):
7585 """Rename an instance.
7588 HPATH = "instance-rename"
7589 HTYPE = constants.HTYPE_INSTANCE
7591 def CheckArguments(self):
7595 if self.op.ip_check and not self.op.name_check:
7596 # TODO: make the ip check more flexible and not depend on the name check
7597 raise errors.OpPrereqError("IP address check requires a name check",
7600 def BuildHooksEnv(self):
7603 This runs on master, primary and secondary nodes of the instance.
7606 env = _BuildInstanceHookEnvByObject(self, self.instance)
7607 env["INSTANCE_NEW_NAME"] = self.op.new_name
7610 def BuildHooksNodes(self):
7611 """Build hooks nodes.
7614 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7617 def CheckPrereq(self):
7618 """Check prerequisites.
7620 This checks that the instance is in the cluster and is not running.
7623 self.op.instance_name = _ExpandInstanceName(self.cfg,
7624 self.op.instance_name)
7625 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7626 assert instance is not None
7627 _CheckNodeOnline(self, instance.primary_node)
7628 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7629 msg="cannot rename")
7630 self.instance = instance
7632 new_name = self.op.new_name
7633 if self.op.name_check:
7634 hostname = _CheckHostnameSane(self, new_name)
7635 new_name = self.op.new_name = hostname.name
7636 if (self.op.ip_check and
7637 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7638 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7639 (hostname.ip, new_name),
7640 errors.ECODE_NOTUNIQUE)
7642 instance_list = self.cfg.GetInstanceList()
7643 if new_name in instance_list and new_name != instance.name:
7644 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7645 new_name, errors.ECODE_EXISTS)
7647 def Exec(self, feedback_fn):
7648 """Rename the instance.
7651 inst = self.instance
7652 old_name = inst.name
7654 rename_file_storage = False
7655 if (inst.disk_template in constants.DTS_FILEBASED and
7656 self.op.new_name != inst.name):
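# For file-based disks the logical_id is (file_driver, file_path), so
# the dirname of the path is the instance's file storage directory
# (see the logical_id construction in _GenerateDiskTemplate below)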
7657 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7658 rename_file_storage = True
7660 self.cfg.RenameInstance(inst.name, self.op.new_name)
7661 # Change the instance lock. This is definitely safe while we hold the BGL.
7662 # Otherwise the new lock would have to be added in acquired mode.
7664 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7665 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7666 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7668 # re-read the instance from the configuration after rename
7669 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7671 if rename_file_storage:
7672 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7673 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7674 old_file_storage_dir,
7675 new_file_storage_dir)
7676 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7677 " (but the instance has been renamed in Ganeti)" %
7678 (inst.primary_node, old_file_storage_dir,
7679 new_file_storage_dir))
7681 _StartInstanceDisks(self, inst, None)
7682 # update info on disks
7683 info = _GetInstanceInfoText(inst)
7684 for (idx, disk) in enumerate(inst.disks):
7685 for node in inst.all_nodes:
7686 self.cfg.SetDiskID(disk, node)
7687 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7689 self.LogWarning("Error setting info on node %s for disk %s: %s",
7690 node, idx, result.fail_msg)
7692 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7693 old_name, self.op.debug_level)
7694 msg = result.fail_msg
7696 msg = ("Could not run OS rename script for instance %s on node %s"
7697 " (but the instance has been renamed in Ganeti): %s" %
7698 (inst.name, inst.primary_node, msg))
7699 self.LogWarning(msg)
7701 _ShutdownInstanceDisks(self, inst)
7706 class LUInstanceRemove(LogicalUnit):
7707 """Remove an instance.
7710 HPATH = "instance-remove"
7711 HTYPE = constants.HTYPE_INSTANCE
7714 def ExpandNames(self):
7715 self._ExpandAndLockInstance()
7716 self.needed_locks[locking.LEVEL_NODE] = []
7717 self.needed_locks[locking.LEVEL_NODE_RES] = []
7718 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7720 def DeclareLocks(self, level):
7721 if level == locking.LEVEL_NODE:
7722 self._LockInstancesNodes()
7723 elif level == locking.LEVEL_NODE_RES:
7725 self.needed_locks[locking.LEVEL_NODE_RES] = \
7726 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7728 def BuildHooksEnv(self):
7731 This runs on master, primary and secondary nodes of the instance.
7734 env = _BuildInstanceHookEnvByObject(self, self.instance)
7735 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7738 def BuildHooksNodes(self):
7739 """Build hooks nodes.
7742 nl = [self.cfg.GetMasterNode()]
7743 nl_post = list(self.instance.all_nodes) + nl
7744 return (nl, nl_post)
7746 def CheckPrereq(self):
7747 """Check prerequisites.
7749 This checks that the instance is in the cluster.
7752 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7753 assert self.instance is not None, \
7754 "Cannot retrieve locked instance %s" % self.op.instance_name
7756 def Exec(self, feedback_fn):
7757 """Remove the instance.
7760 instance = self.instance
7761 logging.info("Shutting down instance %s on node %s",
7762 instance.name, instance.primary_node)
7764 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7765 self.op.shutdown_timeout)
7766 msg = result.fail_msg
7768 if self.op.ignore_failures:
7769 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7771 raise errors.OpExecError("Could not shutdown instance %s on"
7773 (instance.name, instance.primary_node, msg))
7775 assert (self.owned_locks(locking.LEVEL_NODE) ==
7776 self.owned_locks(locking.LEVEL_NODE_RES))
7777 assert not (set(instance.all_nodes) -
7778 self.owned_locks(locking.LEVEL_NODE)), \
7779 "Not owning correct locks"
7781 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7784 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7785 """Utility function to remove an instance.
7788 logging.info("Removing block devices for instance %s", instance.name)
7790 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7791 if not ignore_failures:
7792 raise errors.OpExecError("Can't remove instance's disks")
7793 feedback_fn("Warning: can't remove instance's disks")
7795 logging.info("Removing instance %s out of cluster config", instance.name)
7797 lu.cfg.RemoveInstance(instance.name)
7799 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7800 "Instance lock removal conflict"
7802 # Remove lock for the instance
7803 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7806 class LUInstanceQuery(NoHooksLU):
7807 """Logical unit for querying instances.
7810 # pylint: disable=W0142
7813 def CheckArguments(self):
7814 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7815 self.op.output_fields, self.op.use_locking)
7817 def ExpandNames(self):
7818 self.iq.ExpandNames(self)
7820 def DeclareLocks(self, level):
7821 self.iq.DeclareLocks(self, level)
7823 def Exec(self, feedback_fn):
7824 return self.iq.OldStyleQuery(self)
7827 def _ExpandNamesForMigration(lu):
7828 """Expands names for use with L{TLMigrateInstance}.
7830 @type lu: L{LogicalUnit}
7833 if lu.op.target_node is not None:
7834 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
7836 lu.needed_locks[locking.LEVEL_NODE] = []
7837 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7839 lu.needed_locks[locking.LEVEL_NODE_RES] = []
7840 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7842 # The node allocation lock is actually only needed for replicated instances
7843 # (e.g. DRBD8) and if an iallocator is used.
7844 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
7847 def _DeclareLocksForMigration(lu, level):
7848 """Declares locks for L{TLMigrateInstance}.
7850 @type lu: L{LogicalUnit}
7851 @param level: Lock level
7854 if level == locking.LEVEL_NODE_ALLOC:
7855 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
7857 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
7859 # Node locks are already declared here rather than at LEVEL_NODE as we need
7860 # the instance object anyway to declare the node allocation lock.
7861 if instance.disk_template in constants.DTS_EXT_MIRROR:
7862 if lu.op.target_node is None:
7863 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7864 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7866 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7868 del lu.recalculate_locks[locking.LEVEL_NODE]
7870 lu._LockInstancesNodes() # pylint: disable=W0212
7872 elif level == locking.LEVEL_NODE:
7873 # Node locks are declared together with the node allocation lock
7874 assert (lu.needed_locks[locking.LEVEL_NODE] or
7875 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
7877 elif level == locking.LEVEL_NODE_RES:
7879 lu.needed_locks[locking.LEVEL_NODE_RES] = \
7880 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
7883 class LUInstanceFailover(LogicalUnit):
7884 """Failover an instance.
7887 HPATH = "instance-failover"
7888 HTYPE = constants.HTYPE_INSTANCE
7891 def CheckArguments(self):
7892 """Check the arguments.
7895 self.iallocator = getattr(self.op, "iallocator", None)
7896 self.target_node = getattr(self.op, "target_node", None)
7898 def ExpandNames(self):
7899 self._ExpandAndLockInstance()
7900 _ExpandNamesForMigration(self)
7903 TLMigrateInstance(self, self.op.instance_name, False, True, False,
7904 self.op.ignore_consistency, True,
7905 self.op.shutdown_timeout, self.op.ignore_ipolicy)
7907 self.tasklets = [self._migrater]
7909 def DeclareLocks(self, level):
7910 _DeclareLocksForMigration(self, level)
7912 def BuildHooksEnv(self):
7915 This runs on master, primary and secondary nodes of the instance.
7918 instance = self._migrater.instance
7919 source_node = instance.primary_node
7920 target_node = self.op.target_node
7922 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7923 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7924 "OLD_PRIMARY": source_node,
7925 "NEW_PRIMARY": target_node,
7928 if instance.disk_template in constants.DTS_INT_MIRROR:
7929 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7930 env["NEW_SECONDARY"] = source_node
7932 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7934 env.update(_BuildInstanceHookEnvByObject(self, instance))
7938 def BuildHooksNodes(self):
7939 """Build hooks nodes.
7942 instance = self._migrater.instance
7943 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7944 return (nl, nl + [instance.primary_node])
7947 class LUInstanceMigrate(LogicalUnit):
7948 """Migrate an instance.
7950 This is migration without shutting down, compared to the failover,
7951 which is done with shutdown.
7954 HPATH = "instance-migrate"
7955 HTYPE = constants.HTYPE_INSTANCE
7958 def ExpandNames(self):
7959 self._ExpandAndLockInstance()
7960 _ExpandNamesForMigration(self)
7963 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
7964 False, self.op.allow_failover, False,
7965 self.op.allow_runtime_changes,
7966 constants.DEFAULT_SHUTDOWN_TIMEOUT,
7967 self.op.ignore_ipolicy)
7969 self.tasklets = [self._migrater]
7971 def DeclareLocks(self, level):
7972 _DeclareLocksForMigration(self, level)
7974 def BuildHooksEnv(self):
7977 This runs on master, primary and secondary nodes of the instance.
7980 instance = self._migrater.instance
7981 source_node = instance.primary_node
7982 target_node = self.op.target_node
7983 env = _BuildInstanceHookEnvByObject(self, instance)
7985 "MIGRATE_LIVE": self._migrater.live,
7986 "MIGRATE_CLEANUP": self.op.cleanup,
7987 "OLD_PRIMARY": source_node,
7988 "NEW_PRIMARY": target_node,
7989 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7992 if instance.disk_template in constants.DTS_INT_MIRROR:
7993 env["OLD_SECONDARY"] = target_node
7994 env["NEW_SECONDARY"] = source_node
7996 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8000 def BuildHooksNodes(self):
8001 """Build hooks nodes.
8004 instance = self._migrater.instance
8005 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8006 return (nl, nl + [instance.primary_node])
8009 class LUInstanceMove(LogicalUnit):
8010 """Move an instance by data-copying.
8013 HPATH = "instance-move"
8014 HTYPE = constants.HTYPE_INSTANCE
8017 def ExpandNames(self):
8018 self._ExpandAndLockInstance()
8019 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8020 self.op.target_node = target_node
8021 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8022 self.needed_locks[locking.LEVEL_NODE_RES] = []
8023 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8025 def DeclareLocks(self, level):
8026 if level == locking.LEVEL_NODE:
8027 self._LockInstancesNodes(primary_only=True)
8028 elif level == locking.LEVEL_NODE_RES:
8030 self.needed_locks[locking.LEVEL_NODE_RES] = \
8031 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8033 def BuildHooksEnv(self):
8036 This runs on master, primary and secondary nodes of the instance.
8040 "TARGET_NODE": self.op.target_node,
8041 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8043 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8046 def BuildHooksNodes(self):
8047 """Build hooks nodes.
8051 self.cfg.GetMasterNode(),
8052 self.instance.primary_node,
8053 self.op.target_node,
8057 def CheckPrereq(self):
8058 """Check prerequisites.
8060 This checks that the instance is in the cluster.
8063 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8064 assert self.instance is not None, \
8065 "Cannot retrieve locked instance %s" % self.op.instance_name
8067 node = self.cfg.GetNodeInfo(self.op.target_node)
8068 assert node is not None, \
8069 "Cannot retrieve locked node %s" % self.op.target_node
8071 self.target_node = target_node = node.name
8073 if target_node == instance.primary_node:
8074 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8075 (instance.name, target_node),
8078 bep = self.cfg.GetClusterInfo().FillBE(instance)
8080 for idx, dsk in enumerate(instance.disks):
8081 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8082 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8083 " cannot copy" % idx, errors.ECODE_STATE)
8085 _CheckNodeOnline(self, target_node)
8086 _CheckNodeNotDrained(self, target_node)
8087 _CheckNodeVmCapable(self, target_node)
8088 cluster = self.cfg.GetClusterInfo()
8089 group_info = self.cfg.GetNodeGroup(node.group)
8090 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8091 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8092 ignore=self.op.ignore_ipolicy)
8094 if instance.admin_state == constants.ADMINST_UP:
8095 # check memory requirements on the target node
8096 _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
8097 instance.name, bep[constants.BE_MAXMEM],
8098 instance.hypervisor)
8100 self.LogInfo("Not checking memory on the target node as"
8101 " instance will not be started")
8103 # check bridge existence
8104 _CheckInstanceBridgesExist(self, instance, node=target_node)
8106 def Exec(self, feedback_fn):
8107 """Move an instance.
8109 The move is done by shutting it down on its present node, copying
8110 the data over (slow) and starting it on the new node.
8113 instance = self.instance
8115 source_node = instance.primary_node
8116 target_node = self.target_node
8118 self.LogInfo("Shutting down instance %s on source node %s",
8119 instance.name, source_node)
8121 assert (self.owned_locks(locking.LEVEL_NODE) ==
8122 self.owned_locks(locking.LEVEL_NODE_RES))
8124 result = self.rpc.call_instance_shutdown(source_node, instance,
8125 self.op.shutdown_timeout)
8126 msg = result.fail_msg
8128 if self.op.ignore_consistency:
8129 self.LogWarning("Could not shutdown instance %s on node %s."
8130 " Proceeding anyway. Please make sure node"
8131 " %s is down. Error details: %s",
8132 instance.name, source_node, source_node, msg)
8134 raise errors.OpExecError("Could not shutdown instance %s on"
8136 (instance.name, source_node, msg))
8138 # create the target disks
8140 _CreateDisks(self, instance, target_node=target_node)
8141 except errors.OpExecError:
8142 self.LogWarning("Device creation failed, reverting...")
8144 _RemoveDisks(self, instance, target_node=target_node)
8146 self.cfg.ReleaseDRBDMinors(instance.name)
8149 cluster_name = self.cfg.GetClusterInfo().cluster_name
8152 # activate, get path, copy the data over
8153 for idx, disk in enumerate(instance.disks):
8154 self.LogInfo("Copying data for disk %d", idx)
8155 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8156 instance.name, True, idx)
8158 self.LogWarning("Can't assemble newly created disk %d: %s",
8159 idx, result.fail_msg)
8160 errs.append(result.fail_msg)
8162 dev_path = result.payload
8163 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8164 target_node, dev_path,
8167 self.LogWarning("Can't copy data over for disk %d: %s",
8168 idx, result.fail_msg)
8169 errs.append(result.fail_msg)
8173 self.LogWarning("Some disks failed to copy, aborting")
8175 _RemoveDisks(self, instance, target_node=target_node)
8177 self.cfg.ReleaseDRBDMinors(instance.name)
8178 raise errors.OpExecError("Errors during disk copy: %s" %
8181 instance.primary_node = target_node
8182 self.cfg.Update(instance, feedback_fn)
8184 self.LogInfo("Removing the disks on the original node")
8185 _RemoveDisks(self, instance, target_node=source_node)
8187 # Only start the instance if it's marked as up
8188 if instance.admin_state == constants.ADMINST_UP:
8189 self.LogInfo("Starting instance %s on node %s",
8190 instance.name, target_node)
8192 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8193 ignore_secondaries=True)
8195 _ShutdownInstanceDisks(self, instance)
8196 raise errors.OpExecError("Can't activate the instance's disks")
8198 result = self.rpc.call_instance_start(target_node,
8199 (instance, None, None), False)
8200 msg = result.fail_msg
8202 _ShutdownInstanceDisks(self, instance)
8203 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8204 (instance.name, target_node, msg))
8207 class LUNodeMigrate(LogicalUnit):
8208 """Migrate all instances from a node.
8211 HPATH = "node-migrate"
8212 HTYPE = constants.HTYPE_NODE
8215 def CheckArguments(self):
8218 def ExpandNames(self):
8219 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8221 self.share_locks = _ShareAll()
8222 self.needed_locks = {
8223 locking.LEVEL_NODE: [self.op.node_name],
8226 def BuildHooksEnv(self):
8229 This runs on the master, the primary and all the secondaries.
8233 "NODE_NAME": self.op.node_name,
8234 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8237 def BuildHooksNodes(self):
8238 """Build hooks nodes.
8241 nl = [self.cfg.GetMasterNode()]
8244 def CheckPrereq(self):
8247 def Exec(self, feedback_fn):
8248 # Prepare migration jobs for the node's primary instances
8249 allow_runtime_changes = self.op.allow_runtime_changes
8251 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8254 iallocator=self.op.iallocator,
8255 target_node=self.op.target_node,
8256 allow_runtime_changes=allow_runtime_changes,
8257 ignore_ipolicy=self.op.ignore_ipolicy)]
8258 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
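# One single-opcode job per primary instance of the node; the resulting
# job IDs are handed back to the caller via ResultWithJobs below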
8260 # TODO: Run iallocator in this opcode and pass correct placement options to
8261 # OpInstanceMigrate. Since other jobs can modify the cluster between
8262 # running the iallocator and the actual migration, a good consistency model
8263 # will have to be found.
8265 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8266 frozenset([self.op.node_name]))
8268 return ResultWithJobs(jobs)
8271 class TLMigrateInstance(Tasklet):
8272 """Tasklet class for instance migration.
8275 @ivar live: whether the migration will be done live or non-live;
8276 this variable is initialized only after CheckPrereq has run
8277 @type cleanup: boolean
8278 @ivar cleanup: Whether we clean up after a failed migration
8279 @type iallocator: string
8280 @ivar iallocator: The iallocator used to determine target_node
8281 @type target_node: string
8282 @ivar target_node: If given, the target_node to reallocate the instance to
8283 @type failover: boolean
8284 @ivar failover: Whether operation results in failover or migration
8285 @type fallback: boolean
8286 @ivar fallback: Whether fallback to failover is allowed if migration not
8288 @type ignore_consistency: boolean
8289 @ivar ignore_consistency: Whether we should ignore consistency between source
8291 @type shutdown_timeout: int
8292 @ivar shutdown_timeout: in case of failover, the timeout to use for the shutdown
8293 @type ignore_ipolicy: bool
8294 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8299 _MIGRATION_POLL_INTERVAL = 1 # seconds
8300 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8302 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8303 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8305 """Initializes this class.
8308 Tasklet.__init__(self, lu)
8311 self.instance_name = instance_name
8312 self.cleanup = cleanup
8313 self.live = False # will be overridden later
8314 self.failover = failover
8315 self.fallback = fallback
8316 self.ignore_consistency = ignore_consistency
8317 self.shutdown_timeout = shutdown_timeout
8318 self.ignore_ipolicy = ignore_ipolicy
8319 self.allow_runtime_changes = allow_runtime_changes
8321 def CheckPrereq(self):
8322 """Check prerequisites.
8324 This checks that the instance is in the cluster.
8327 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8328 instance = self.cfg.GetInstanceInfo(instance_name)
8329 assert instance is not None
8330 self.instance = instance
8331 cluster = self.cfg.GetClusterInfo()
8333 if (not self.cleanup and
8334 not instance.admin_state == constants.ADMINST_UP and
8335 not self.failover and self.fallback):
8336 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8337 " switching to failover")
8338 self.failover = True
8340 if instance.disk_template not in constants.DTS_MIRRORED:
8345 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8346 " %s" % (instance.disk_template, text),
8349 if instance.disk_template in constants.DTS_EXT_MIRROR:
8350 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8352 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8354 if self.lu.op.iallocator:
8355 self._RunAllocator()
8357 # We set self.target_node as it is required by
8359 self.target_node = self.lu.op.target_node
8361 # Check that the target node is correct in terms of instance policy
8362 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8363 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8364 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8366 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8367 ignore=self.ignore_ipolicy)
8369 # self.target_node is already populated, either directly or by the
8371 target_node = self.target_node
8372 if self.target_node == instance.primary_node:
8373 raise errors.OpPrereqError("Cannot migrate instance %s"
8374 " to its primary (%s)" %
8375 (instance.name, instance.primary_node),
8378 if len(self.lu.tasklets) == 1:
8379 # It is safe to release locks only when we're the only tasklet
8381 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8382 keep=[instance.primary_node, self.target_node])
8383 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8386 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8388 secondary_nodes = instance.secondary_nodes
8389 if not secondary_nodes:
8390 raise errors.ConfigurationError("No secondary node but using"
8391 " %s disk template" %
8392 instance.disk_template)
8393 target_node = secondary_nodes[0]
8394 if self.lu.op.iallocator or (self.lu.op.target_node and
8395 self.lu.op.target_node != target_node):
8397 text = "failed over"
8400 raise errors.OpPrereqError("Instances with disk template %s cannot"
8401 " be %s to arbitrary nodes"
8402 " (neither an iallocator nor a target"
8403 " node can be passed)" %
8404 (instance.disk_template, text),
8406 nodeinfo = self.cfg.GetNodeInfo(target_node)
8407 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8408 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8410 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8411 ignore=self.ignore_ipolicy)
8413 i_be = cluster.FillBE(instance)
8415 # check memory requirements on the secondary node
8416 if (not self.cleanup and
8417 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8418 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8419 "migrating instance %s" %
8421 i_be[constants.BE_MINMEM],
8422 instance.hypervisor)
8424 self.lu.LogInfo("Not checking memory on the secondary node as"
8425 " instance will not be started")
8427 # check if failover must be forced instead of migration
8428 if (not self.cleanup and not self.failover and
8429 i_be[constants.BE_ALWAYS_FAILOVER]):
8430 self.lu.LogInfo("Instance configured to always failover; fallback"
8432 self.failover = True
8434 # check bridge existence
8435 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8437 if not self.cleanup:
8438 _CheckNodeNotDrained(self.lu, target_node)
8439 if not self.failover:
8440 result = self.rpc.call_instance_migratable(instance.primary_node,
8442 if result.fail_msg and self.fallback:
8443 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8445 self.failover = True
8447 result.Raise("Can't migrate, please use failover",
8448 prereq=True, ecode=errors.ECODE_STATE)
8450 assert not (self.failover and self.cleanup)
8452 if not self.failover:
8453 if self.lu.op.live is not None and self.lu.op.mode is not None:
8454 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8455 " parameters are accepted",
8457 if self.lu.op.live is not None:
8459 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8461 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8462 # reset the 'live' parameter to None so that repeated
8463 # invocations of CheckPrereq do not raise an exception
8464 self.lu.op.live = None
8465 elif self.lu.op.mode is None:
8466 # read the default value from the hypervisor
8467 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8468 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8470 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8472 # Failover is never live
8475 if not (self.failover or self.cleanup):
8476 remote_info = self.rpc.call_instance_info(instance.primary_node,
8478 instance.hypervisor)
8479 remote_info.Raise("Error checking instance on node %s" %
8480 instance.primary_node)
8481 instance_running = bool(remote_info.payload)
8482 if instance_running:
8483 self.current_mem = int(remote_info.payload["memory"])
8485 def _RunAllocator(self):
8486 """Run the allocator based on input opcode.
8489 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8491 # FIXME: add a self.ignore_ipolicy option
8492 req = iallocator.IAReqRelocate(name=self.instance_name,
8493 relocate_from=[self.instance.primary_node])
8494 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8496 ial.Run(self.lu.op.iallocator)
8499 raise errors.OpPrereqError("Can't compute nodes using"
8500 " iallocator '%s': %s" %
8501 (self.lu.op.iallocator, ial.info),
8503 self.target_node = ial.result[0]
8504 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8505 self.instance_name, self.lu.op.iallocator,
8506 utils.CommaJoin(ial.result))
8508 def _WaitUntilSync(self):
8509 """Poll with custom rpc for disk sync.
8511 This uses our own step-based rpc call.
8514 self.feedback_fn("* wait until resync is done")
8518 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8520 (self.instance.disks,
8523 for node, nres in result.items():
8524 nres.Raise("Cannot resync disks on node %s" % node)
8525 node_done, node_percent = nres.payload
8526 all_done = all_done and node_done
8527 if node_percent is not None:
8528 min_percent = min(min_percent, node_percent)
8530 if min_percent < 100:
8531 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8534 def _EnsureSecondary(self, node):
8535 """Demote a node to secondary.
8538 self.feedback_fn("* switching node %s to secondary mode" % node)
8540 for dev in self.instance.disks:
8541 self.cfg.SetDiskID(dev, node)
8543 result = self.rpc.call_blockdev_close(node, self.instance.name,
8544 self.instance.disks)
8545 result.Raise("Cannot change disk to secondary on node %s" % node)
8547 def _GoStandalone(self):
8548 """Disconnect from the network.
8551 self.feedback_fn("* changing into standalone mode")
8552 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8553 self.instance.disks)
8554 for node, nres in result.items():
8555 nres.Raise("Cannot disconnect disks on node %s" % node)
8557 def _GoReconnect(self, multimaster):
8558 """Reconnect to the network.
8564 msg = "single-master"
8565 self.feedback_fn("* changing disks into %s mode" % msg)
8566 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8567 (self.instance.disks, self.instance),
8568 self.instance.name, multimaster)
8569 for node, nres in result.items():
8570 nres.Raise("Cannot change disks config on node %s" % node)
8572 def _ExecCleanup(self):
8573 """Try to cleanup after a failed migration.
8575 The cleanup is done by:
8576 - check that the instance is running only on one node
8577 (and update the config if needed)
8578 - change disks on its secondary node to secondary
8579 - wait until disks are fully synchronized
8580 - disconnect from the network
8581 - change disks into single-master mode
8582 - wait again until disks are fully synchronized
8585 instance = self.instance
8586 target_node = self.target_node
8587 source_node = self.source_node
8589 # check running on only one node
8590 self.feedback_fn("* checking where the instance actually runs"
8591 " (if this hangs, the hypervisor might be in"
8593 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8594 for node, result in ins_l.items():
8595 result.Raise("Can't contact node %s" % node)
8597 runningon_source = instance.name in ins_l[source_node].payload
8598 runningon_target = instance.name in ins_l[target_node].payload
8600 if runningon_source and runningon_target:
8601 raise errors.OpExecError("Instance seems to be running on two nodes,"
8602 " or the hypervisor is confused; you will have"
8603 " to ensure manually that it runs only on one"
8604 " and restart this operation")
8606 if not (runningon_source or runningon_target):
8607 raise errors.OpExecError("Instance does not seem to be running at all;"
8608 " in this case it's safer to repair by"
8609 " running 'gnt-instance stop' to ensure disk"
8610 " shutdown, and then restarting it")
8612 if runningon_target:
8613 # the migration has actually succeeded, we need to update the config
8614 self.feedback_fn("* instance running on secondary node (%s),"
8615 " updating config" % target_node)
8616 instance.primary_node = target_node
8617 self.cfg.Update(instance, self.feedback_fn)
8618 demoted_node = source_node
8620 self.feedback_fn("* instance confirmed to be running on its"
8621 " primary node (%s)" % source_node)
8622 demoted_node = target_node
8624 if instance.disk_template in constants.DTS_INT_MIRROR:
8625 self._EnsureSecondary(demoted_node)
8627 self._WaitUntilSync()
8628 except errors.OpExecError:
8629 # we ignore errors here, since if the device is standalone, it
8630 # won't be able to sync
8632 self._GoStandalone()
8633 self._GoReconnect(False)
8634 self._WaitUntilSync()
8636 self.feedback_fn("* done")
8638 def _RevertDiskStatus(self):
8639 """Try to revert the disk status after a failed migration.
8642 target_node = self.target_node
8643 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8647 self._EnsureSecondary(target_node)
8648 self._GoStandalone()
8649 self._GoReconnect(False)
8650 self._WaitUntilSync()
8651 except errors.OpExecError, err:
8652 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8653 " please try to recover the instance manually;"
8654 " error '%s'" % str(err))
8656 def _AbortMigration(self):
8657 """Call the hypervisor code to abort a started migration.
8660 instance = self.instance
8661 target_node = self.target_node
8662 source_node = self.source_node
8663 migration_info = self.migration_info
8665 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8669 abort_msg = abort_result.fail_msg
8671 logging.error("Aborting migration failed on target node %s: %s",
8672 target_node, abort_msg)
8673 # Don't raise an exception here, as we still have to try to revert the
8674 # disk status, even if this step failed.
8676 abort_result = self.rpc.call_instance_finalize_migration_src(
8677 source_node, instance, False, self.live)
8678 abort_msg = abort_result.fail_msg
8680 logging.error("Aborting migration failed on source node %s: %s",
8681 source_node, abort_msg)
8683 def _ExecMigration(self):
8684 """Migrate an instance.
8686 The migrate is done by:
8687 - change the disks into dual-master mode
8688 - wait until disks are fully synchronized again
8689 - migrate the instance
8690 - change disks on the new secondary node (the old primary) to secondary
8691 - wait until disks are fully synchronized
8692 - change disks into single-master mode
8695 instance = self.instance
8696 target_node = self.target_node
8697 source_node = self.source_node
8699 # Check for hypervisor version mismatch and warn the user.
8700 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8701 None, [self.instance.hypervisor])
8702 for ninfo in nodeinfo.values():
8703 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8705 (_, _, (src_info, )) = nodeinfo[source_node].payload
8706 (_, _, (dst_info, )) = nodeinfo[target_node].payload
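# The node_info payload is a tuple whose last element is the list of
# per-hypervisor info dicts; only one hypervisor (the instance's) was
# queried, hence the single-element unpacking above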
8708 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8709 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8710 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8711 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8712 if src_version != dst_version:
8713 self.feedback_fn("* warning: hypervisor version mismatch between"
8714 " source (%s) and target (%s) node" %
8715 (src_version, dst_version))
8717 self.feedback_fn("* checking disk consistency between source and target")
8718 for (idx, dev) in enumerate(instance.disks):
8719 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8720 raise errors.OpExecError("Disk %s is degraded or not fully"
8721 " synchronized on target node,"
8722 " aborting migration" % idx)
8724 if self.current_mem > self.tgt_free_mem:
8725 if not self.allow_runtime_changes:
8726 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8727 " free memory to fit instance %s on target"
8728 " node %s (have %dMB, need %dMB)" %
8729 (instance.name, target_node,
8730 self.tgt_free_mem, self.current_mem))
8731 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8732 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8735 rpcres.Raise("Cannot modify instance runtime memory")
8737 # First get the migration information from the remote node
8738 result = self.rpc.call_migration_info(source_node, instance)
8739 msg = result.fail_msg
8741 log_err = ("Failed fetching source migration information from %s: %s" %
8743 logging.error(log_err)
8744 raise errors.OpExecError(log_err)
8746 self.migration_info = migration_info = result.payload
8748 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8749 # Then switch the disks to master/master mode
8750 self._EnsureSecondary(target_node)
8751 self._GoStandalone()
8752 self._GoReconnect(True)
8753 self._WaitUntilSync()
8755 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8756 result = self.rpc.call_accept_instance(target_node,
8759 self.nodes_ip[target_node])
8761 msg = result.fail_msg
8763 logging.error("Instance pre-migration failed, trying to revert"
8764 " disk status: %s", msg)
8765 self.feedback_fn("Pre-migration failed, aborting")
8766 self._AbortMigration()
8767 self._RevertDiskStatus()
8768 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8769 (instance.name, msg))
8771 self.feedback_fn("* migrating instance to %s" % target_node)
8772 result = self.rpc.call_instance_migrate(source_node, instance,
8773 self.nodes_ip[target_node],
8775 msg = result.fail_msg
8777 logging.error("Instance migration failed, trying to revert"
8778 " disk status: %s", msg)
8779 self.feedback_fn("Migration failed, aborting")
8780 self._AbortMigration()
8781 self._RevertDiskStatus()
8782 raise errors.OpExecError("Could not migrate instance %s: %s" %
8783 (instance.name, msg))
8785 self.feedback_fn("* starting memory transfer")
8786 last_feedback = time.time()
8788 result = self.rpc.call_instance_get_migration_status(source_node,
8790 msg = result.fail_msg
8791 ms = result.payload # MigrationStatus instance
8792 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8793 logging.error("Instance migration failed, trying to revert"
8794 " disk status: %s", msg)
8795 self.feedback_fn("Migration failed, aborting")
8796 self._AbortMigration()
8797 self._RevertDiskStatus()
8799 msg = "hypervisor returned failure"
8800 raise errors.OpExecError("Could not migrate instance %s: %s" %
8801 (instance.name, msg))
8803 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8804 self.feedback_fn("* memory transfer complete")
8807 if (utils.TimeoutExpired(last_feedback,
8808 self._MIGRATION_FEEDBACK_INTERVAL) and
8809 ms.transferred_ram is not None):
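# ms is the MigrationStatus reported by the hypervisor; show the
# transfer progress as a percentage, but at most once every
# _MIGRATION_FEEDBACK_INTERVAL seconds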
8810 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8811 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8812 last_feedback = time.time()
8814 time.sleep(self._MIGRATION_POLL_INTERVAL)
8816 result = self.rpc.call_instance_finalize_migration_src(source_node,
8820 msg = result.fail_msg
8822 logging.error("Instance migration succeeded, but finalization failed"
8823 " on the source node: %s", msg)
8824 raise errors.OpExecError("Could not finalize instance migration: %s" %
8827 instance.primary_node = target_node
8829 # distribute new instance config to the other nodes
8830 self.cfg.Update(instance, self.feedback_fn)
8832 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8836 msg = result.fail_msg
8838 logging.error("Instance migration succeeded, but finalization failed"
8839 " on the target node: %s", msg)
8840 raise errors.OpExecError("Could not finalize instance migration: %s" %
8843 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8844 self._EnsureSecondary(source_node)
8845 self._WaitUntilSync()
8846 self._GoStandalone()
8847 self._GoReconnect(False)
8848 self._WaitUntilSync()
8850 # If the instance's disk template is `rbd' and there was a successful
8851 # migration, unmap the device from the source node.
8852 if self.instance.disk_template == constants.DT_RBD:
8853 disks = _ExpandCheckDisks(instance, instance.disks)
8854 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8856 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8857 msg = result.fail_msg
8859 logging.error("Migration was successful, but couldn't unmap the"
8860 " block device %s on source node %s: %s",
8861 disk.iv_name, source_node, msg)
8862 logging.error("You need to unmap the device %s manually on %s",
8863 disk.iv_name, source_node)
8865 self.feedback_fn("* done")
8867 def _ExecFailover(self):
8868 """Failover an instance.
8870 The failover is done by shutting it down on its present node and
8871 starting it on the secondary.
8874 instance = self.instance
8875 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8877 source_node = instance.primary_node
8878 target_node = self.target_node
8880 if instance.admin_state == constants.ADMINST_UP:
8881 self.feedback_fn("* checking disk consistency between source and target")
8882 for (idx, dev) in enumerate(instance.disks):
8883 # for drbd, these are drbd over lvm
8884 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8886 if primary_node.offline:
8887 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8889 (primary_node.name, idx, target_node))
8890 elif not self.ignore_consistency:
8891 raise errors.OpExecError("Disk %s is degraded on target node,"
8892 " aborting failover" % idx)
8894 self.feedback_fn("* not checking disk consistency as instance is not"
8897 self.feedback_fn("* shutting down instance on source node")
8898 logging.info("Shutting down instance %s on node %s",
8899 instance.name, source_node)
8901 result = self.rpc.call_instance_shutdown(source_node, instance,
8902 self.shutdown_timeout)
8903 msg = result.fail_msg
8905 if self.ignore_consistency or primary_node.offline:
8906 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8907 " proceeding anyway; please make sure node"
8908 " %s is down; error details: %s",
8909 instance.name, source_node, source_node, msg)
8911 raise errors.OpExecError("Could not shutdown instance %s on"
8913 (instance.name, source_node, msg))
8915 self.feedback_fn("* deactivating the instance's disks on source node")
8916 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8917 raise errors.OpExecError("Can't shut down the instance's disks")
8919 instance.primary_node = target_node
8920 # distribute new instance config to the other nodes
8921 self.cfg.Update(instance, self.feedback_fn)
8923 # Only start the instance if it's marked as up
8924 if instance.admin_state == constants.ADMINST_UP:
8925 self.feedback_fn("* activating the instance's disks on target node %s" %
8927 logging.info("Starting instance %s on node %s",
8928 instance.name, target_node)
8930 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8931 ignore_secondaries=True)
8933 _ShutdownInstanceDisks(self.lu, instance)
8934 raise errors.OpExecError("Can't activate the instance's disks")
8936 self.feedback_fn("* starting the instance on the target node %s" %
8938 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8940 msg = result.fail_msg
8942 _ShutdownInstanceDisks(self.lu, instance)
8943 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8944 (instance.name, target_node, msg))
8946 def Exec(self, feedback_fn):
8947 """Perform the migration.
8950 self.feedback_fn = feedback_fn
8951 self.source_node = self.instance.primary_node
8953 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8954 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8955 self.target_node = self.instance.secondary_nodes[0]
8956 # Otherwise self.target_node has been populated either
8957 # directly, or through an iallocator.
8959 self.all_nodes = [self.source_node, self.target_node]
8960 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8961 in self.cfg.GetMultiNodeInfo(self.all_nodes))
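# Map node name -> secondary IP; these addresses are used for the DRBD
# disconnect/attach calls and as the target address of the migration itself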
8964 feedback_fn("Failover instance %s" % self.instance.name)
8965 self._ExecFailover()
8967 feedback_fn("Migrating instance %s" % self.instance.name)
8970 return self._ExecCleanup()
8972 return self._ExecMigration()
8975 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8977 """Wrapper around L{_CreateBlockDevInner}.
8979 This method annotates the root device first.
8982 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8983 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8987 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8989 """Create a tree of block devices on a given node.
8991 If this device type has to be created on secondaries, create it and
8994 If not, just recurse to children keeping the same 'force' value.
8996 @attention: The device has to be annotated already.
8998 @param lu: the lu on whose behalf we execute
8999 @param node: the node on which to create the device
9000 @type instance: L{objects.Instance}
9001 @param instance: the instance which owns the device
9002 @type device: L{objects.Disk}
9003 @param device: the device to create
9004 @type force_create: boolean
9005 @param force_create: whether to force creation of this device; this
9006 will be changed to True whenever we find a device which has
9007 CreateOnSecondary() attribute
9008 @param info: the extra 'metadata' we should attach to the device
9009 (this will be represented as a LVM tag)
9010 @type force_open: boolean
9011 @param force_open: this parameter will be passed to the
9012 L{backend.BlockdevCreate} function where it specifies
9013 whether we run on primary or not, and it affects both
9014 the child assembly and the device's own Open() execution
9017 if device.CreateOnSecondary():
9021 for child in device.children:
9022 _CreateBlockDevInner(lu, node, instance, child, force_create,
9025 if not force_create:
9028 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
9031 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
9032 """Create a single block device on a given node.
9034 This will not recurse over children of the device, so they must be
9037 @param lu: the lu on whose behalf we execute
9038 @param node: the node on which to create the device
9039 @type instance: L{objects.Instance}
9040 @param instance: the instance which owns the device
9041 @type device: L{objects.Disk}
9042 @param device: the device to create
9043 @param info: the extra 'metadata' we should attach to the device
9044 (this will be represented as a LVM tag)
9045 @type force_open: boolean
9046 @param force_open: this parameter will be passed to the
9047 L{backend.BlockdevCreate} function where it specifies
9048 whether we run on primary or not, and it affects both
9049 the child assembly and the device's own Open() execution
9052 lu.cfg.SetDiskID(device, node)
9053 result = lu.rpc.call_blockdev_create(node, device, device.size,
9054 instance.name, force_open, info)
9055 result.Raise("Can't create block device %s on"
9056 " node %s for instance %s" % (device, node, instance.name))
9057 if device.physical_id is None:
9058 device.physical_id = result.payload
9061 def _GenerateUniqueNames(lu, exts):
9062 """Generate a suitable LV name.
9064 This will generate a logical volume name for the given instance.
9069 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9070 results.append("%s%s" % (new_id, val))
9074 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9075 iv_name, p_minor, s_minor):
9076 """Generate a drbd8 device complete with its children.
9079 assert len(vgnames) == len(names) == 2
9080 port = lu.cfg.AllocatePort()
9081 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
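# The resulting device tree is one LD_DRBD8 disk with two LD_LV children:
# the data LV of the requested size and the metadata LV of
# constants.DRBD_META_SIZE, each placed in its respective volume group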
9083 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9084 logical_id=(vgnames[0], names[0]),
9086 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9087 size=constants.DRBD_META_SIZE,
9088 logical_id=(vgnames[1], names[1]),
9090 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9091 logical_id=(primary, secondary, port,
9094 children=[dev_data, dev_meta],
9095 iv_name=iv_name, params={})
9099 _DISK_TEMPLATE_NAME_PREFIX = {
9100 constants.DT_PLAIN: "",
9101 constants.DT_RBD: ".rbd",
9105 _DISK_TEMPLATE_DEVICE_TYPE = {
9106 constants.DT_PLAIN: constants.LD_LV,
9107 constants.DT_FILE: constants.LD_FILE,
9108 constants.DT_SHARED_FILE: constants.LD_FILE,
9109 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9110 constants.DT_RBD: constants.LD_RBD,
9114 def _GenerateDiskTemplate(
9115 lu, template_name, instance_name, primary_node, secondary_nodes,
9116 disk_info, file_storage_dir, file_driver, base_index,
9117 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9118 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9119 """Generate the entire disk layout for a given template type.
9122 vgname = lu.cfg.GetVGName()
9123 disk_count = len(disk_info)
9126 if template_name == constants.DT_DISKLESS:
9128 elif template_name == constants.DT_DRBD8:
9129 if len(secondary_nodes) != 1:
9130 raise errors.ProgrammerError("Wrong template configuration")
9131 remote_node = secondary_nodes[0]
9132 minors = lu.cfg.AllocateDRBDMinor(
9133 [primary_node, remote_node] * len(disk_info), instance_name)
9135 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9137 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9140 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9141 for i in range(disk_count)]):
9142 names.append(lv_prefix + "_data")
9143 names.append(lv_prefix + "_meta")
9144 for idx, disk in enumerate(disk_info):
9145 disk_index = idx + base_index
9146 data_vg = disk.get(constants.IDISK_VG, vgname)
9147 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9148 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9149 disk[constants.IDISK_SIZE],
9151 names[idx * 2:idx * 2 + 2],
9152 "disk/%d" % disk_index,
9153 minors[idx * 2], minors[idx * 2 + 1])
9154 disk_dev.mode = disk[constants.IDISK_MODE]
9155 disks.append(disk_dev)
9158 raise errors.ProgrammerError("Wrong template configuration")
9160 if template_name == constants.DT_FILE:
9162 elif template_name == constants.DT_SHARED_FILE:
9163 _req_shr_file_storage()
9165 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9166 if name_prefix is None:
9169 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9170 (name_prefix, base_index + i)
9171 for i in range(disk_count)])
9173 if template_name == constants.DT_PLAIN:
9175 def logical_id_fn(idx, _, disk):
9176 vg = disk.get(constants.IDISK_VG, vgname)
9177 return (vg, names[idx])
9179 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9181 lambda _, disk_index, disk: (file_driver,
9182 "%s/disk%d" % (file_storage_dir,
9184 elif template_name == constants.DT_BLOCK:
9186 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9187 disk[constants.IDISK_ADOPT])
9188 elif template_name == constants.DT_RBD:
9189 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9191 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9193 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9195 for idx, disk in enumerate(disk_info):
9196 disk_index = idx + base_index
9197 size = disk[constants.IDISK_SIZE]
9198 feedback_fn("* disk %s, size %s" %
9199 (disk_index, utils.FormatUnit(size, "h")))
9200 disks.append(objects.Disk(dev_type=dev_type, size=size,
9201 logical_id=logical_id_fn(idx, disk_index, disk),
9202 iv_name="disk/%d" % disk_index,
9203 mode=disk[constants.IDISK_MODE],
9209 def _GetInstanceInfoText(instance):
9210 """Compute that text that should be added to the disk's metadata.
9213 return "originstname+%s" % instance.name
9216 def _CalcEta(time_taken, written, total_size):
9217 """Calculates the ETA based on size written and total size.
9219 @param time_taken: The time taken so far
9220 @param written: amount written so far
9221 @param total_size: The total size of data to be written
9222 @return: The remaining time in seconds
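Example (illustrative numbers only): if 512 out of 2048 MiB have been
written in 30 seconds, avg_time is 30/512 seconds per MiB and the
returned ETA is (2048 - 512) * 30/512 = 90 seconds.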
9225 avg_time = time_taken / float(written)
9226 return (total_size - written) * avg_time
9229 def _WipeDisks(lu, instance, disks=None):
9230 """Wipes instance disks.
9232 @type lu: L{LogicalUnit}
9233 @param lu: the logical unit on whose behalf we execute
9234 @type instance: L{objects.Instance}
9235 @param instance: the instance whose disks we should wipe
9236 @return: the success of the wipe
9239 node = instance.primary_node
9242 disks = [(idx, disk, 0)
9243 for (idx, disk) in enumerate(instance.disks)]
9245 for (_, device, _) in disks:
9246 lu.cfg.SetDiskID(device, node)
9248 logging.info("Pausing synchronization of disks of instance '%s'",
9250 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9251 (map(compat.snd, disks),
9254 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9256 for idx, success in enumerate(result.payload):
9258 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9259 " failed", idx, instance.name)
9262 for (idx, device, offset) in disks:
9263 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but at
9264 # most MAX_WIPE_CHUNK. Truncating to an integer avoids rounding errors.
9266 int(min(constants.MAX_WIPE_CHUNK,
9267 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
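# Illustrative example (assumed values, not the real constants): with a
# 100 GiB disk and MIN_WIPE_CHUNK_PERCENT == 10, this requests
# min(MAX_WIPE_CHUNK, 10 GiB) per call_blockdev_wipe round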
9271 start_time = time.time()
9276 info_text = (" (from %s to %s)" %
9277 (utils.FormatUnit(offset, "h"),
9278 utils.FormatUnit(size, "h")))
9280 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9282 logging.info("Wiping disk %d for instance %s on node %s using"
9283 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9285 while offset < size:
9286 wipe_size = min(wipe_chunk_size, size - offset)
9288 logging.debug("Wiping disk %d, offset %s, chunk %s",
9289 idx, offset, wipe_size)
9291 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9293 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9294 (idx, offset, wipe_size))
9298 if now - last_output >= 60:
9299 eta = _CalcEta(now - start_time, offset, size)
9300 lu.LogInfo(" - done: %.1f%% ETA: %s",
9301 offset / float(size) * 100, utils.FormatSeconds(eta))
9304 logging.info("Resuming synchronization of disks for instance '%s'",
9307 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9308 (map(compat.snd, disks),
9313 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9314 node, result.fail_msg)
9316 for idx, success in enumerate(result.payload):
9318 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9319 " failed", idx, instance.name)
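# Illustrative sketch (editorial addition): the chunk-size rule used by
# _WipeDisks above, as a standalone helper. The 1024 MiB cap and the 10%
# minimum are example values standing in for constants.MAX_WIPE_CHUNK and
# constants.MIN_WIPE_CHUNK_PERCENT; the real values come from constants.
def _ExampleWipeChunkSize(disk_size, max_chunk=1024, min_percent=10):
  """Hypothetical helper: a percentage of the disk size, capped."""
  return int(min(max_chunk, disk_size / 100.0 * min_percent))

# e.g. _ExampleWipeChunkSize(200) == 20          (small disk, 10% chunks)
#      _ExampleWipeChunkSize(100 * 1024) == 1024 (large disk, capped)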
9322 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9323 """Create all disks for an instance.
9325 This abstracts away some work from AddInstance.
9327 @type lu: L{LogicalUnit}
9328 @param lu: the logical unit on whose behalf we execute
9329 @type instance: L{objects.Instance}
9330 @param instance: the instance whose disks we should create
9332 @param to_skip: list of indices to skip
9333 @type target_node: string
9334 @param target_node: if passed, overrides the target node for creation
9336 @return: the success of the creation
9339 info = _GetInstanceInfoText(instance)
9340 if target_node is None:
9341 pnode = instance.primary_node
9342 all_nodes = instance.all_nodes
9347 if instance.disk_template in constants.DTS_FILEBASED:
9348 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9349 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9351 result.Raise("Failed to create directory '%s' on"
9352 " node %s" % (file_storage_dir, pnode))
9354 # Note: this needs to be kept in sync with adding of disks in
9355 # LUInstanceSetParams
9356 for idx, device in enumerate(instance.disks):
9357 if to_skip and idx in to_skip:
9359 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9361 for node in all_nodes:
9362 f_create = node == pnode
9363 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9366 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9367 """Remove all disks for an instance.
9369 This abstracts away some work from `AddInstance()` and
9370 `RemoveInstance()`. Note that in case some of the devices couldn't
9371 be removed, the removal will continue with the other ones (compare
9372 with `_CreateDisks()`).
9374 @type lu: L{LogicalUnit}
9375 @param lu: the logical unit on whose behalf we execute
9376 @type instance: L{objects.Instance}
9377 @param instance: the instance whose disks we should remove
9378 @type target_node: string
9379 @param target_node: used to override the node on which to remove the disks
9381 @return: the success of the removal
9384 logging.info("Removing block devices for instance %s", instance.name)
9387 ports_to_release = set()
9388 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9389 for (idx, device) in enumerate(anno_disks):
9391 edata = [(target_node, device)]
9393 edata = device.ComputeNodeTree(instance.primary_node)
9394 for node, disk in edata:
9395 lu.cfg.SetDiskID(disk, node)
9396 result = lu.rpc.call_blockdev_remove(node, disk)
9398 lu.LogWarning("Could not remove disk %s on node %s,"
9399 " continuing anyway: %s", idx, node, result.fail_msg)
9400 if not (result.offline and node != instance.primary_node):
9403 # if this is a DRBD disk, return its port to the pool
9404 if device.dev_type in constants.LDS_DRBD:
9405 ports_to_release.add(device.logical_id[2])
9407 if all_result or ignore_failures:
9408 for port in ports_to_release:
9409 lu.cfg.AddTcpUdpPort(port)
9411 if instance.disk_template in constants.DTS_FILEBASED:
9412 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9416 tgt = instance.primary_node
9417 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9419 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9420 file_storage_dir, instance.primary_node, result.fail_msg)
9426 def _ComputeDiskSizePerVG(disk_template, disks):
9427 """Compute disk size requirements in the volume group
9430 def _compute(disks, payload):
9431 """Universal algorithm.
9436 vgs[disk[constants.IDISK_VG]] = \
9437 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9441 # Required free disk space as a function of disk and swap space
9443 constants.DT_DISKLESS: {},
9444 constants.DT_PLAIN: _compute(disks, 0),
9445 # 128 MB are added for drbd metadata for each disk
9446 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9447 constants.DT_FILE: {},
9448 constants.DT_SHARED_FILE: {},
9451 if disk_template not in req_size_dict:
9452 raise errors.ProgrammerError("Disk template '%s' size requirement"
9453 " is unknown" % disk_template)
9455 return req_size_dict[disk_template]
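# Illustrative sketch (editorial addition): what _ComputeDiskSizePerVG returns
# for a DRBD8 instance with disks in two volume groups. The disk dicts and
# volume group names are example values only.
def _ExampleDiskSizePerVG():
  """Hypothetical helper showing the per-VG accumulation above."""
  disks = [
    {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
    {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 2048},
    {constants.IDISK_VG: "ssdvg", constants.IDISK_SIZE: 512},
  ]
  # Each DRBD8 disk carries its metadata on top of its own size, so with the
  # 128 MiB mentioned above this yields {"xenvg": 3328, "ssdvg": 640}.
  return _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)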
9458 def _FilterVmNodes(lu, nodenames):
9459 """Filters out non-vm_capable nodes from a list.
9461 @type lu: L{LogicalUnit}
9462 @param lu: the logical unit for which we check
9463 @type nodenames: list
9464 @param nodenames: the list of nodes on which we should check
9466 @return: the list of vm-capable nodes
9469 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9470 return [name for name in nodenames if name not in non_vm_nodes]
9473 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9474 """Hypervisor parameter validation.
9476 This function abstracts the hypervisor parameter validation so it can be
9477 used by both instance creation and instance modification.
9479 @type lu: L{LogicalUnit}
9480 @param lu: the logical unit for which we check
9481 @type nodenames: list
9482 @param nodenames: the list of nodes on which we should check
9483 @type hvname: string
9484 @param hvname: the name of the hypervisor we should use
9485 @type hvparams: dict
9486 @param hvparams: the parameters which we need to check
9487 @raise errors.OpPrereqError: if the parameters are not valid
9490 nodenames = _FilterVmNodes(lu, nodenames)
9492 cluster = lu.cfg.GetClusterInfo()
9493 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9495 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9496 for node in nodenames:
9500 info.Raise("Hypervisor parameter validation failed on node %s" % node)
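# Illustrative sketch (editorial addition): the merge performed by
# objects.FillDict above, re-implemented with plain dicts. The parameter
# names and values are made up; cluster-level settings act as defaults and
# per-operation values override them before the RPC validation call.
def _ExampleFillHvParams():
  """Hypothetical helper: cluster defaults merged with opcode overrides."""
  cluster_hvparams = {"kernel_path": "/boot/vmlinuz", "acpi": True}
  op_hvparams = {"acpi": False}
  merged = dict(cluster_hvparams)
  merged.update(op_hvparams)
  return merged  # {"kernel_path": "/boot/vmlinuz", "acpi": False}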
9503 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9504 """OS parameters validation.
9506 @type lu: L{LogicalUnit}
9507 @param lu: the logical unit for which we check
9508 @type required: boolean
9509 @param required: whether the validation should fail if the OS is not
9511 @type nodenames: list
9512 @param nodenames: the list of nodes on which we should check
9513 @type osname: string
9514 @param osname: the name of the OS we should validate
9515 @type osparams: dict
9516 @param osparams: the parameters which we need to check
9517 @raise errors.OpPrereqError: if the parameters are not valid
9520 nodenames = _FilterVmNodes(lu, nodenames)
9521 result = lu.rpc.call_os_validate(nodenames, required, osname,
9522 [constants.OS_VALIDATE_PARAMETERS],
9524 for node, nres in result.items():
9525 # we don't check for offline cases since this should be run only
9526 # against the master node and/or an instance's nodes
9527 nres.Raise("OS Parameters validation failed on node %s" % node)
9528 if not nres.payload:
9529 lu.LogInfo("OS %s not found on node %s, validation skipped",
9533 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9534 """Wrapper around IAReqInstanceAlloc.
9536 @param op: The instance opcode
9537 @param disks: The computed disks
9538 @param nics: The computed nics
9539 @param beparams: The fully filled beparams
9540 @param node_whitelist: List of nodes which should appear as online to the
9541 allocator (unless the node is already marked offline)
9543 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9546 spindle_use = beparams[constants.BE_SPINDLE_USE]
9547 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9548 disk_template=op.disk_template,
9551 vcpus=beparams[constants.BE_VCPUS],
9552 memory=beparams[constants.BE_MAXMEM],
9553 spindle_use=spindle_use,
9555 nics=[n.ToDict() for n in nics],
9556 hypervisor=op.hypervisor,
9557 node_whitelist=node_whitelist)
9560 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9561 """Computes the nics.
9563 @param op: The instance opcode
9564 @param cluster: Cluster configuration object
9565 @param default_ip: The default ip to assign
9566 @param cfg: An instance of the configuration object
9567 @param ec_id: Execution context ID
9569 @returns: The built-up NIC objects
9574 nic_mode_req = nic.get(constants.INIC_MODE, None)
9575 nic_mode = nic_mode_req
9576 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9577 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9579 net = nic.get(constants.INIC_NETWORK, None)
9580 link = nic.get(constants.NIC_LINK, None)
9581 ip = nic.get(constants.INIC_IP, None)
9583 if net is None or net.lower() == constants.VALUE_NONE:
9586 if nic_mode_req is not None or link is not None:
9587 raise errors.OpPrereqError("If network is given, no mode or link"
9588 " is allowed to be passed",
9591 # ip validity checks
9592 if ip is None or ip.lower() == constants.VALUE_NONE:
9594 elif ip.lower() == constants.VALUE_AUTO:
9595 if not op.name_check:
9596 raise errors.OpPrereqError("IP address set to auto but name checks"
9597 " have been skipped",
9601 # We defer pool operations until later, so that the iallocator has
9602 # filled in the instance's node(s)
9603 if ip.lower() == constants.NIC_IP_POOL:
9605 raise errors.OpPrereqError("if ip=pool, parameter network"
9606 " must be passed too",
9609 elif not netutils.IPAddress.IsValid(ip):
9610 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9615 # TODO: check the ip address for uniqueness
9616 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9617 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9620 # MAC address verification
9621 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9622 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9623 mac = utils.NormalizeAndValidateMac(mac)
9626 # TODO: We need to factor this out
9627 cfg.ReserveMAC(mac, ec_id)
9628 except errors.ReservationError:
9629 raise errors.OpPrereqError("MAC address %s already in use"
9630 " in cluster" % mac,
9631 errors.ECODE_NOTUNIQUE)
9633 # Build nic parameters
9636 nicparams[constants.NIC_MODE] = nic_mode
9638 nicparams[constants.NIC_LINK] = link
9640 check_params = cluster.SimpleFillNIC(nicparams)
9641 objects.NIC.CheckParameterSyntax(check_params)
9642 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9643 network=net, nicparams=nicparams))
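# Illustrative sketch (editorial addition): the IP handling rules applied per
# NIC by _ComputeNics above, condensed into a standalone helper. The literal
# strings stand in for constants.VALUE_NONE, constants.VALUE_AUTO and
# constants.NIC_IP_POOL; everything else is a simplification for illustration.
def _ExampleNicIpPolicy(ip, network, name_check, resolved_ip):
  """Hypothetical helper: decide which IP value ends up on the NIC."""
  if ip is None or ip.lower() == "none":
    return None                  # no IP requested
  if ip.lower() == "auto":
    if not name_check:
      raise ValueError("ip=auto requires the instance name check")
    return resolved_ip           # the address resolved from the name
  if ip.lower() == "pool":
    if network is None:
      raise ValueError("ip=pool requires a network")
    return ip                    # pool allocation is deferred until later
  return ip                      # a literal address, validated separately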
9648 def _ComputeDisks(op, default_vg):
9649 """Computes the instance disks.
9651 @param op: The instance opcode
9652 @param default_vg: The default_vg to assume
9654 @return: The computed disks
9658 for disk in op.disks:
9659 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9660 if mode not in constants.DISK_ACCESS_SET:
9661 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9662 mode, errors.ECODE_INVAL)
9663 size = disk.get(constants.IDISK_SIZE, None)
9665 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9668 except (TypeError, ValueError):
9669 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9672 data_vg = disk.get(constants.IDISK_VG, default_vg)
9674 constants.IDISK_SIZE: size,
9675 constants.IDISK_MODE: mode,
9676 constants.IDISK_VG: data_vg,
9678 if constants.IDISK_METAVG in disk:
9679 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9680 if constants.IDISK_ADOPT in disk:
9681 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9682 disks.append(new_disk)
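# Illustrative sketch (editorial addition): the normalization performed by
# _ComputeDisks above for a single adopted disk. The input values and the
# "xenvg" default are examples only.
def _ExampleComputedDisk():
  """Hypothetical helper: the dict produced from one opcode disk entry."""
  return {
    constants.IDISK_SIZE: 10240,                # size coerced to an integer
    constants.IDISK_MODE: constants.DISK_RDWR,  # default when mode is omitted
    constants.IDISK_VG: "xenvg",                # default_vg filled in
    constants.IDISK_ADOPT: "data-lv",           # copied through when present
  }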
9687 def _ComputeFullBeParams(op, cluster):
9688 """Computes the full beparams.
9690 @param op: The instance opcode
9691 @param cluster: The cluster config object
9693 @return: The fully filled beparams
9696 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9697 for param, value in op.beparams.iteritems():
9698 if value == constants.VALUE_AUTO:
9699 op.beparams[param] = default_beparams[param]
9700 objects.UpgradeBeParams(op.beparams)
9701 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9702 return cluster.SimpleFillBE(op.beparams)
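# Illustrative sketch (editorial addition): how VALUE_AUTO entries are
# resolved by _ComputeFullBeParams above, re-implemented with plain dicts.
# The parameter names and values are made up, and "auto" stands in for
# constants.VALUE_AUTO.
def _ExampleFullBeParams():
  """Hypothetical helper: auto-resolution plus filling in the defaults."""
  cluster_defaults = {"maxmem": 512, "minmem": 512, "vcpus": 1}
  op_beparams = {"vcpus": "auto", "maxmem": 2048}
  # Mirrors the loop above: "auto" is replaced by the cluster default.
  explicit = dict((key, cluster_defaults[key] if value == "auto" else value)
                  for (key, value) in op_beparams.items())
  # Mirrors SimpleFillBE: defaults for everything not explicitly given.
  full = dict(cluster_defaults)
  full.update(explicit)
  return full  # {"maxmem": 2048, "minmem": 512, "vcpus": 1}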
9705 class LUInstanceCreate(LogicalUnit):
9706 """Create an instance.
9709 HPATH = "instance-add"
9710 HTYPE = constants.HTYPE_INSTANCE
9713 def CheckArguments(self):
9717 # do not require name_check to ease forward/backward compatibility
9719 if self.op.no_install and self.op.start:
9720 self.LogInfo("No-installation mode selected, disabling startup")
9721 self.op.start = False
9722 # validate/normalize the instance name
9723 self.op.instance_name = \
9724 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9726 if self.op.ip_check and not self.op.name_check:
9727 # TODO: make the ip check more flexible and not depend on the name check
9728 raise errors.OpPrereqError("Cannot do IP address check without a name"
9729 " check", errors.ECODE_INVAL)
9731 # check nics' parameter names
9732 for nic in self.op.nics:
9733 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9735 # check disks. parameter names and consistent adopt/no-adopt strategy
9736 has_adopt = has_no_adopt = False
9737 for disk in self.op.disks:
9738 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9739 if constants.IDISK_ADOPT in disk:
9743 if has_adopt and has_no_adopt:
9744 raise errors.OpPrereqError("Either all disks are adopted or none is",
9747 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9748 raise errors.OpPrereqError("Disk adoption is not supported for the"
9749 " '%s' disk template" %
9750 self.op.disk_template,
9752 if self.op.iallocator is not None:
9753 raise errors.OpPrereqError("Disk adoption not allowed with an"
9754 " iallocator script", errors.ECODE_INVAL)
9755 if self.op.mode == constants.INSTANCE_IMPORT:
9756 raise errors.OpPrereqError("Disk adoption not allowed for"
9757 " instance import", errors.ECODE_INVAL)
9759 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9760 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9761 " but no 'adopt' parameter given" %
9762 self.op.disk_template,
9765 self.adopt_disks = has_adopt
9767 # instance name verification
9768 if self.op.name_check:
9769 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9770 self.op.instance_name = self.hostname1.name
9771 # used in CheckPrereq for ip ping check
9772 self.check_ip = self.hostname1.ip
9774 self.check_ip = None
9776 # file storage checks
9777 if (self.op.file_driver and
9778 not self.op.file_driver in constants.FILE_DRIVER):
9779 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9780 self.op.file_driver, errors.ECODE_INVAL)
9782 if self.op.disk_template == constants.DT_FILE:
9783 opcodes.RequireFileStorage()
9784 elif self.op.disk_template == constants.DT_SHARED_FILE:
9785 opcodes.RequireSharedFileStorage()
9787 ### Node/iallocator related checks
9788 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9790 if self.op.pnode is not None:
9791 if self.op.disk_template in constants.DTS_INT_MIRROR:
9792 if self.op.snode is None:
9793 raise errors.OpPrereqError("The networked disk templates need"
9794 " a mirror node", errors.ECODE_INVAL)
9796 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9798 self.op.snode = None
9800 self._cds = _GetClusterDomainSecret()
9802 if self.op.mode == constants.INSTANCE_IMPORT:
9803 # On import force_variant must be True, because if we forced it at
9804 # initial install, our only chance when importing it back is that it
9806 self.op.force_variant = True
9808 if self.op.no_install:
9809 self.LogInfo("No-installation mode has no effect during import")
9811 elif self.op.mode == constants.INSTANCE_CREATE:
9812 if self.op.os_type is None:
9813 raise errors.OpPrereqError("No guest OS specified",
9815 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9816 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9817 " installation" % self.op.os_type,
9819 if self.op.disk_template is None:
9820 raise errors.OpPrereqError("No disk template specified",
9823 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9824 # Check handshake to ensure both clusters have the same domain secret
9825 src_handshake = self.op.source_handshake
9826 if not src_handshake:
9827 raise errors.OpPrereqError("Missing source handshake",
9830 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9833 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9836 # Load and check source CA
9837 self.source_x509_ca_pem = self.op.source_x509_ca
9838 if not self.source_x509_ca_pem:
9839 raise errors.OpPrereqError("Missing source X509 CA",
9843 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9845 except OpenSSL.crypto.Error, err:
9846 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9847 (err, ), errors.ECODE_INVAL)
9849 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9850 if errcode is not None:
9851 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9854 self.source_x509_ca = cert
9856 src_instance_name = self.op.source_instance_name
9857 if not src_instance_name:
9858 raise errors.OpPrereqError("Missing source instance name",
9861 self.source_instance_name = \
9862 netutils.GetHostname(name=src_instance_name).name
9865 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9866 self.op.mode, errors.ECODE_INVAL)
9868 def ExpandNames(self):
9869 """ExpandNames for CreateInstance.
9871 Figure out the right locks for instance creation.
9874 self.needed_locks = {}
9876 instance_name = self.op.instance_name
9877 # this is just a preventive check, but someone might still add this
9878 # instance in the meantime, and creation will fail at lock-add time
9879 if instance_name in self.cfg.GetInstanceList():
9880 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9881 instance_name, errors.ECODE_EXISTS)
9883 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9885 if self.op.iallocator:
9886 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9887 # specifying a group on instance creation and then selecting nodes from
9889 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9890 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9892 if self.op.opportunistic_locking:
9893 self.opportunistic_locks[locking.LEVEL_NODE] = True
9894 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
9896 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9897 nodelist = [self.op.pnode]
9898 if self.op.snode is not None:
9899 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9900 nodelist.append(self.op.snode)
9901 self.needed_locks[locking.LEVEL_NODE] = nodelist
9903 # in case of import lock the source node too
9904 if self.op.mode == constants.INSTANCE_IMPORT:
9905 src_node = self.op.src_node
9906 src_path = self.op.src_path
9908 if src_path is None:
9909 self.op.src_path = src_path = self.op.instance_name
9911 if src_node is None:
9912 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9913 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9914 self.op.src_node = None
9915 if os.path.isabs(src_path):
9916 raise errors.OpPrereqError("Importing an instance from a path"
9917 " requires a source node option",
9920 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9921 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9922 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9923 if not os.path.isabs(src_path):
9924 self.op.src_path = src_path = \
9925 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9927 self.needed_locks[locking.LEVEL_NODE_RES] = \
9928 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
9930 def _RunAllocator(self):
9931 """Run the allocator based on input opcode.
9934 if self.op.opportunistic_locking:
9935 # Only consider nodes for which a lock is held
9936 node_whitelist = self.owned_locks(locking.LEVEL_NODE)
9938 node_whitelist = None
9940 # TODO: Export network to iallocator so that it chooses a pnode
9941 # in a nodegroup that has the desired network connected to
9942 req = _CreateInstanceAllocRequest(self.op, self.disks,
9943 self.nics, self.be_full,
9945 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9947 ial.Run(self.op.iallocator)
9950 # When opportunistic locks are used only a temporary failure is generated
9951 if self.op.opportunistic_locking:
9952 ecode = errors.ECODE_TEMP_NORES
9954 ecode = errors.ECODE_NORES
9956 raise errors.OpPrereqError("Can't compute nodes using"
9957 " iallocator '%s': %s" %
9958 (self.op.iallocator, ial.info),
9961 self.op.pnode = ial.result[0]
9962 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9963 self.op.instance_name, self.op.iallocator,
9964 utils.CommaJoin(ial.result))
9966 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9968 if req.RequiredNodes() == 2:
9969 self.op.snode = ial.result[1]
9971 def BuildHooksEnv(self):
9974 This runs on master, primary and secondary nodes of the instance.
9978 "ADD_MODE": self.op.mode,
9980 if self.op.mode == constants.INSTANCE_IMPORT:
9981 env["SRC_NODE"] = self.op.src_node
9982 env["SRC_PATH"] = self.op.src_path
9983 env["SRC_IMAGES"] = self.src_images
9985 env.update(_BuildInstanceHookEnv(
9986 name=self.op.instance_name,
9987 primary_node=self.op.pnode,
9988 secondary_nodes=self.secondaries,
9989 status=self.op.start,
9990 os_type=self.op.os_type,
9991 minmem=self.be_full[constants.BE_MINMEM],
9992 maxmem=self.be_full[constants.BE_MAXMEM],
9993 vcpus=self.be_full[constants.BE_VCPUS],
9994 nics=_NICListToTuple(self, self.nics),
9995 disk_template=self.op.disk_template,
9996 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9997 for d in self.disks],
10000 hypervisor_name=self.op.hypervisor,
10006 def BuildHooksNodes(self):
10007 """Build hooks nodes.
10010 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10013 def _ReadExportInfo(self):
10014 """Reads the export information from disk.
10016 It will override the opcode source node and path with the actual
10017 information, if these two were not specified before.
10019 @return: the export information
10022 assert self.op.mode == constants.INSTANCE_IMPORT
10024 src_node = self.op.src_node
10025 src_path = self.op.src_path
10027 if src_node is None:
10028 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10029 exp_list = self.rpc.call_export_list(locked_nodes)
10031 for node in exp_list:
10032 if exp_list[node].fail_msg:
10034 if src_path in exp_list[node].payload:
10036 self.op.src_node = src_node = node
10037 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10041 raise errors.OpPrereqError("No export found for relative path %s" %
10042 src_path, errors.ECODE_INVAL)
10044 _CheckNodeOnline(self, src_node)
10045 result = self.rpc.call_export_info(src_node, src_path)
10046 result.Raise("No export or invalid export found in dir %s" % src_path)
10048 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10049 if not export_info.has_section(constants.INISECT_EXP):
10050 raise errors.ProgrammerError("Corrupted export config",
10051 errors.ECODE_ENVIRON)
10053 ei_version = export_info.get(constants.INISECT_EXP, "version")
10054 if int(ei_version) != constants.EXPORT_VERSION:
10055 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10056 (ei_version, constants.EXPORT_VERSION),
10057 errors.ECODE_ENVIRON)
10060 def _ReadExportParams(self, einfo):
10061 """Use export parameters as defaults.
10063 If the opcode doesn't specify (i.e. override) some instance
10064 parameters, try to take them from the export information, provided
10065 the export declares them.
10068 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10070 if self.op.disk_template is None:
10071 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10072 self.op.disk_template = einfo.get(constants.INISECT_INS,
10074 if self.op.disk_template not in constants.DISK_TEMPLATES:
10075 raise errors.OpPrereqError("Disk template specified in configuration"
10076 " file is not one of the allowed values:"
10078 " ".join(constants.DISK_TEMPLATES),
10079 errors.ECODE_INVAL)
10081 raise errors.OpPrereqError("No disk template specified and the export"
10082 " is missing the disk_template information",
10083 errors.ECODE_INVAL)
10085 if not self.op.disks:
10087 # TODO: import the disk iv_name too
10088 for idx in range(constants.MAX_DISKS):
10089 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10090 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10091 disks.append({constants.IDISK_SIZE: disk_sz})
10092 self.op.disks = disks
10093 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10094 raise errors.OpPrereqError("No disk info specified and the export"
10095 " is missing the disk information",
10096 errors.ECODE_INVAL)
10098 if not self.op.nics:
10100 for idx in range(constants.MAX_NICS):
10101 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10103 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10104 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10109 self.op.nics = nics
10111 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10112 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10114 if (self.op.hypervisor is None and
10115 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10116 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10118 if einfo.has_section(constants.INISECT_HYP):
10119 # use the export parameters but do not override the ones
10120 # specified by the user
10121 for name, value in einfo.items(constants.INISECT_HYP):
10122 if name not in self.op.hvparams:
10123 self.op.hvparams[name] = value
10125 if einfo.has_section(constants.INISECT_BEP):
10126 # use the parameters, without overriding
10127 for name, value in einfo.items(constants.INISECT_BEP):
10128 if name not in self.op.beparams:
10129 self.op.beparams[name] = value
10130 # Compatibility for the old "memory" be param
10131 if name == constants.BE_MEMORY:
10132 if constants.BE_MAXMEM not in self.op.beparams:
10133 self.op.beparams[constants.BE_MAXMEM] = value
10134 if constants.BE_MINMEM not in self.op.beparams:
10135 self.op.beparams[constants.BE_MINMEM] = value
10137 # try to read the parameters old style, from the main section
10138 for name in constants.BES_PARAMETERS:
10139 if (name not in self.op.beparams and
10140 einfo.has_option(constants.INISECT_INS, name)):
10141 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10143 if einfo.has_section(constants.INISECT_OSP):
10144 # use the parameters, without overriding
10145 for name, value in einfo.items(constants.INISECT_OSP):
10146 if name not in self.op.osparams:
10147 self.op.osparams[name] = value
10149 def _RevertToDefaults(self, cluster):
10150 """Revert the instance parameters to the default values.
10154 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10155 for name in self.op.hvparams.keys():
10156 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10157 del self.op.hvparams[name]
10159 be_defs = cluster.SimpleFillBE({})
10160 for name in self.op.beparams.keys():
10161 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10162 del self.op.beparams[name]
10164 nic_defs = cluster.SimpleFillNIC({})
10165 for nic in self.op.nics:
10166 for name in constants.NICS_PARAMETERS:
10167 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10170 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10171 for name in self.op.osparams.keys():
10172 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10173 del self.op.osparams[name]
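# Illustrative note (editorial addition): with cluster hvparams defaulting to
# {"acpi": True} and an opcode carrying {"acpi": True, "kernel_path":
# "/boot/vmlinuz"} (example values), the loops above drop "acpi" so that only
# genuine divergences from the cluster defaults are stored on the instance;
# the same pruning is applied to beparams, nicparams and osparams.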
10175 def _CalculateFileStorageDir(self):
10176 """Calculate final instance file storage dir.
10179 # file storage dir calculation/check
10180 self.instance_file_storage_dir = None
10181 if self.op.disk_template in constants.DTS_FILEBASED:
10182 # build the full file storage dir path
10185 if self.op.disk_template == constants.DT_SHARED_FILE:
10186 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10188 get_fsd_fn = self.cfg.GetFileStorageDir
10190 cfg_storagedir = get_fsd_fn()
10191 if not cfg_storagedir:
10192 raise errors.OpPrereqError("Cluster file storage dir not defined",
10193 errors.ECODE_STATE)
10194 joinargs.append(cfg_storagedir)
10196 if self.op.file_storage_dir is not None:
10197 joinargs.append(self.op.file_storage_dir)
10199 joinargs.append(self.op.instance_name)
10201 # pylint: disable=W0142
10202 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
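# Illustrative note (editorial addition): with a cluster file storage dir of
# "/srv/ganeti/file-storage", an opcode file_storage_dir of "mysubdir" and an
# instance name of "instance1.example.com" (all example values), the join
# above produces "/srv/ganeti/file-storage/mysubdir/instance1.example.com";
# when the opcode gives no file_storage_dir, the middle component is simply
# omitted.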
10204 def CheckPrereq(self): # pylint: disable=R0914
10205 """Check prerequisites.
10208 self._CalculateFileStorageDir()
10210 if self.op.mode == constants.INSTANCE_IMPORT:
10211 export_info = self._ReadExportInfo()
10212 self._ReadExportParams(export_info)
10213 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10215 self._old_instance_name = None
10217 if (not self.cfg.GetVGName() and
10218 self.op.disk_template not in constants.DTS_NOT_LVM):
10219 raise errors.OpPrereqError("Cluster does not support lvm-based"
10220 " instances", errors.ECODE_STATE)
10222 if (self.op.hypervisor is None or
10223 self.op.hypervisor == constants.VALUE_AUTO):
10224 self.op.hypervisor = self.cfg.GetHypervisorType()
10226 cluster = self.cfg.GetClusterInfo()
10227 enabled_hvs = cluster.enabled_hypervisors
10228 if self.op.hypervisor not in enabled_hvs:
10229 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10231 (self.op.hypervisor, ",".join(enabled_hvs)),
10232 errors.ECODE_STATE)
10234 # Check tag validity
10235 for tag in self.op.tags:
10236 objects.TaggableObject.ValidateTag(tag)
10238 # check hypervisor parameter syntax (locally)
10239 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10240 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10242 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10243 hv_type.CheckParameterSyntax(filled_hvp)
10244 self.hv_full = filled_hvp
10245 # check that we don't specify global parameters on an instance
10246 _CheckGlobalHvParams(self.op.hvparams)
10248 # fill and remember the beparams dict
10249 self.be_full = _ComputeFullBeParams(self.op, cluster)
10251 # build os parameters
10252 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10254 # now that hvp/bep are in final format, let's reset to defaults,
10256 if self.op.identify_defaults:
10257 self._RevertToDefaults(cluster)
10260 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10261 self.proc.GetECId())
10263 # disk checks/pre-build
10264 default_vg = self.cfg.GetVGName()
10265 self.disks = _ComputeDisks(self.op, default_vg)
10267 if self.op.mode == constants.INSTANCE_IMPORT:
10269 for idx in range(len(self.disks)):
10270 option = "disk%d_dump" % idx
10271 if export_info.has_option(constants.INISECT_INS, option):
10272 # FIXME: are the old os-es, disk sizes, etc. useful?
10273 export_name = export_info.get(constants.INISECT_INS, option)
10274 image = utils.PathJoin(self.op.src_path, export_name)
10275 disk_images.append(image)
10277 disk_images.append(False)
10279 self.src_images = disk_images
10281 if self.op.instance_name == self._old_instance_name:
10282 for idx, nic in enumerate(self.nics):
10283 if nic.mac == constants.VALUE_AUTO:
10284 nic_mac_ini = "nic%d_mac" % idx
10285 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10287 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10289 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10290 if self.op.ip_check:
10291 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10292 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10293 (self.check_ip, self.op.instance_name),
10294 errors.ECODE_NOTUNIQUE)
10296 #### mac address generation
10297 # By generating here the mac address both the allocator and the hooks get
10298 # the real final mac address rather than the 'auto' or 'generate' value.
10299 # There is a race condition between the generation and the instance object
10300 # creation, which means that we know the mac is valid now, but we're not
10301 # sure it will be when we actually add the instance. If things go bad
10302 # adding the instance will abort because of a duplicate mac, and the
10303 # creation job will fail.
10304 for nic in self.nics:
10305 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10306 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10310 if self.op.iallocator is not None:
10311 self._RunAllocator()
10313 # Release all unneeded node locks
10314 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10315 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10316 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10317 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10319 assert (self.owned_locks(locking.LEVEL_NODE) ==
10320 self.owned_locks(locking.LEVEL_NODE_RES)), \
10321 "Node locks differ from node resource locks"
10323 #### node related checks
10325 # check primary node
10326 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10327 assert self.pnode is not None, \
10328 "Cannot retrieve locked node %s" % self.op.pnode
10330 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10331 pnode.name, errors.ECODE_STATE)
10333 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10334 pnode.name, errors.ECODE_STATE)
10335 if not pnode.vm_capable:
10336 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10337 " '%s'" % pnode.name, errors.ECODE_STATE)
10339 self.secondaries = []
10341 # Fill in any IPs from IP pools. This must happen here, because we need to
10342 # know the nic's primary node, as specified by the iallocator
10343 for idx, nic in enumerate(self.nics):
10345 if net is not None:
10346 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10347 if netparams is None:
10348 raise errors.OpPrereqError("No netparams found for network"
10349 " %s. Probably it is not connected to"
10350 " the nodegroup of node %s" %
10351 (net, self.pnode.name),
10352 errors.ECODE_INVAL)
10353 self.LogInfo("NIC/%d inherits netparams %s" %
10354 (idx, netparams.values()))
10355 nic.nicparams = dict(netparams)
10356 if nic.ip is not None:
10357 if nic.ip.lower() == constants.NIC_IP_POOL:
10359 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10360 except errors.ReservationError:
10361 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10362 " from the address pool" % idx,
10363 errors.ECODE_STATE)
10364 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10367 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10368 except errors.ReservationError:
10369 raise errors.OpPrereqError("IP address %s already in use"
10370 " or does not belong to network %s" %
10372 errors.ECODE_NOTUNIQUE)
10374 # net is None, ip None or given
10375 if self.op.conflicts_check:
10376 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10378 # mirror node verification
10379 if self.op.disk_template in constants.DTS_INT_MIRROR:
10380 if self.op.snode == pnode.name:
10381 raise errors.OpPrereqError("The secondary node cannot be the"
10382 " primary node", errors.ECODE_INVAL)
10383 _CheckNodeOnline(self, self.op.snode)
10384 _CheckNodeNotDrained(self, self.op.snode)
10385 _CheckNodeVmCapable(self, self.op.snode)
10386 self.secondaries.append(self.op.snode)
10388 snode = self.cfg.GetNodeInfo(self.op.snode)
10389 if pnode.group != snode.group:
10390 self.LogWarning("The primary and secondary nodes are in two"
10391 " different node groups; the disk parameters"
10392 " from the first disk's node group will be"
10395 nodenames = [pnode.name] + self.secondaries
10397 # Verify instance specs
10398 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10400 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10401 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10402 constants.ISPEC_DISK_COUNT: len(self.disks),
10403 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10404 constants.ISPEC_NIC_COUNT: len(self.nics),
10405 constants.ISPEC_SPINDLE_USE: spindle_use,
10408 group_info = self.cfg.GetNodeGroup(pnode.group)
10409 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10410 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10411 if not self.op.ignore_ipolicy and res:
10412 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10413 (pnode.group, group_info.name, utils.CommaJoin(res)))
10414 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10416 if not self.adopt_disks:
10417 if self.op.disk_template == constants.DT_RBD:
10418 # _CheckRADOSFreeSpace() is just a placeholder.
10419 # Any function that checks prerequisites can be placed here.
10420 # Check if there is enough space on the RADOS cluster.
10421 _CheckRADOSFreeSpace()
10423 # Check lv size requirements, if not adopting
10424 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10425 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10427 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10428 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10429 disk[constants.IDISK_ADOPT])
10430 for disk in self.disks])
10431 if len(all_lvs) != len(self.disks):
10432 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10433 errors.ECODE_INVAL)
10434 for lv_name in all_lvs:
10436 # FIXME: lv_name here is "vg/lv"; we need to ensure that other calls
10437 # to ReserveLV use the same syntax
10438 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10439 except errors.ReservationError:
10440 raise errors.OpPrereqError("LV named %s used by another instance" %
10441 lv_name, errors.ECODE_NOTUNIQUE)
10443 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10444 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10446 node_lvs = self.rpc.call_lv_list([pnode.name],
10447 vg_names.payload.keys())[pnode.name]
10448 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10449 node_lvs = node_lvs.payload
10451 delta = all_lvs.difference(node_lvs.keys())
10453 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10454 utils.CommaJoin(delta),
10455 errors.ECODE_INVAL)
10456 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10458 raise errors.OpPrereqError("Online logical volumes found, cannot"
10459 " adopt: %s" % utils.CommaJoin(online_lvs),
10460 errors.ECODE_STATE)
10461 # update the size of disk based on what is found
10462 for dsk in self.disks:
10463 dsk[constants.IDISK_SIZE] = \
10464 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10465 dsk[constants.IDISK_ADOPT])][0]))
10467 elif self.op.disk_template == constants.DT_BLOCK:
10468 # Normalize and de-duplicate device paths
10469 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10470 for disk in self.disks])
10471 if len(all_disks) != len(self.disks):
10472 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10473 errors.ECODE_INVAL)
10474 baddisks = [d for d in all_disks
10475 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10477 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10478 " cannot be adopted" %
10479 (utils.CommaJoin(baddisks),
10480 constants.ADOPTABLE_BLOCKDEV_ROOT),
10481 errors.ECODE_INVAL)
10483 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10484 list(all_disks))[pnode.name]
10485 node_disks.Raise("Cannot get block device information from node %s" %
10487 node_disks = node_disks.payload
10488 delta = all_disks.difference(node_disks.keys())
10490 raise errors.OpPrereqError("Missing block device(s): %s" %
10491 utils.CommaJoin(delta),
10492 errors.ECODE_INVAL)
10493 for dsk in self.disks:
10494 dsk[constants.IDISK_SIZE] = \
10495 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10497 # Verify instance specs
10498 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10500 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10501 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10502 constants.ISPEC_DISK_COUNT: len(self.disks),
10503 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10504 for disk in self.disks],
10505 constants.ISPEC_NIC_COUNT: len(self.nics),
10506 constants.ISPEC_SPINDLE_USE: spindle_use,
10509 group_info = self.cfg.GetNodeGroup(pnode.group)
10510 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10511 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10512 if not self.op.ignore_ipolicy and res:
10513 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10514 " policy: %s") % (pnode.group,
10515 utils.CommaJoin(res)),
10516 errors.ECODE_INVAL)
10518 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10520 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10521 # check OS parameters (remotely)
10522 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10524 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10526 # memory check on primary node
10527 #TODO(dynmem): use MINMEM for checking
10529 _CheckNodeFreeMemory(self, self.pnode.name,
10530 "creating instance %s" % self.op.instance_name,
10531 self.be_full[constants.BE_MAXMEM],
10532 self.op.hypervisor)
10534 self.dry_run_result = list(nodenames)
10536 def Exec(self, feedback_fn):
10537 """Create and add the instance to the cluster.
10540 instance = self.op.instance_name
10541 pnode_name = self.pnode.name
10543 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10544 self.owned_locks(locking.LEVEL_NODE)), \
10545 "Node locks differ from node resource locks"
10546 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10548 ht_kind = self.op.hypervisor
10549 if ht_kind in constants.HTS_REQ_PORT:
10550 network_port = self.cfg.AllocatePort()
10552 network_port = None
10554 # This is ugly but we got a chicken-egg problem here
10555 # We can only take the group disk parameters, as the instance
10556 # has no disks yet (we are generating them right here).
10557 node = self.cfg.GetNodeInfo(pnode_name)
10558 nodegroup = self.cfg.GetNodeGroup(node.group)
10559 disks = _GenerateDiskTemplate(self,
10560 self.op.disk_template,
10561 instance, pnode_name,
10564 self.instance_file_storage_dir,
10565 self.op.file_driver,
10568 self.cfg.GetGroupDiskParams(nodegroup))
10570 iobj = objects.Instance(name=instance, os=self.op.os_type,
10571 primary_node=pnode_name,
10572 nics=self.nics, disks=disks,
10573 disk_template=self.op.disk_template,
10574 admin_state=constants.ADMINST_DOWN,
10575 network_port=network_port,
10576 beparams=self.op.beparams,
10577 hvparams=self.op.hvparams,
10578 hypervisor=self.op.hypervisor,
10579 osparams=self.op.osparams,
10583 for tag in self.op.tags:
10586 if self.adopt_disks:
10587 if self.op.disk_template == constants.DT_PLAIN:
10588 # rename LVs to the newly-generated names; we need to construct
10589 # 'fake' LV disks with the old data, plus the new unique_id
10590 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10592 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10593 rename_to.append(t_dsk.logical_id)
10594 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10595 self.cfg.SetDiskID(t_dsk, pnode_name)
10596 result = self.rpc.call_blockdev_rename(pnode_name,
10597 zip(tmp_disks, rename_to))
10598 result.Raise("Failed to rename adopted LVs")
10600 feedback_fn("* creating instance disks...")
10602 _CreateDisks(self, iobj)
10603 except errors.OpExecError:
10604 self.LogWarning("Device creation failed, reverting...")
10606 _RemoveDisks(self, iobj)
10608 self.cfg.ReleaseDRBDMinors(instance)
10611 feedback_fn("adding instance %s to cluster config" % instance)
10613 self.cfg.AddInstance(iobj, self.proc.GetECId())
10615 # Declare that we don't want to remove the instance lock anymore, as we've
10616 # added the instance to the config
10617 del self.remove_locks[locking.LEVEL_INSTANCE]
10619 if self.op.mode == constants.INSTANCE_IMPORT:
10620 # Release unused nodes
10621 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10623 # Release all nodes
10624 _ReleaseLocks(self, locking.LEVEL_NODE)
10627 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10628 feedback_fn("* wiping instance disks...")
10630 _WipeDisks(self, iobj)
10631 except errors.OpExecError, err:
10632 logging.exception("Wiping disks failed")
10633 self.LogWarning("Wiping instance disks failed (%s)", err)
10637 # Something is already wrong with the disks, don't do anything else
10639 elif self.op.wait_for_sync:
10640 disk_abort = not _WaitForSync(self, iobj)
10641 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10642 # make sure the disks are not degraded (still sync-ing is ok)
10643 feedback_fn("* checking mirrors status")
10644 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10649 _RemoveDisks(self, iobj)
10650 self.cfg.RemoveInstance(iobj.name)
10651 # Make sure the instance lock gets removed
10652 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10653 raise errors.OpExecError("There are some degraded disks for"
10656 # Release all node resource locks
10657 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10659 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10660 # we need to set the disks ID to the primary node, since the
10661 # preceding code might or might not have done it, depending on
10662 # disk template and other options
10663 for disk in iobj.disks:
10664 self.cfg.SetDiskID(disk, pnode_name)
10665 if self.op.mode == constants.INSTANCE_CREATE:
10666 if not self.op.no_install:
10667 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10668 not self.op.wait_for_sync)
10670 feedback_fn("* pausing disk sync to install instance OS")
10671 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10674 for idx, success in enumerate(result.payload):
10676 logging.warn("pause-sync of instance %s for disk %d failed",
10679 feedback_fn("* running the instance OS create scripts...")
10680 # FIXME: pass debug option from opcode to backend
10682 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10683 self.op.debug_level)
10685 feedback_fn("* resuming disk sync")
10686 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10689 for idx, success in enumerate(result.payload):
10691 logging.warn("resume-sync of instance %s for disk %d failed",
10694 os_add_result.Raise("Could not add os for instance %s"
10695 " on node %s" % (instance, pnode_name))
10698 if self.op.mode == constants.INSTANCE_IMPORT:
10699 feedback_fn("* running the instance OS import scripts...")
10703 for idx, image in enumerate(self.src_images):
10707 # FIXME: pass debug option from opcode to backend
10708 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10709 constants.IEIO_FILE, (image, ),
10710 constants.IEIO_SCRIPT,
10711 (iobj.disks[idx], idx),
10713 transfers.append(dt)
10716 masterd.instance.TransferInstanceData(self, feedback_fn,
10717 self.op.src_node, pnode_name,
10718 self.pnode.secondary_ip,
10720 if not compat.all(import_result):
10721 self.LogWarning("Some disks for instance %s on node %s were not"
10722 " imported successfully" % (instance, pnode_name))
10724 rename_from = self._old_instance_name
10726 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10727 feedback_fn("* preparing remote import...")
10728 # The source cluster will stop the instance before attempting to make
10729 # a connection. In some cases stopping an instance can take a long
10730 # time, hence the shutdown timeout is added to the connection
10732 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10733 self.op.source_shutdown_timeout)
10734 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10736 assert iobj.primary_node == self.pnode.name
10738 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10739 self.source_x509_ca,
10740 self._cds, timeouts)
10741 if not compat.all(disk_results):
10742 # TODO: Should the instance still be started, even if some disks
10743 # failed to import (valid for local imports, too)?
10744 self.LogWarning("Some disks for instance %s on node %s were not"
10745 " imported successfully" % (instance, pnode_name))
10747 rename_from = self.source_instance_name
10750 # also checked in the prereq part
10751 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10754 # Run rename script on newly imported instance
10755 assert iobj.name == instance
10756 feedback_fn("Running rename script for %s" % instance)
10757 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10759 self.op.debug_level)
10760 if result.fail_msg:
10761 self.LogWarning("Failed to run rename script for %s on node"
10762 " %s: %s" % (instance, pnode_name, result.fail_msg))
10764 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10767 iobj.admin_state = constants.ADMINST_UP
10768 self.cfg.Update(iobj, feedback_fn)
10769 logging.info("Starting instance %s on node %s", instance, pnode_name)
10770 feedback_fn("* starting instance...")
10771 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10773 result.Raise("Could not start instance")
10775 return list(iobj.all_nodes)
10778 class LUInstanceMultiAlloc(NoHooksLU):
10779 """Allocates multiple instances at the same time.
10784 def CheckArguments(self):
10785 """Check arguments.
10789 for inst in self.op.instances:
10790 if inst.iallocator is not None:
10791 raise errors.OpPrereqError("iallocator is not allowed to be set on"
10792 " instance objects", errors.ECODE_INVAL)
10793 nodes.append(bool(inst.pnode))
10794 if inst.disk_template in constants.DTS_INT_MIRROR:
10795 nodes.append(bool(inst.snode))
10797 has_nodes = compat.any(nodes)
10798 if compat.all(nodes) ^ has_nodes:
10799 raise errors.OpPrereqError("There are instance objects providing"
10800 " pnode/snode while others do not",
10801 errors.ECODE_INVAL)
10803 if self.op.iallocator is None:
10804 default_iallocator = self.cfg.GetDefaultIAllocator()
10805 if default_iallocator and has_nodes:
10806 self.op.iallocator = default_iallocator
10808 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10809 " given and no cluster-wide default"
10810 " iallocator found; please specify either"
10811 " an iallocator or nodes on the instances"
10812 " or set a cluster-wide default iallocator",
10813 errors.ECODE_INVAL)
10815 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10817 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10818 utils.CommaJoin(dups), errors.ECODE_INVAL)
10820 def ExpandNames(self):
10821 """Calculate the locks.
10824 self.share_locks = _ShareAll()
10825 self.needed_locks = {
10826 # iallocator will select nodes and even if no iallocator is used,
10827 # collisions with LUInstanceCreate should be avoided
10828 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
10831 if self.op.iallocator:
10832 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10833 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10835 if self.op.opportunistic_locking:
10836 self.opportunistic_locks[locking.LEVEL_NODE] = True
10837 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10840 for inst in self.op.instances:
10841 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10842 nodeslist.append(inst.pnode)
10843 if inst.snode is not None:
10844 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10845 nodeslist.append(inst.snode)
10847 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10848 # Lock resources of instance's primary and secondary nodes (copy to
10849 # prevent accidental modification)
10850 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10852 def CheckPrereq(self):
10853 """Check prerequisite.
10856 cluster = self.cfg.GetClusterInfo()
10857 default_vg = self.cfg.GetVGName()
10858 ec_id = self.proc.GetECId()
10860 if self.op.opportunistic_locking:
10861 # Only consider nodes for which a lock is held
10862 node_whitelist = self.owned_locks(locking.LEVEL_NODE)
10864 node_whitelist = None
10866 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10867 _ComputeNics(op, cluster, None,
10869 _ComputeFullBeParams(op, cluster),
10871 for op in self.op.instances]
10873 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10874 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10876 ial.Run(self.op.iallocator)
10878 if not ial.success:
10879 raise errors.OpPrereqError("Can't compute nodes using"
10880 " iallocator '%s': %s" %
10881 (self.op.iallocator, ial.info),
10882 errors.ECODE_NORES)
10884 self.ia_result = ial.result
10886 if self.op.dry_run:
10887 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10888 constants.JOB_IDS_KEY: [],
10891 def _ConstructPartialResult(self):
10892 """Constructs the partial result.
10895 (allocatable, failed) = self.ia_result
10897 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10898 map(compat.fst, allocatable),
10899 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10902 def Exec(self, feedback_fn):
10903 """Executes the opcode.
10906 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10907 (allocatable, failed) = self.ia_result
10910 for (name, nodes) in allocatable:
10911 op = op2inst.pop(name)
10914 (op.pnode, op.snode) = nodes
10916 (op.pnode,) = nodes
10920 missing = set(op2inst.keys()) - set(failed)
10921 assert not missing, \
10922 "Iallocator returned an incomplete result: %s" % utils.CommaJoin(missing)
10924 return ResultWithJobs(jobs, **self._ConstructPartialResult())
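# Illustrative note (editorial addition): the iallocator result consumed
# above has the shape (example names only)
#
#   ia_result = ([("inst1.example.com", ["node1", "node2"]),
#                 ("inst2.example.com", ["node3"])],         # allocatable
#                ["inst3.example.com"])                       # failed
#
# Each allocatable entry is turned back into its original creation opcode
# with pnode (and snode, for mirrored templates) filled in and submitted via
# ResultWithJobs; failed names are only reported in the partial result.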
10927 def _CheckRADOSFreeSpace():
10928 """Compute disk size requirements inside the RADOS cluster.
10931 # For the RADOS cluster we assume there is always enough space.
10935 class LUInstanceConsole(NoHooksLU):
10936 """Connect to an instance's console.
10938 This is somewhat special in that it returns the command line that
10939 you need to run on the master node in order to connect to the
10945 def ExpandNames(self):
10946 self.share_locks = _ShareAll()
10947 self._ExpandAndLockInstance()
10949 def CheckPrereq(self):
10950 """Check prerequisites.
10952 This checks that the instance is in the cluster.
10955 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10956 assert self.instance is not None, \
10957 "Cannot retrieve locked instance %s" % self.op.instance_name
10958 _CheckNodeOnline(self, self.instance.primary_node)
10960 def Exec(self, feedback_fn):
10961 """Connect to the console of an instance
10964 instance = self.instance
10965 node = instance.primary_node
10967 node_insts = self.rpc.call_instance_list([node],
10968 [instance.hypervisor])[node]
10969 node_insts.Raise("Can't get node information from %s" % node)
10971 if instance.name not in node_insts.payload:
10972 if instance.admin_state == constants.ADMINST_UP:
10973 state = constants.INSTST_ERRORDOWN
10974 elif instance.admin_state == constants.ADMINST_DOWN:
10975 state = constants.INSTST_ADMINDOWN
10977 state = constants.INSTST_ADMINOFFLINE
10978 raise errors.OpExecError("Instance %s is not running (state %s)" %
10979 (instance.name, state))
10981 logging.debug("Connecting to console of %s on %s", instance.name, node)
10983 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10986 def _GetInstanceConsole(cluster, instance):
10987 """Returns console information for an instance.
10989 @type cluster: L{objects.Cluster}
10990 @type instance: L{objects.Instance}
10994 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10995 # beparams and hvparams are passed separately, to avoid editing the
10996 # instance and then saving the defaults in the instance itself.
10997 hvparams = cluster.FillHV(instance)
10998 beparams = cluster.FillBE(instance)
10999 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11001 assert console.instance == instance.name
11002 assert console.Validate()
11004 return console.ToDict()
11007 class LUInstanceReplaceDisks(LogicalUnit):
11008 """Replace the disks of an instance.
11011 HPATH = "mirrors-replace"
11012 HTYPE = constants.HTYPE_INSTANCE
11015 def CheckArguments(self):
11016 """Check arguments.
11019 remote_node = self.op.remote_node
11020 ialloc = self.op.iallocator
11021 if self.op.mode == constants.REPLACE_DISK_CHG:
11022 if remote_node is None and ialloc is None:
11023 raise errors.OpPrereqError("When changing the secondary either an"
11024 " iallocator script must be used or the"
11025 " new node given", errors.ECODE_INVAL)
11027 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11029 elif remote_node is not None or ialloc is not None:
11030 # Not replacing the secondary
11031 raise errors.OpPrereqError("The iallocator and new node options can"
11032 " only be used when changing the"
11033 " secondary node", errors.ECODE_INVAL)
11035 def ExpandNames(self):
11036 self._ExpandAndLockInstance()
11038 assert locking.LEVEL_NODE not in self.needed_locks
11039 assert locking.LEVEL_NODE_RES not in self.needed_locks
11040 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11042 assert self.op.iallocator is None or self.op.remote_node is None, \
11043 "Conflicting options"
11045 if self.op.remote_node is not None:
11046 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11048 # Warning: do not remove the locking of the new secondary here
11049 # unless DRBD8.AddChildren is changed to work in parallel;
11050 # currently it doesn't since parallel invocations of
11051 # FindUnusedMinor will conflict
11052 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11053 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11055 self.needed_locks[locking.LEVEL_NODE] = []
11056 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11058 if self.op.iallocator is not None:
11059 # iallocator will select a new node in the same group
11060 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11061 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11063 self.needed_locks[locking.LEVEL_NODE_RES] = []
11065 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11066 self.op.iallocator, self.op.remote_node,
11067 self.op.disks, self.op.early_release,
11068 self.op.ignore_ipolicy)
11070 self.tasklets = [self.replacer]
11072 def DeclareLocks(self, level):
11073 if level == locking.LEVEL_NODEGROUP:
11074 assert self.op.remote_node is None
11075 assert self.op.iallocator is not None
11076 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11078 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11079 # Lock all groups used by instance optimistically; this requires going
11080 # via the node before it's locked, requiring verification later on
11081 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11082 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11084 elif level == locking.LEVEL_NODE:
11085 if self.op.iallocator is not None:
11086 assert self.op.remote_node is None
11087 assert not self.needed_locks[locking.LEVEL_NODE]
11088 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11090 # Lock member nodes of all locked groups
11091 self.needed_locks[locking.LEVEL_NODE] = \
11093 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11094 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11096 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11098 self._LockInstancesNodes()
11100 elif level == locking.LEVEL_NODE_RES:
11102 self.needed_locks[locking.LEVEL_NODE_RES] = \
11103 self.needed_locks[locking.LEVEL_NODE]
11105 def BuildHooksEnv(self):
11106 """Build hooks env.
11108 This runs on the master, the primary and all the secondaries.
11111 instance = self.replacer.instance
11113 "MODE": self.op.mode,
11114 "NEW_SECONDARY": self.op.remote_node,
11115 "OLD_SECONDARY": instance.secondary_nodes[0],
11117 env.update(_BuildInstanceHookEnvByObject(self, instance))
11120 def BuildHooksNodes(self):
11121 """Build hooks nodes.
11124 instance = self.replacer.instance
11126 self.cfg.GetMasterNode(),
11127 instance.primary_node,
11129 if self.op.remote_node is not None:
11130 nl.append(self.op.remote_node)
11133 def CheckPrereq(self):
11134 """Check prerequisites.
11137 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11138 self.op.iallocator is None)
11140 # Verify if node group locks are still correct
11141 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11143 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11145 return LogicalUnit.CheckPrereq(self)
11148 class TLReplaceDisks(Tasklet):
11149 """Replaces disks for an instance.
11151 Note: Locking is not within the scope of this class.
11154 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11155 disks, early_release, ignore_ipolicy):
11156 """Initializes this class.
11159 Tasklet.__init__(self, lu)
11162 self.instance_name = instance_name
11164 self.iallocator_name = iallocator_name
11165 self.remote_node = remote_node
11167 self.early_release = early_release
11168 self.ignore_ipolicy = ignore_ipolicy
11171 self.instance = None
11172 self.new_node = None
11173 self.target_node = None
11174 self.other_node = None
11175 self.remote_node_info = None
11176 self.node_secondary_ip = None
11179 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11180 """Compute a new secondary node using an IAllocator.
11183 req = iallocator.IAReqRelocate(name=instance_name,
11184 relocate_from=list(relocate_from))
11185 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11187 ial.Run(iallocator_name)
11189 if not ial.success:
11190 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11191 " %s" % (iallocator_name, ial.info),
11192 errors.ECODE_NORES)
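# Illustrative only: for a relocate request, ial.result is expected to be a
# list of node names, e.g. ["node3.example.com"]; only its first entry is
# used below.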
11194 remote_node_name = ial.result[0]
11196 lu.LogInfo("Selected new secondary for instance '%s': %s",
11197 instance_name, remote_node_name)
11199 return remote_node_name
11201 def _FindFaultyDisks(self, node_name):
11202 """Wrapper for L{_FindFaultyInstanceDisks}.
11205 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11208 def _CheckDisksActivated(self, instance):
11209 """Checks if the instance disks are activated.
11211 @param instance: The instance to check disks
11212 @return: True if they are activated, False otherwise
11215 nodes = instance.all_nodes
11217 for idx, dev in enumerate(instance.disks):
11219 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11220 self.cfg.SetDiskID(dev, node)
11222 result = _BlockdevFind(self, node, dev, instance)
11226 elif result.fail_msg or not result.payload:
11231 def CheckPrereq(self):
11232 """Check prerequisites.
11234 This checks that the instance is in the cluster.
11237 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11238 assert instance is not None, \
11239 "Cannot retrieve locked instance %s" % self.instance_name
11241 if instance.disk_template != constants.DT_DRBD8:
11242 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11243 " instances", errors.ECODE_INVAL)
11245 if len(instance.secondary_nodes) != 1:
11246 raise errors.OpPrereqError("The instance has a strange layout,"
11247 " expected one secondary but found %d" %
11248 len(instance.secondary_nodes),
11249 errors.ECODE_FAULT)
11251 instance = self.instance
11252 secondary_node = instance.secondary_nodes[0]
11254 if self.iallocator_name is None:
11255 remote_node = self.remote_node
11257 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11258 instance.name, instance.secondary_nodes)
11260 if remote_node is None:
11261 self.remote_node_info = None
11263 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11264 "Remote node '%s' is not locked" % remote_node
11266 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11267 assert self.remote_node_info is not None, \
11268 "Cannot retrieve locked node %s" % remote_node
11270 if remote_node == self.instance.primary_node:
11271 raise errors.OpPrereqError("The specified node is the primary node of"
11272 " the instance", errors.ECODE_INVAL)
11274 if remote_node == secondary_node:
11275 raise errors.OpPrereqError("The specified node is already the"
11276 " secondary node of the instance",
11277 errors.ECODE_INVAL)
11279 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11280 constants.REPLACE_DISK_CHG):
11281 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11282 errors.ECODE_INVAL)
11284 if self.mode == constants.REPLACE_DISK_AUTO:
11285 if not self._CheckDisksActivated(instance):
11286 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11287 " first" % self.instance_name,
11288 errors.ECODE_STATE)
11289 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11290 faulty_secondary = self._FindFaultyDisks(secondary_node)
11292 if faulty_primary and faulty_secondary:
11293 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11294 " one node and can not be repaired"
11295 " automatically" % self.instance_name,
11296 errors.ECODE_STATE)
11299 self.disks = faulty_primary
11300 self.target_node = instance.primary_node
11301 self.other_node = secondary_node
11302 check_nodes = [self.target_node, self.other_node]
11303 elif faulty_secondary:
11304 self.disks = faulty_secondary
11305 self.target_node = secondary_node
11306 self.other_node = instance.primary_node
11307 check_nodes = [self.target_node, self.other_node]
11313 # Non-automatic modes
11314 if self.mode == constants.REPLACE_DISK_PRI:
11315 self.target_node = instance.primary_node
11316 self.other_node = secondary_node
11317 check_nodes = [self.target_node, self.other_node]
11319 elif self.mode == constants.REPLACE_DISK_SEC:
11320 self.target_node = secondary_node
11321 self.other_node = instance.primary_node
11322 check_nodes = [self.target_node, self.other_node]
11324 elif self.mode == constants.REPLACE_DISK_CHG:
11325 self.new_node = remote_node
11326 self.other_node = instance.primary_node
11327 self.target_node = secondary_node
11328 check_nodes = [self.new_node, self.other_node]
11330 _CheckNodeNotDrained(self.lu, remote_node)
11331 _CheckNodeVmCapable(self.lu, remote_node)
11333 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11334 assert old_node_info is not None
11335 if old_node_info.offline and not self.early_release:
11336 # doesn't make sense to delay the release
11337 self.early_release = True
11338 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11339 " early-release mode", secondary_node)
11342 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11345 # If not specified all disks should be replaced
11347 self.disks = range(len(self.instance.disks))
11349 # TODO: This is ugly, but right now we can't distinguish between internally
11350 # submitted opcodes and external ones. We should fix that.
11351 if self.remote_node_info:
11352 # We change the node, let's verify it still meets instance policy
11353 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11354 cluster = self.cfg.GetClusterInfo()
11355 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11357 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11358 ignore=self.ignore_ipolicy)
11360 for node in check_nodes:
11361 _CheckNodeOnline(self.lu, node)
11363 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11366 if node_name is not None)
11368 # Release unneeded node and node resource locks
11369 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11370 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11371 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11373 # Release any owned node group
11374 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11376 # Check whether disks are valid
11377 for disk_idx in self.disks:
11378 instance.FindDisk(disk_idx)
11380 # Get secondary node IP addresses
11381 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11382 in self.cfg.GetMultiNodeInfo(touched_nodes))
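# Illustrative only: the resulting mapping pairs node names with their
# secondary (replication network) addresses, e.g.
#   {"node1.example.com": "192.0.2.1", "node2.example.com": "192.0.2.2"}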
11384 def Exec(self, feedback_fn):
11385 """Execute disk replacement.
11387 This dispatches the disk replacement to the appropriate handler.
11391 # Verify owned locks before starting operation
11392 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11393 assert set(owned_nodes) == set(self.node_secondary_ip), \
11394 ("Incorrect node locks, owning %s, expected %s" %
11395 (owned_nodes, self.node_secondary_ip.keys()))
11396 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11397 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11398 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11400 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11401 assert list(owned_instances) == [self.instance_name], \
11402 "Instance '%s' not locked" % self.instance_name
11404 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11405 "Should not own any node group lock at this point"
11408 feedback_fn("No disks need replacement for instance '%s'" %
11409 self.instance.name)
11412 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11413 (utils.CommaJoin(self.disks), self.instance.name))
11414 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11415 feedback_fn("Current seconary node: %s" %
11416 utils.CommaJoin(self.instance.secondary_nodes))
11418 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11420 # Activate the instance disks if we're replacing them on a down instance
11422 _StartInstanceDisks(self.lu, self.instance, True)
11425 # Should we replace the secondary node?
11426 if self.new_node is not None:
11427 fn = self._ExecDrbd8Secondary
11429 fn = self._ExecDrbd8DiskOnly
11431 result = fn(feedback_fn)
11433 # Deactivate the instance disks if we're replacing them on a down instance
11436 _SafeShutdownInstanceDisks(self.lu, self.instance)
11438 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11441 # Verify owned locks
11442 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11443 nodes = frozenset(self.node_secondary_ip)
11444 assert ((self.early_release and not owned_nodes) or
11445 (not self.early_release and not (set(owned_nodes) - nodes))), \
11446 ("Not owning the correct locks, early_release=%s, owned=%r,"
11447 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11451 def _CheckVolumeGroup(self, nodes):
11452 self.lu.LogInfo("Checking volume groups")
11454 vgname = self.cfg.GetVGName()
11456 # Make sure volume group exists on all involved nodes
11457 results = self.rpc.call_vg_list(nodes)
11459 raise errors.OpExecError("Can't list volume groups on the nodes")
11462 res = results[node]
11463 res.Raise("Error checking node %s" % node)
11464 if vgname not in res.payload:
11465 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11468 def _CheckDisksExistence(self, nodes):
11469 # Check disk existence
11470 for idx, dev in enumerate(self.instance.disks):
11471 if idx not in self.disks:
11475 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11476 self.cfg.SetDiskID(dev, node)
11478 result = _BlockdevFind(self, node, dev, self.instance)
11480 msg = result.fail_msg
11481 if msg or not result.payload:
11483 msg = "disk not found"
11484 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11487 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11488 for idx, dev in enumerate(self.instance.disks):
11489 if idx not in self.disks:
11492 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11495 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11496 on_primary, ldisk=ldisk):
11497 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11498 " replace disks for instance %s" %
11499 (node_name, self.instance.name))
11501 def _CreateNewStorage(self, node_name):
11502 """Create new storage on the primary or secondary node.
11504 This is only used for same-node replaces, not for changing the
11505 secondary node, hence we don't want to modify the existing disk.
11510 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11511 for idx, dev in enumerate(disks):
11512 if idx not in self.disks:
11515 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11517 self.cfg.SetDiskID(dev, node_name)
11519 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11520 names = _GenerateUniqueNames(self.lu, lv_names)
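# Illustrative only: for disk 0, lv_names is ['.disk0_data', '.disk0_meta'];
# _GenerateUniqueNames is assumed to prefix each with a cluster-unique ID so
# the new LVs cannot clash with the LVs being replaced.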
11522 (data_disk, meta_disk) = dev.children
11523 vg_data = data_disk.logical_id[0]
11524 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11525 logical_id=(vg_data, names[0]),
11526 params=data_disk.params)
11527 vg_meta = meta_disk.logical_id[0]
11528 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11529 size=constants.DRBD_META_SIZE,
11530 logical_id=(vg_meta, names[1]),
11531 params=meta_disk.params)
11533 new_lvs = [lv_data, lv_meta]
11534 old_lvs = [child.Copy() for child in dev.children]
11535 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11537 # we pass force_create=True to force the LVM creation
11538 for new_lv in new_lvs:
11539 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11540 _GetInstanceInfoText(self.instance), False)
11544 def _CheckDevices(self, node_name, iv_names):
11545 for name, (dev, _, _) in iv_names.iteritems():
11546 self.cfg.SetDiskID(dev, node_name)
11548 result = _BlockdevFind(self, node_name, dev, self.instance)
11550 msg = result.fail_msg
11551 if msg or not result.payload:
11553 msg = "disk not found"
11554 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11557 if result.payload.is_degraded:
11558 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11560 def _RemoveOldStorage(self, node_name, iv_names):
11561 for name, (_, old_lvs, _) in iv_names.iteritems():
11562 self.lu.LogInfo("Remove logical volumes for %s", name)
11565 self.cfg.SetDiskID(lv, node_name)
11567 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11569 self.lu.LogWarning("Can't remove old LV: %s", msg,
11570 hint="remove unused LVs manually")
11572 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11573 """Replace a disk on the primary or secondary for DRBD 8.
11575 The algorithm for replace is quite complicated:
11577 1. for each disk to be replaced:
11579 1. create new LVs on the target node with unique names
11580 1. detach old LVs from the drbd device
11581 1. rename old LVs to name_replaced.<time_t>
11582 1. rename new LVs to old LVs
11583 1. attach the new LVs (with the old names now) to the drbd device
11585 1. wait for sync across all devices
11587 1. for each modified disk:
11589 1. remove old LVs (which have the name name_replaced.<time_t>)
11591 Failures are not very well handled.
11596 # Step: check device activation
11597 self.lu.LogStep(1, steps_total, "Check device existence")
11598 self._CheckDisksExistence([self.other_node, self.target_node])
11599 self._CheckVolumeGroup([self.target_node, self.other_node])
11601 # Step: check other node consistency
11602 self.lu.LogStep(2, steps_total, "Check peer consistency")
11603 self._CheckDisksConsistency(self.other_node,
11604 self.other_node == self.instance.primary_node,
11607 # Step: create new storage
11608 self.lu.LogStep(3, steps_total, "Allocate new storage")
11609 iv_names = self._CreateNewStorage(self.target_node)
11611 # Step: for each lv, detach+rename*2+attach
11612 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11613 for dev, old_lvs, new_lvs in iv_names.itervalues():
11614 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11616 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11618 result.Raise("Can't detach drbd from local storage on node"
11619 " %s for device %s" % (self.target_node, dev.iv_name))
11621 #cfg.Update(instance)
11623 # ok, we created the new LVs, so now we know we have the needed
11624 # storage; as such, we proceed on the target node to rename
11625 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11626 # using the assumption that logical_id == physical_id (which in
11627 # turn is the unique_id on that node)
11629 # FIXME(iustin): use a better name for the replaced LVs
11630 temp_suffix = int(time.time())
11631 ren_fn = lambda d, suff: (d.physical_id[0],
11632 d.physical_id[1] + "_replaced-%s" % suff)
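# Illustrative only: an LV with physical_id ("xenvg", "uuid.disk0_data") and
# temp_suffix 1500000000 is renamed to
# ("xenvg", "uuid.disk0_data_replaced-1500000000").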
11634 # Build the rename list based on what LVs exist on the node
11635 rename_old_to_new = []
11636 for to_ren in old_lvs:
11637 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11638 if not result.fail_msg and result.payload:
11640 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11642 self.lu.LogInfo("Renaming the old LVs on the target node")
11643 result = self.rpc.call_blockdev_rename(self.target_node,
11645 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11647 # Now we rename the new LVs to the old LVs
11648 self.lu.LogInfo("Renaming the new LVs on the target node")
11649 rename_new_to_old = [(new, old.physical_id)
11650 for old, new in zip(old_lvs, new_lvs)]
11651 result = self.rpc.call_blockdev_rename(self.target_node,
11653 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11655 # Intermediate steps of in-memory modifications
11656 for old, new in zip(old_lvs, new_lvs):
11657 new.logical_id = old.logical_id
11658 self.cfg.SetDiskID(new, self.target_node)
11660 # We need to modify old_lvs so that removal later removes the
11661 # right LVs, not the newly added ones; note that old_lvs is a
11663 for disk in old_lvs:
11664 disk.logical_id = ren_fn(disk, temp_suffix)
11665 self.cfg.SetDiskID(disk, self.target_node)
11667 # Now that the new lvs have the old name, we can add them to the device
11668 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11669 result = self.rpc.call_blockdev_addchildren(self.target_node,
11670 (dev, self.instance), new_lvs)
11671 msg = result.fail_msg
11673 for new_lv in new_lvs:
11674 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11677 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11678 hint=("cleanup manually the unused logical"
11680 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11682 cstep = itertools.count(5)
11684 if self.early_release:
11685 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11686 self._RemoveOldStorage(self.target_node, iv_names)
11687 # TODO: Check if releasing locks early still makes sense
11688 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11690 # Release all resource locks except those used by the instance
11691 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11692 keep=self.node_secondary_ip.keys())
11694 # Release all node locks while waiting for sync
11695 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11697 # TODO: Can the instance lock be downgraded here? Take the optional disk
11698 # shutdown in the caller into consideration.
11701 # This can fail as the old devices are degraded and _WaitForSync
11702 # does a combined result over all disks, so we don't check its return value
11703 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11704 _WaitForSync(self.lu, self.instance)
11706 # Check all devices manually
11707 self._CheckDevices(self.instance.primary_node, iv_names)
11709 # Step: remove old storage
11710 if not self.early_release:
11711 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11712 self._RemoveOldStorage(self.target_node, iv_names)
11714 def _ExecDrbd8Secondary(self, feedback_fn):
11715 """Replace the secondary node for DRBD 8.
11717 The algorithm for replace is quite complicated:
11718 - for all disks of the instance:
11719 - create new LVs on the new node with same names
11720 - shutdown the drbd device on the old secondary
11721 - disconnect the drbd network on the primary
11722 - create the drbd device on the new secondary
11723 - network attach the drbd on the primary, using an artifice:
11724 the drbd code for Attach() will connect to the network if it
11725 finds a device which is connected to the good local disks but
11726 not network enabled
11727 - wait for sync across all devices
11728 - remove all disks from the old secondary
11730 Failures are not very well handled.
11735 pnode = self.instance.primary_node
11737 # Step: check device activation
11738 self.lu.LogStep(1, steps_total, "Check device existence")
11739 self._CheckDisksExistence([self.instance.primary_node])
11740 self._CheckVolumeGroup([self.instance.primary_node])
11742 # Step: check other node consistency
11743 self.lu.LogStep(2, steps_total, "Check peer consistency")
11744 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11746 # Step: create new storage
11747 self.lu.LogStep(3, steps_total, "Allocate new storage")
11748 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11749 for idx, dev in enumerate(disks):
11750 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11751 (self.new_node, idx))
11752 # we pass force_create=True to force LVM creation
11753 for new_lv in dev.children:
11754 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11755 True, _GetInstanceInfoText(self.instance), False)
11757 # Step 4: drbd minors and drbd setup changes
11758 # after this, we must manually remove the drbd minors on both the
11759 # error and the success paths
11760 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11761 minors = self.cfg.AllocateDRBDMinor([self.new_node
11762 for dev in self.instance.disks],
11763 self.instance.name)
11764 logging.debug("Allocated minors %r", minors)
11767 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11768 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11769 (self.new_node, idx))
11770 # create new devices on new_node; note that we create two IDs:
11771 # one without port, so the drbd will be activated without
11772 # networking information on the new node at this stage, and one
11773 # with network, for the later activation in step 4
11774 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11775 if self.instance.primary_node == o_node1:
11778 assert self.instance.primary_node == o_node2, "Three-node instance?"
11781 new_alone_id = (self.instance.primary_node, self.new_node, None,
11782 p_minor, new_minor, o_secret)
11783 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11784 p_minor, new_minor, o_secret)
11786 iv_names[idx] = (dev, dev.children, new_net_id)
11787 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11789 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11790 logical_id=new_alone_id,
11791 children=dev.children,
11794 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11797 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11799 _GetInstanceInfoText(self.instance), False)
11800 except errors.GenericError:
11801 self.cfg.ReleaseDRBDMinors(self.instance.name)
11804 # We have new devices, shutdown the drbd on the old secondary
11805 for idx, dev in enumerate(self.instance.disks):
11806 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
11807 self.cfg.SetDiskID(dev, self.target_node)
11808 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11809 (dev, self.instance)).fail_msg
11811 self.lu.LogWarning("Failed to shut down drbd for disk/%d on old"
11812 " node: %s" % (idx, msg),
11813 hint=("Please cleanup this device manually as"
11814 " soon as possible"))
11816 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11817 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11818 self.instance.disks)[pnode]
11820 msg = result.fail_msg
11822 # detaches didn't succeed (unlikely)
11823 self.cfg.ReleaseDRBDMinors(self.instance.name)
11824 raise errors.OpExecError("Can't detach the disks from the network on"
11825 " old node: %s" % (msg,))
11827 # if we managed to detach at least one, we update all the disks of
11828 # the instance to point to the new secondary
11829 self.lu.LogInfo("Updating instance configuration")
11830 for dev, _, new_logical_id in iv_names.itervalues():
11831 dev.logical_id = new_logical_id
11832 self.cfg.SetDiskID(dev, self.instance.primary_node)
11834 self.cfg.Update(self.instance, feedback_fn)
11836 # Release all node locks (the configuration has been updated)
11837 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11839 # and now perform the drbd attach
11840 self.lu.LogInfo("Attaching primary drbds to new secondary"
11841 " (standalone => connected)")
11842 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11844 self.node_secondary_ip,
11845 (self.instance.disks, self.instance),
11846 self.instance.name,
11848 for to_node, to_result in result.items():
11849 msg = to_result.fail_msg
11851 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11853 hint=("please do a gnt-instance info to see the"
11854 " status of disks"))
11856 cstep = itertools.count(5)
11858 if self.early_release:
11859 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11860 self._RemoveOldStorage(self.target_node, iv_names)
11861 # TODO: Check if releasing locks early still makes sense
11862 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11864 # Release all resource locks except those used by the instance
11865 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11866 keep=self.node_secondary_ip.keys())
11868 # TODO: Can the instance lock be downgraded here? Take the optional disk
11869 # shutdown in the caller into consideration.
11872 # This can fail as the old devices are degraded and _WaitForSync
11873 # does a combined result over all disks, so we don't check its return value
11874 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11875 _WaitForSync(self.lu, self.instance)
11877 # Check all devices manually
11878 self._CheckDevices(self.instance.primary_node, iv_names)
11880 # Step: remove old storage
11881 if not self.early_release:
11882 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11883 self._RemoveOldStorage(self.target_node, iv_names)
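# Illustrative only: this tasklet backs "gnt-instance replace-disks"; the
# secondary-change path above corresponds roughly to
#   gnt-instance replace-disks --new-secondary node3 inst1
# or, when an allocator picks the new node,
#   gnt-instance replace-disks --iallocator hail inst1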
11886 class LURepairNodeStorage(NoHooksLU):
11887 """Repairs the volume group on a node.
11892 def CheckArguments(self):
11893 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11895 storage_type = self.op.storage_type
11897 if (constants.SO_FIX_CONSISTENCY not in
11898 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11899 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
11900 " repaired" % storage_type,
11901 errors.ECODE_INVAL)
11903 def ExpandNames(self):
11904 self.needed_locks = {
11905 locking.LEVEL_NODE: [self.op.node_name],
11908 def _CheckFaultyDisks(self, instance, node_name):
11909 """Ensure faulty disks abort the opcode or at least warn."""
11911 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11913 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11914 " node '%s'" % (instance.name, node_name),
11915 errors.ECODE_STATE)
11916 except errors.OpPrereqError, err:
11917 if self.op.ignore_consistency:
11918 self.LogWarning(str(err.args[0]))
11922 def CheckPrereq(self):
11923 """Check prerequisites.
11926 # Check whether any instance on this node has faulty disks
11927 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11928 if inst.admin_state != constants.ADMINST_UP:
11930 check_nodes = set(inst.all_nodes)
11931 check_nodes.discard(self.op.node_name)
11932 for inst_node_name in check_nodes:
11933 self._CheckFaultyDisks(inst, inst_node_name)
11935 def Exec(self, feedback_fn):
11936 feedback_fn("Repairing storage unit '%s' on %s ..." %
11937 (self.op.name, self.op.node_name))
11939 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11940 result = self.rpc.call_storage_execute(self.op.node_name,
11941 self.op.storage_type, st_args,
11943 constants.SO_FIX_CONSISTENCY)
11944 result.Raise("Failed to repair storage unit '%s' on %s" %
11945 (self.op.name, self.op.node_name))
11948 class LUNodeEvacuate(NoHooksLU):
11949 """Evacuates instances off a list of nodes.
11954 _MODE2IALLOCATOR = {
11955 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11956 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11957 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11959 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11960 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11961 constants.IALLOCATOR_NEVAC_MODES)
11963 def CheckArguments(self):
11964 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11966 def ExpandNames(self):
11967 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11969 if self.op.remote_node is not None:
11970 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11971 assert self.op.remote_node
11973 if self.op.remote_node == self.op.node_name:
11974 raise errors.OpPrereqError("Cannot use evacuated node as a new"
11975 " secondary node", errors.ECODE_INVAL)
11977 if self.op.mode != constants.NODE_EVAC_SEC:
11978 raise errors.OpPrereqError("Without the use of an iallocator only"
11979 " secondary instances can be evacuated",
11980 errors.ECODE_INVAL)
11983 self.share_locks = _ShareAll()
11984 self.needed_locks = {
11985 locking.LEVEL_INSTANCE: [],
11986 locking.LEVEL_NODEGROUP: [],
11987 locking.LEVEL_NODE: [],
11990 # Determine nodes (via group) optimistically, needs verification once locks
11991 # have been acquired
11992 self.lock_nodes = self._DetermineNodes()
11994 def _DetermineNodes(self):
11995 """Gets the list of nodes to operate on.
11998 if self.op.remote_node is None:
11999 # Iallocator will choose any node(s) in the same group
12000 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12002 group_nodes = frozenset([self.op.remote_node])
12004 # Determine nodes to be locked
12005 return set([self.op.node_name]) | group_nodes
12007 def _DetermineInstances(self):
12008 """Builds list of instances to operate on.
12011 assert self.op.mode in constants.NODE_EVAC_MODES
12013 if self.op.mode == constants.NODE_EVAC_PRI:
12014 # Primary instances only
12015 inst_fn = _GetNodePrimaryInstances
12016 assert self.op.remote_node is None, \
12017 "Evacuating primary instances requires iallocator"
12018 elif self.op.mode == constants.NODE_EVAC_SEC:
12019 # Secondary instances only
12020 inst_fn = _GetNodeSecondaryInstances
12023 assert self.op.mode == constants.NODE_EVAC_ALL
12024 inst_fn = _GetNodeInstances
12025 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12027 raise errors.OpPrereqError("Due to an issue with the iallocator"
12028 " interface it is not possible to evacuate"
12029 " all instances at once; specify explicitly"
12030 " whether to evacuate primary or secondary"
12032 errors.ECODE_INVAL)
12034 return inst_fn(self.cfg, self.op.node_name)
12036 def DeclareLocks(self, level):
12037 if level == locking.LEVEL_INSTANCE:
12038 # Lock instances optimistically, needs verification once node and group
12039 # locks have been acquired
12040 self.needed_locks[locking.LEVEL_INSTANCE] = \
12041 set(i.name for i in self._DetermineInstances())
12043 elif level == locking.LEVEL_NODEGROUP:
12044 # Lock node groups for all potential target nodes optimistically, needs
12045 # verification once nodes have been acquired
12046 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12047 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12049 elif level == locking.LEVEL_NODE:
12050 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12052 def CheckPrereq(self):
12054 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12055 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12056 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12058 need_nodes = self._DetermineNodes()
12060 if not owned_nodes.issuperset(need_nodes):
12061 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12062 " locks were acquired, current nodes are"
12063 " are '%s', used to be '%s'; retry the"
12065 (self.op.node_name,
12066 utils.CommaJoin(need_nodes),
12067 utils.CommaJoin(owned_nodes)),
12068 errors.ECODE_STATE)
12070 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12071 if owned_groups != wanted_groups:
12072 raise errors.OpExecError("Node groups changed since locks were acquired,"
12073 " current groups are '%s', used to be '%s';"
12074 " retry the operation" %
12075 (utils.CommaJoin(wanted_groups),
12076 utils.CommaJoin(owned_groups)))
12078 # Determine affected instances
12079 self.instances = self._DetermineInstances()
12080 self.instance_names = [i.name for i in self.instances]
12082 if set(self.instance_names) != owned_instances:
12083 raise errors.OpExecError("Instances on node '%s' changed since locks"
12084 " were acquired, current instances are '%s',"
12085 " used to be '%s'; retry the operation" %
12086 (self.op.node_name,
12087 utils.CommaJoin(self.instance_names),
12088 utils.CommaJoin(owned_instances)))
12090 if self.instance_names:
12091 self.LogInfo("Evacuating instances from node '%s': %s",
12093 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12095 self.LogInfo("No instances to evacuate from node '%s'",
12098 if self.op.remote_node is not None:
12099 for i in self.instances:
12100 if i.primary_node == self.op.remote_node:
12101 raise errors.OpPrereqError("Node %s is the primary node of"
12102 " instance %s, cannot use it as"
12104 (self.op.remote_node, i.name),
12105 errors.ECODE_INVAL)
12107 def Exec(self, feedback_fn):
12108 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12110 if not self.instance_names:
12111 # No instances to evacuate
12114 elif self.op.iallocator is not None:
12115 # TODO: Implement relocation to other group
12116 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12117 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12118 instances=list(self.instance_names))
12119 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12121 ial.Run(self.op.iallocator)
12123 if not ial.success:
12124 raise errors.OpPrereqError("Can't compute node evacuation using"
12125 " iallocator '%s': %s" %
12126 (self.op.iallocator, ial.info),
12127 errors.ECODE_NORES)
12129 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12131 elif self.op.remote_node is not None:
12132 assert self.op.mode == constants.NODE_EVAC_SEC
12134 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12135 remote_node=self.op.remote_node,
12137 mode=constants.REPLACE_DISK_CHG,
12138 early_release=self.op.early_release)]
12139 for instance_name in self.instance_names]
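# Illustrative only: this builds one single-opcode job per instance, each job
# switching that instance's secondary to self.op.remote_node.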
12142 raise errors.ProgrammerError("No iallocator or remote node")
12144 return ResultWithJobs(jobs)
12147 def _SetOpEarlyRelease(early_release, op):
12148 """Sets C{early_release} flag on opcodes if available.
12152 op.early_release = early_release
12153 except AttributeError:
12154 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12159 def _NodeEvacDest(use_nodes, group, nodes):
12160 """Returns group or nodes depending on caller's choice.
12164 return utils.CommaJoin(nodes)
12169 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12170 """Unpacks the result of change-group and node-evacuate iallocator requests.
12172 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12173 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12175 @type lu: L{LogicalUnit}
12176 @param lu: Logical unit instance
12177 @type alloc_result: tuple/list
12178 @param alloc_result: Result from iallocator
12179 @type early_release: bool
12180 @param early_release: Whether to release locks early if possible
12181 @type use_nodes: bool
12182 @param use_nodes: Whether to display node names instead of groups
12185 (moved, failed, jobs) = alloc_result
12188 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12189 for (name, reason) in failed)
12190 lu.LogWarning("Unable to evacuate instances %s", failreason)
12191 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12194 lu.LogInfo("Instances to be moved: %s",
12195 utils.CommaJoin("%s (to %s)" %
12196 (name, _NodeEvacDest(use_nodes, group, nodes))
12197 for (name, group, nodes) in moved))
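# Illustrative only: alloc_result is expected to look roughly like
#   ([("inst1", "group1", ["node2"])], [], [[<serialized opcode dicts>]]);
# each inner list in the jobs element becomes one job of deserialized
# opcodes below.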
12199 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12200 map(opcodes.OpCode.LoadOpCode, ops))
12204 def _DiskSizeInBytesToMebibytes(lu, size):
12205 """Converts a disk size in bytes to mebibytes.
12207 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12210 (mib, remainder) = divmod(size, 1024 * 1024)
12213 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12214 " to not overwrite existing data (%s bytes will not be"
12215 " wiped)", (1024 * 1024) - remainder)
12221 class LUInstanceGrowDisk(LogicalUnit):
12222 """Grow a disk of an instance.
12225 HPATH = "disk-grow"
12226 HTYPE = constants.HTYPE_INSTANCE
12229 def ExpandNames(self):
12230 self._ExpandAndLockInstance()
12231 self.needed_locks[locking.LEVEL_NODE] = []
12232 self.needed_locks[locking.LEVEL_NODE_RES] = []
12233 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12234 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12236 def DeclareLocks(self, level):
12237 if level == locking.LEVEL_NODE:
12238 self._LockInstancesNodes()
12239 elif level == locking.LEVEL_NODE_RES:
12241 self.needed_locks[locking.LEVEL_NODE_RES] = \
12242 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12244 def BuildHooksEnv(self):
12245 """Build hooks env.
12247 This runs on the master, the primary and all the secondaries.
12251 "DISK": self.op.disk,
12252 "AMOUNT": self.op.amount,
12253 "ABSOLUTE": self.op.absolute,
12255 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12258 def BuildHooksNodes(self):
12259 """Build hooks nodes.
12262 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12265 def CheckPrereq(self):
12266 """Check prerequisites.
12268 This checks that the instance is in the cluster.
12271 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12272 assert instance is not None, \
12273 "Cannot retrieve locked instance %s" % self.op.instance_name
12274 nodenames = list(instance.all_nodes)
12275 for node in nodenames:
12276 _CheckNodeOnline(self, node)
12278 self.instance = instance
12280 if instance.disk_template not in constants.DTS_GROWABLE:
12281 raise errors.OpPrereqError("Instance's disk layout does not support"
12282 " growing", errors.ECODE_INVAL)
12284 self.disk = instance.FindDisk(self.op.disk)
12286 if self.op.absolute:
12287 self.target = self.op.amount
12288 self.delta = self.target - self.disk.size
12290 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12291 "current disk size (%s)" %
12292 (utils.FormatUnit(self.target, "h"),
12293 utils.FormatUnit(self.disk.size, "h")),
12294 errors.ECODE_STATE)
12296 self.delta = self.op.amount
12297 self.target = self.disk.size + self.delta
12299 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12300 utils.FormatUnit(self.delta, "h"),
12301 errors.ECODE_INVAL)
12303 if instance.disk_template not in (constants.DT_FILE,
12304 constants.DT_SHARED_FILE,
12306 # TODO: check the free disk space for file, when that feature will be
12308 _CheckNodesFreeDiskPerVG(self, nodenames,
12309 self.disk.ComputeGrowth(self.delta))
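# Illustrative only (values in MiB): for a 10240 MiB disk, an absolute
# request of 15360 yields self.target == 15360 and self.delta == 5120, while
# a relative request of 5120 produces the same target via
# self.target = self.disk.size + self.delta.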
12311 def Exec(self, feedback_fn):
12312 """Execute disk grow.
12315 instance = self.instance
12318 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12319 assert (self.owned_locks(locking.LEVEL_NODE) ==
12320 self.owned_locks(locking.LEVEL_NODE_RES))
12322 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12324 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12326 raise errors.OpExecError("Cannot activate block device to grow")
12328 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12329 (self.op.disk, instance.name,
12330 utils.FormatUnit(self.delta, "h"),
12331 utils.FormatUnit(self.target, "h")))
12333 # First run all grow ops in dry-run mode
12334 for node in instance.all_nodes:
12335 self.cfg.SetDiskID(disk, node)
12336 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12338 result.Raise("Dry-run grow request failed on node %s" % node)
12341 # Get disk size from primary node for wiping
12342 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12343 result.Raise("Failed to retrieve disk size from node '%s'" %
12344 instance.primary_node)
12346 (disk_size_in_bytes, ) = result.payload
12348 if disk_size_in_bytes is None:
12349 raise errors.OpExecError("Failed to retrieve disk size from primary"
12350 " node '%s'" % instance.primary_node)
12352 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12354 assert old_disk_size >= disk.size, \
12355 ("Retrieved disk size too small (got %s, should be at least %s)" %
12356 (old_disk_size, disk.size))
12358 old_disk_size = None
12360 # We know that (as far as we can test) operations across different
12361 # nodes will succeed, time to run it for real on the backing storage
12362 for node in instance.all_nodes:
12363 self.cfg.SetDiskID(disk, node)
12364 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12366 result.Raise("Grow request failed on node %s" % node)
12368 # And now execute it for logical storage, on the primary node
12369 node = instance.primary_node
12370 self.cfg.SetDiskID(disk, node)
12371 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12373 result.Raise("Grow request failed on node %s" % node)
12375 disk.RecordGrow(self.delta)
12376 self.cfg.Update(instance, feedback_fn)
12378 # Changes have been recorded, release node lock
12379 _ReleaseLocks(self, locking.LEVEL_NODE)
12381 # Downgrade lock while waiting for sync
12382 self.glm.downgrade(locking.LEVEL_INSTANCE)
12384 assert wipe_disks ^ (old_disk_size is None)
12387 assert instance.disks[self.op.disk] == disk
12389 # Wipe newly added disk space
12390 _WipeDisks(self, instance,
12391 disks=[(self.op.disk, disk, old_disk_size)])
12393 if self.op.wait_for_sync:
12394 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12396 self.LogWarning("Disk syncing has not returned a good status; check"
12398 if instance.admin_state != constants.ADMINST_UP:
12399 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12400 elif instance.admin_state != constants.ADMINST_UP:
12401 self.LogWarning("Not shutting down the disk even though the instance is"
12402 " not supposed to be running because no wait for"
12403 " sync mode was requested")
12405 assert self.owned_locks(locking.LEVEL_NODE_RES)
12406 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12409 class LUInstanceQueryData(NoHooksLU):
12410 """Query runtime instance data.
12415 def ExpandNames(self):
12416 self.needed_locks = {}
12418 # Use locking if requested or when non-static information is wanted
12419 if not (self.op.static or self.op.use_locking):
12420 self.LogWarning("Non-static data requested, locks need to be acquired")
12421 self.op.use_locking = True
12423 if self.op.instances or not self.op.use_locking:
12424 # Expand instance names right here
12425 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12427 # Will use acquired locks
12428 self.wanted_names = None
12430 if self.op.use_locking:
12431 self.share_locks = _ShareAll()
12433 if self.wanted_names is None:
12434 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12436 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12438 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12439 self.needed_locks[locking.LEVEL_NODE] = []
12440 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12442 def DeclareLocks(self, level):
12443 if self.op.use_locking:
12444 if level == locking.LEVEL_NODEGROUP:
12445 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12447 # Lock all groups used by instances optimistically; this requires going
12448 # via the node before it's locked, requiring verification later on
12449 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12450 frozenset(group_uuid
12451 for instance_name in owned_instances
12453 self.cfg.GetInstanceNodeGroups(instance_name))
12455 elif level == locking.LEVEL_NODE:
12456 self._LockInstancesNodes()
12458 def CheckPrereq(self):
12459 """Check prerequisites.
12461 This only checks the optional instance list against the existing names.
12464 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12465 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12466 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12468 if self.wanted_names is None:
12469 assert self.op.use_locking, "Locking was not used"
12470 self.wanted_names = owned_instances
12472 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12474 if self.op.use_locking:
12475 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12478 assert not (owned_instances or owned_groups or owned_nodes)
12480 self.wanted_instances = instances.values()
12482 def _ComputeBlockdevStatus(self, node, instance, dev):
12483 """Returns the status of a block device
12486 if self.op.static or not node:
12489 self.cfg.SetDiskID(dev, node)
12491 result = self.rpc.call_blockdev_find(node, dev)
12495 result.Raise("Can't compute disk status for %s" % instance.name)
12497 status = result.payload
12501 return (status.dev_path, status.major, status.minor,
12502 status.sync_percent, status.estimated_time,
12503 status.is_degraded, status.ldisk_status)
12505 def _ComputeDiskStatus(self, instance, snode, dev):
12506 """Compute block device status.
12509 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12511 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12513 def _ComputeDiskStatusInner(self, instance, snode, dev):
12514 """Compute block device status.
12516 @attention: The device has to be annotated already.
12519 if dev.dev_type in constants.LDS_DRBD:
12520 # we change the snode then (otherwise we use the one passed in)
12521 if dev.logical_id[0] == instance.primary_node:
12522 snode = dev.logical_id[1]
12524 snode = dev.logical_id[0]
12526 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12528 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12531 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12538 "iv_name": dev.iv_name,
12539 "dev_type": dev.dev_type,
12540 "logical_id": dev.logical_id,
12541 "physical_id": dev.physical_id,
12542 "pstatus": dev_pstatus,
12543 "sstatus": dev_sstatus,
12544 "children": dev_children,
12549 def Exec(self, feedback_fn):
12550 """Gather and return data"""
12553 cluster = self.cfg.GetClusterInfo()
12555 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12556 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12558 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12559 for node in nodes.values()))
12561 group2name_fn = lambda uuid: groups[uuid].name
12563 for instance in self.wanted_instances:
12564 pnode = nodes[instance.primary_node]
12566 if self.op.static or pnode.offline:
12567 remote_state = None
12569 self.LogWarning("Primary node %s is marked offline, returning static"
12570 " information only for instance %s" %
12571 (pnode.name, instance.name))
12573 remote_info = self.rpc.call_instance_info(instance.primary_node,
12575 instance.hypervisor)
12576 remote_info.Raise("Error checking node %s" % instance.primary_node)
12577 remote_info = remote_info.payload
12578 if remote_info and "state" in remote_info:
12579 remote_state = "up"
12581 if instance.admin_state == constants.ADMINST_UP:
12582 remote_state = "down"
12584 remote_state = instance.admin_state
12586 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12589 snodes_group_uuids = [nodes[snode_name].group
12590 for snode_name in instance.secondary_nodes]
12592 result[instance.name] = {
12593 "name": instance.name,
12594 "config_state": instance.admin_state,
12595 "run_state": remote_state,
12596 "pnode": instance.primary_node,
12597 "pnode_group_uuid": pnode.group,
12598 "pnode_group_name": group2name_fn(pnode.group),
12599 "snodes": instance.secondary_nodes,
12600 "snodes_group_uuids": snodes_group_uuids,
12601 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12603 # this happens to be the same format used for hooks
12604 "nics": _NICListToTuple(self, instance.nics),
12605 "disk_template": instance.disk_template,
12607 "hypervisor": instance.hypervisor,
12608 "network_port": instance.network_port,
12609 "hv_instance": instance.hvparams,
12610 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12611 "be_instance": instance.beparams,
12612 "be_actual": cluster.FillBE(instance),
12613 "os_instance": instance.osparams,
12614 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12615 "serial_no": instance.serial_no,
12616 "mtime": instance.mtime,
12617 "ctime": instance.ctime,
12618 "uuid": instance.uuid,
12624 def PrepareContainerMods(mods, private_fn):
12625 """Prepares a list of container modifications by adding a private data field.
12627 @type mods: list of tuples; (operation, index, parameters)
12628 @param mods: List of modifications
12629 @type private_fn: callable or None
12630 @param private_fn: Callable for constructing a private data field for a
12635 if private_fn is None:
12640 return [(op, idx, params, fn()) for (op, idx, params) in mods]
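# Illustrative only (hypothetical values): with
#   mods=[(constants.DDM_ADD, -1, {"size": 1024})] and private_fn=None
# this returns [(constants.DDM_ADD, -1, {"size": 1024}, None)].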
12643 #: Type description for changes as returned by L{ApplyContainerMods}'s
12645 _TApplyContModsCbChanges = \
12646 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12647 ht.TNonEmptyString,
12652 def ApplyContainerMods(kind, container, chgdesc, mods,
12653 create_fn, modify_fn, remove_fn):
12654 """Applies descriptions in C{mods} to C{container}.
12657 @param kind: One-word item description
12658 @type container: list
12659 @param container: Container to modify
12660 @type chgdesc: None or list
12661 @param chgdesc: List of applied changes
12663 @param mods: Modifications as returned by L{PrepareContainerMods}
12664 @type create_fn: callable
12665 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12666 receives absolute item index, parameters and private data object as added
12667 by L{PrepareContainerMods}, returns tuple containing new item and changes
12669 @type modify_fn: callable
12670 @param modify_fn: Callback for modifying an existing item
12671 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12672 and private data object as added by L{PrepareContainerMods}, returns
12674 @type remove_fn: callable
12675 @param remove_fn: Callback on removing item; receives absolute item index,
12676 item and private data object as added by L{PrepareContainerMods}
12679 for (op, idx, params, private) in mods:
12682 absidx = len(container) - 1
12684 raise IndexError("Not accepting negative indices other than -1")
12685 elif idx > len(container):
12686 raise IndexError("Got %s index %s, but there are only %s" %
12687 (kind, idx, len(container)))
12693 if op == constants.DDM_ADD:
12694 # Calculate where item will be added
12696 addidx = len(container)
12700 if create_fn is None:
12703 (item, changes) = create_fn(addidx, params, private)
12706 container.append(item)
12709 assert idx <= len(container)
12710 # list.insert does so before the specified index
12711 container.insert(idx, item)
12713 # Retrieve existing item
12715 item = container[absidx]
12717 raise IndexError("Invalid %s index %s" % (kind, idx))
12719 if op == constants.DDM_REMOVE:
12722 if remove_fn is not None:
12723 remove_fn(absidx, item, private)
12725 changes = [("%s/%s" % (kind, absidx), "remove")]
12727 assert container[absidx] == item
12728 del container[absidx]
12729 elif op == constants.DDM_MODIFY:
12730 if modify_fn is not None:
12731 changes = modify_fn(absidx, item, params, private)
12733 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12735 assert _TApplyContModsCbChanges(changes)
12737 if not (chgdesc is None or changes is None):
12738 chgdesc.extend(changes)
12741 def _UpdateIvNames(base_index, disks):
12742 """Updates the C{iv_name} attribute of disks.
12744 @type disks: list of L{objects.Disk}
12747 for (idx, disk) in enumerate(disks):
12748 disk.iv_name = "disk/%s" % (base_index + idx, )
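# For example, with base_index=2 and three disks this assigns the iv_name
# values "disk/2", "disk/3" and "disk/4".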
12751 class _InstNicModPrivate:
12752 """Data structure for network interface modifications.
12754 Used by L{LUInstanceSetParams}.
12757 def __init__(self):
12762 class LUInstanceSetParams(LogicalUnit):
12763 """Modifies an instances's parameters.
12766 HPATH = "instance-modify"
12767 HTYPE = constants.HTYPE_INSTANCE
12771 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12772 assert ht.TList(mods)
12773 assert not mods or len(mods[0]) in (2, 3)
12775 if mods and len(mods[0]) == 2:
12779 for op, params in mods:
12780 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12781 result.append((op, -1, params))
12785 raise errors.OpPrereqError("Only one %s add or remove operation is"
12786 " supported at a time" % kind,
12787 errors.ECODE_INVAL)
12789 result.append((constants.DDM_MODIFY, op, params))
12791 assert verify_fn(result)
12798 def _CheckMods(kind, mods, key_types, item_fn):
12799 """Ensures requested disk/NIC modifications are valid.
12802 for (op, _, params) in mods:
12803 assert ht.TDict(params)
12805 utils.ForceDictType(params, key_types)
12807 if op == constants.DDM_REMOVE:
12809 raise errors.OpPrereqError("No settings should be passed when"
12810 " removing a %s" % kind,
12811 errors.ECODE_INVAL)
12812 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12813 item_fn(op, params)
12815 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12818 def _VerifyDiskModification(op, params):
12819 """Verifies a disk modification.
12822 if op == constants.DDM_ADD:
12823 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12824 if mode not in constants.DISK_ACCESS_SET:
12825 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12826 errors.ECODE_INVAL)
12828 size = params.get(constants.IDISK_SIZE, None)
12830 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12831 constants.IDISK_SIZE, errors.ECODE_INVAL)
12835 except (TypeError, ValueError), err:
12836 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12837 errors.ECODE_INVAL)
12839 params[constants.IDISK_SIZE] = size
12841 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12842 raise errors.OpPrereqError("Disk size change not possible, use"
12843 " grow-disk", errors.ECODE_INVAL)
12846 def _VerifyNicModification(op, params):
12847 """Verifies a network interface modification.
12850 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12851 ip = params.get(constants.INIC_IP, None)
12852 req_net = params.get(constants.INIC_NETWORK, None)
12853 link = params.get(constants.NIC_LINK, None)
12854 mode = params.get(constants.NIC_MODE, None)
12855 if req_net is not None:
12856 if req_net.lower() == constants.VALUE_NONE:
12857 params[constants.INIC_NETWORK] = None
12859 elif link is not None or mode is not None:
12860 raise errors.OpPrereqError("If network is given"
12861 " mode or link should not",
12862 errors.ECODE_INVAL)
12864 if op == constants.DDM_ADD:
12865 macaddr = params.get(constants.INIC_MAC, None)
12866 if macaddr is None:
12867 params[constants.INIC_MAC] = constants.VALUE_AUTO
12870 if ip.lower() == constants.VALUE_NONE:
12871 params[constants.INIC_IP] = None
12873 if ip.lower() == constants.NIC_IP_POOL:
12874 if op == constants.DDM_ADD and req_net is None:
12875 raise errors.OpPrereqError("If ip=pool, parameter network"
12876 " must be passed too",
12877 errors.ECODE_INVAL)
12879 if not netutils.IPAddress.IsValid(ip):
12880 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12881 errors.ECODE_INVAL)
12883 if constants.INIC_MAC in params:
12884 macaddr = params[constants.INIC_MAC]
12885 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12886 macaddr = utils.NormalizeAndValidateMac(macaddr)
12888 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12889 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12890 " modifying an existing NIC",
12891 errors.ECODE_INVAL)
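# Illustrative sketch (values made up): NIC modifications accepted by the
# check above.
#
#   add a NIC with an auto-generated MAC, drawing its IP from a network pool:
#     (constants.DDM_ADD, -1, {constants.INIC_IP: constants.NIC_IP_POOL,
#                              constants.INIC_NETWORK: "net1"})
#   give NIC 0 a fixed MAC address ("auto" is rejected when modifying):
#     (constants.DDM_MODIFY, 0, {constants.INIC_MAC: "aa:00:00:35:cd:17"})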
12893 def CheckArguments(self):
12894 if not (self.op.nics or self.op.disks or self.op.disk_template or
12895 self.op.hvparams or self.op.beparams or self.op.os_name or
12896 self.op.offline is not None or self.op.runtime_mem):
12897 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12899 if self.op.hvparams:
12900 _CheckGlobalHvParams(self.op.hvparams)
12902 self.op.disks = self._UpgradeDiskNicMods(
12903 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12904 self.op.nics = self._UpgradeDiskNicMods(
12905 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12907 # Check disk modifications
12908 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12909 self._VerifyDiskModification)
12911 if self.op.disks and self.op.disk_template is not None:
12912 raise errors.OpPrereqError("Disk template conversion and other disk"
12913 " changes not supported at the same time",
12914 errors.ECODE_INVAL)
12916 if (self.op.disk_template and
12917 self.op.disk_template in constants.DTS_INT_MIRROR and
12918 self.op.remote_node is None):
12919 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12920 " one requires specifying a secondary node",
12921 errors.ECODE_INVAL)
12923 # Check NIC modifications
12924 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12925 self._VerifyNicModification)
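# Illustrative sketch (hypothetical values; only fields this LU reads): a
# minimal opcode combining a NIC addition with a backend-parameter change, as
# validated by CheckArguments above.
#
#   op = opcodes.OpInstanceSetParams(
#     instance_name="instance1.example.com",
#     nics=[(constants.DDM_ADD, -1, {constants.INIC_NETWORK: "net1"})],
#     beparams={constants.BE_MAXMEM: 2048})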
12927 def ExpandNames(self):
12928 self._ExpandAndLockInstance()
12929 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12930 # Can't even acquire node locks in shared mode as upcoming changes in
12931 # Ganeti 2.6 will start to modify the node object on disk conversion
12932 self.needed_locks[locking.LEVEL_NODE] = []
12933 self.needed_locks[locking.LEVEL_NODE_RES] = []
12934 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12935 # Lock node group to look up the ipolicy
12936 self.share_locks[locking.LEVEL_NODEGROUP] = 1
12938 def DeclareLocks(self, level):
12939 if level == locking.LEVEL_NODEGROUP:
12940 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12941 # Acquire locks for the instance's nodegroups optimistically. Needs
12942 # to be verified in CheckPrereq
12943 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12944 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12945 elif level == locking.LEVEL_NODE:
12946 self._LockInstancesNodes()
12947 if self.op.disk_template and self.op.remote_node:
12948 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12949 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12950 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12952 self.needed_locks[locking.LEVEL_NODE_RES] = \
12953 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12955 def BuildHooksEnv(self):
12956 """Build hooks env.
12958 This runs on the master, primary and secondaries.
12962 if constants.BE_MINMEM in self.be_new:
12963 args["minmem"] = self.be_new[constants.BE_MINMEM]
12964 if constants.BE_MAXMEM in self.be_new:
12965 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12966 if constants.BE_VCPUS in self.be_new:
12967 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12968 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12969 # information at all.
12971 if self._new_nics is not None:
12974 for nic in self._new_nics:
12975 n = copy.deepcopy(nic)
12976 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12977 n.nicparams = nicparams
12978 nics.append(_NICToTuple(self, n))
12980 args["nics"] = nics
12982 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12983 if self.op.disk_template:
12984 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12985 if self.op.runtime_mem:
12986 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12990 def BuildHooksNodes(self):
12991 """Build hooks nodes.
12994 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12997 def _PrepareNicModification(self, params, private, old_ip, old_net,
12998 old_params, cluster, pnode):
13000 update_params_dict = dict([(key, params[key])
13001 for key in constants.NICS_PARAMETERS
13004 req_link = update_params_dict.get(constants.NIC_LINK, None)
13005 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13007 new_net = params.get(constants.INIC_NETWORK, old_net)
13008 if new_net is not None:
13009 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
13010 if netparams is None:
13011 raise errors.OpPrereqError("No netparams found for the network"
13012 " %s, probably not connected" % new_net,
13013 errors.ECODE_INVAL)
13014 new_params = dict(netparams)
13016 new_params = _GetUpdatedParams(old_params, update_params_dict)
13018 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13020 new_filled_params = cluster.SimpleFillNIC(new_params)
13021 objects.NIC.CheckParameterSyntax(new_filled_params)
13023 new_mode = new_filled_params[constants.NIC_MODE]
13024 if new_mode == constants.NIC_MODE_BRIDGED:
13025 bridge = new_filled_params[constants.NIC_LINK]
13026 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13028 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13030 self.warn.append(msg)
13032 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13034 elif new_mode == constants.NIC_MODE_ROUTED:
13035 ip = params.get(constants.INIC_IP, old_ip)
13037 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13038 " on a routed NIC", errors.ECODE_INVAL)
13040 elif new_mode == constants.NIC_MODE_OVS:
13041 # TODO: check OVS link
13042 self.LogInfo("OVS links are currently not checked for correctness")
13044 if constants.INIC_MAC in params:
13045 mac = params[constants.INIC_MAC]
13047 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13048 errors.ECODE_INVAL)
13049 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13050 # otherwise generate the MAC address
13051 params[constants.INIC_MAC] = \
13052 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13054 # or validate/reserve the current one
13056 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13057 except errors.ReservationError:
13058 raise errors.OpPrereqError("MAC address '%s' already in use"
13059 " in cluster" % mac,
13060 errors.ECODE_NOTUNIQUE)
13061 elif new_net != old_net:
13063 def get_net_prefix(net):
13065 uuid = self.cfg.LookupNetwork(net)
13067 nobj = self.cfg.GetNetwork(uuid)
13068 return nobj.mac_prefix
13071 new_prefix = get_net_prefix(new_net)
13072 old_prefix = get_net_prefix(old_net)
13073 if old_prefix != new_prefix:
13074 params[constants.INIC_MAC] = \
13075 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13077 # if there is a change in nic-network configuration
13078 new_ip = params.get(constants.INIC_IP, old_ip)
13079 if (new_ip, new_net) != (old_ip, old_net):
13082 if new_ip.lower() == constants.NIC_IP_POOL:
13084 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
13085 except errors.ReservationError:
13086 raise errors.OpPrereqError("Unable to get a free IP"
13087 " from the address pool",
13088 errors.ECODE_STATE)
13089 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
13090 params[constants.INIC_IP] = new_ip
13091 elif new_ip != old_ip or new_net != old_net:
13093 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
13094 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
13095 except errors.ReservationError:
13096 raise errors.OpPrereqError("IP %s not available in network %s" %
13098 errors.ECODE_NOTUNIQUE)
13099 elif new_ip.lower() == constants.NIC_IP_POOL:
13100 raise errors.OpPrereqError("ip=pool, but no network found",
13101 errors.ECODE_INVAL)
13104 if self.op.conflicts_check:
13105 _CheckForConflictingIp(self, new_ip, pnode)
13110 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13111 except errors.AddressPoolError:
13112 logging.warning("Release IP %s not contained in network %s",
13115 # there are no changes in (net, ip) tuple
13116 elif (old_net is not None and
13117 (req_link is not None or req_mode is not None)):
13118 raise errors.OpPrereqError("Not allowed to change link or mode of"
13119 " a NIC that is connected to a network",
13120 errors.ECODE_INVAL)
13122 private.params = new_params
13123 private.filled = new_filled_params
13125 def CheckPrereq(self):
13126 """Check prerequisites.
13128 This only checks the instance list against the existing names.
13131 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13132 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13134 cluster = self.cluster = self.cfg.GetClusterInfo()
13135 assert self.instance is not None, \
13136 "Cannot retrieve locked instance %s" % self.op.instance_name
13138 pnode = instance.primary_node
13139 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13140 nodelist = list(instance.all_nodes)
13141 pnode_info = self.cfg.GetNodeInfo(pnode)
13142 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13144 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13145 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13146 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13148 # dictionary with instance information after the modification
13151 # Prepare disk/NIC modifications
13152 self.diskmod = PrepareContainerMods(self.op.disks, None)
13153 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13156 if self.op.os_name and not self.op.force:
13157 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13158 self.op.force_variant)
13159 instance_os = self.op.os_name
13161 instance_os = instance.os
13163 assert not (self.op.disk_template and self.op.disks), \
13164 "Can't modify disk template and apply disk changes at the same time"
13166 if self.op.disk_template:
13167 if instance.disk_template == self.op.disk_template:
13168 raise errors.OpPrereqError("Instance already has disk template %s" %
13169 instance.disk_template, errors.ECODE_INVAL)
13171 if (instance.disk_template,
13172 self.op.disk_template) not in self._DISK_CONVERSIONS:
13173 raise errors.OpPrereqError("Unsupported disk template conversion from"
13174 " %s to %s" % (instance.disk_template,
13175 self.op.disk_template),
13176 errors.ECODE_INVAL)
13177 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13178 msg="cannot change disk template")
13179 if self.op.disk_template in constants.DTS_INT_MIRROR:
13180 if self.op.remote_node == pnode:
13181 raise errors.OpPrereqError("Given new secondary node %s is the same"
13182 " as the primary node of the instance" %
13183 self.op.remote_node, errors.ECODE_STATE)
13184 _CheckNodeOnline(self, self.op.remote_node)
13185 _CheckNodeNotDrained(self, self.op.remote_node)
13186 # FIXME: here we assume that the old instance type is DT_PLAIN
13187 assert instance.disk_template == constants.DT_PLAIN
13188 disks = [{constants.IDISK_SIZE: d.size,
13189 constants.IDISK_VG: d.logical_id[0]}
13190 for d in instance.disks]
13191 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13192 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13194 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13195 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13196 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13198 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13199 ignore=self.op.ignore_ipolicy)
13200 if pnode_info.group != snode_info.group:
13201 self.LogWarning("The primary and secondary nodes are in two"
13202 " different node groups; the disk parameters"
13203 " from the first disk's node group will be"
13206 # hvparams processing
13207 if self.op.hvparams:
13208 hv_type = instance.hypervisor
13209 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13210 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13211 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13214 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
13215 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13216 self.hv_proposed = self.hv_new = hv_new # the new actual values
13217 self.hv_inst = i_hvdict # the new dict (without defaults)
13219 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13221 self.hv_new = self.hv_inst = {}
13223 # beparams processing
13224 if self.op.beparams:
13225 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13227 objects.UpgradeBeParams(i_bedict)
13228 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13229 be_new = cluster.SimpleFillBE(i_bedict)
13230 self.be_proposed = self.be_new = be_new # the new actual values
13231 self.be_inst = i_bedict # the new dict (without defaults)
13233 self.be_new = self.be_inst = {}
13234 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13235 be_old = cluster.FillBE(instance)
13237 # CPU param validation -- checking every time a parameter is
13238 # changed to cover all cases where either CPU mask or vcpus have
13240 if (constants.BE_VCPUS in self.be_proposed and
13241 constants.HV_CPU_MASK in self.hv_proposed):
13243 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13244 # Verify mask is consistent with number of vCPUs. Can skip this
13245 # test if only 1 entry in the CPU mask, which means same mask
13246 # is applied to all vCPUs.
13247 if (len(cpu_list) > 1 and
13248 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13249 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13250 " CPU mask [%s]" %
13251 (self.be_proposed[constants.BE_VCPUS],
13252 self.hv_proposed[constants.HV_CPU_MASK]),
13253 errors.ECODE_INVAL)
13255 # Only perform this test if a new CPU mask is given
13256 if constants.HV_CPU_MASK in self.hv_new:
13257 # Calculate the largest CPU number requested
13258 max_requested_cpu = max(map(max, cpu_list))
13259 # Check that all of the instance's nodes have enough physical CPUs to
13260 # satisfy the requested CPU mask
13261 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13262 max_requested_cpu + 1, instance.hypervisor)
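# Illustrative example (assuming the usual colon-separated per-vCPU syntax of
# utils.ParseMultiCpuMask; numbers made up): cpu_mask = "0-1:2-3:4-5" parses
# into three entries, so BE_VCPUS must equal 3, and the highest requested CPU
# is 5, so every instance node must expose at least 6 physical CPUs.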
13264 # osparams processing
13265 if self.op.osparams:
13266 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13267 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13268 self.os_inst = i_osdict # the new dict (without defaults)
13274 #TODO(dynmem): do the appropriate check involving MINMEM
13275 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13276 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13277 mem_check_list = [pnode]
13278 if be_new[constants.BE_AUTO_BALANCE]:
13279 # either we changed auto_balance to yes or it was from before
13280 mem_check_list.extend(instance.secondary_nodes)
13281 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13282 instance.hypervisor)
13283 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13284 [instance.hypervisor])
13285 pninfo = nodeinfo[pnode]
13286 msg = pninfo.fail_msg
13288 # Assume the primary node is unreachable and go ahead
13289 self.warn.append("Can't get info from primary node %s: %s" %
13292 (_, _, (pnhvinfo, )) = pninfo.payload
13293 if not isinstance(pnhvinfo.get("memory_free", None), int):
13294 self.warn.append("Node data from primary node %s doesn't contain"
13295 " free memory information" % pnode)
13296 elif instance_info.fail_msg:
13297 self.warn.append("Can't get instance runtime information: %s" %
13298 instance_info.fail_msg)
13300 if instance_info.payload:
13301 current_mem = int(instance_info.payload["memory"])
13303 # Assume instance not running
13304 # (there is a slight race condition here, but it's not very
13305 # probable, and we have no other way to check)
13306 # TODO: Describe race condition
13308 #TODO(dynmem): do the appropriate check involving MINMEM
13309 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13310 pnhvinfo["memory_free"])
13312 raise errors.OpPrereqError("This change will prevent the instance"
13313 " from starting, due to %d MB of memory"
13314 " missing on its primary node" %
13315 miss_mem, errors.ECODE_NORES)
13317 if be_new[constants.BE_AUTO_BALANCE]:
13318 for node, nres in nodeinfo.items():
13319 if node not in instance.secondary_nodes:
13321 nres.Raise("Can't get info from secondary node %s" % node,
13322 prereq=True, ecode=errors.ECODE_STATE)
13323 (_, _, (nhvinfo, )) = nres.payload
13324 if not isinstance(nhvinfo.get("memory_free", None), int):
13325 raise errors.OpPrereqError("Secondary node %s didn't return free"
13326 " memory information" % node,
13327 errors.ECODE_STATE)
13328 #TODO(dynmem): do the appropriate check involving MINMEM
13329 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13330 raise errors.OpPrereqError("This change will prevent the instance"
13331 " from failover to its secondary node"
13332 " %s, due to not enough memory" % node,
13333 errors.ECODE_STATE)
13335 if self.op.runtime_mem:
13336 remote_info = self.rpc.call_instance_info(instance.primary_node,
13338 instance.hypervisor)
13339 remote_info.Raise("Error checking node %s" % instance.primary_node)
13340 if not remote_info.payload: # not running already
13341 raise errors.OpPrereqError("Instance %s is not running" %
13342 instance.name, errors.ECODE_STATE)
13344 current_memory = remote_info.payload["memory"]
13345 if (not self.op.force and
13346 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13347 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13348 raise errors.OpPrereqError("Instance %s must have memory between %d"
13349 " and %d MB of memory unless --force is"
13352 self.be_proposed[constants.BE_MINMEM],
13353 self.be_proposed[constants.BE_MAXMEM]),
13354 errors.ECODE_INVAL)
13356 delta = self.op.runtime_mem - current_memory
13358 _CheckNodeFreeMemory(self, instance.primary_node,
13359 "ballooning memory for instance %s" %
13360 instance.name, delta, instance.hypervisor)
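# Worked example (numbers made up) for the runtime-memory check above: if the
# instance currently runs with 2048 MB and self.op.runtime_mem is 4096 MB, the
# delta of +2048 MB must be available as free memory on the primary node
# (checked by _CheckNodeFreeMemory).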
13362 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13363 raise errors.OpPrereqError("Disk operations not supported for"
13364 " diskless instances", errors.ECODE_INVAL)
13366 def _PrepareNicCreate(_, params, private):
13367 self._PrepareNicModification(params, private, None, None,
13368 {}, cluster, pnode)
13369 return (None, None)
13371 def _PrepareNicMod(_, nic, params, private):
13372 self._PrepareNicModification(params, private, nic.ip, nic.network,
13373 nic.nicparams, cluster, pnode)
13376 def _PrepareNicRemove(_, params, __):
13378 net = params.network
13379 if net is not None and ip is not None:
13380 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13382 # Verify NIC changes (operating on copy)
13383 nics = instance.nics[:]
13384 ApplyContainerMods("NIC", nics, None, self.nicmod,
13385 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13386 if len(nics) > constants.MAX_NICS:
13387 raise errors.OpPrereqError("Instance has too many network interfaces"
13388 " (%d), cannot add more" % constants.MAX_NICS,
13389 errors.ECODE_STATE)
13391 # Verify disk changes (operating on a copy)
13392 disks = instance.disks[:]
13393 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13394 if len(disks) > constants.MAX_DISKS:
13395 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13396 " more" % constants.MAX_DISKS,
13397 errors.ECODE_STATE)
13398 disk_sizes = [disk.size for disk in instance.disks]
13399 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13400 self.diskmod if op == constants.DDM_ADD)
13401 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13402 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13404 if self.op.offline is not None and self.op.offline:
13405 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13406 msg="can't change to offline")
13408 # Pre-compute NIC changes (necessary to use result in hooks)
13409 self._nic_chgdesc = []
13411 # Operate on copies as this is still in prereq
13412 nics = [nic.Copy() for nic in instance.nics]
13413 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13414 self._CreateNewNic, self._ApplyNicMods, None)
13415 self._new_nics = nics
13416 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13418 self._new_nics = None
13419 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13421 if not self.op.ignore_ipolicy:
13422 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13425 # Fill ispec with backend parameters
13426 ispec[constants.ISPEC_SPINDLE_USE] = \
13427 self.be_new.get(constants.BE_SPINDLE_USE, None)
13428 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13431 # Copy ispec to verify parameters with min/max values separately
13432 ispec_max = ispec.copy()
13433 ispec_max[constants.ISPEC_MEM_SIZE] = \
13434 self.be_new.get(constants.BE_MAXMEM, None)
13435 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13436 ispec_min = ispec.copy()
13437 ispec_min[constants.ISPEC_MEM_SIZE] = \
13438 self.be_new.get(constants.BE_MINMEM, None)
13439 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13441 if (res_max or res_min):
13442 # FIXME: Improve error message by including information about whether
13443 # the upper or lower limit of the parameter fails the ipolicy.
13444 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13445 (group_info, group_info.name,
13446 utils.CommaJoin(set(res_max + res_min))))
13447 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
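# Illustrative sketch (values made up): the two ispec copies above exist so
# that both memory bounds are checked against the group's instance policy.
#
#   ispec_max[constants.ISPEC_MEM_SIZE] = self.be_new[constants.BE_MAXMEM]
#   ispec_min[constants.ISPEC_MEM_SIZE] = self.be_new[constants.BE_MINMEM]
#
# Violations from either copy are merged into a single OpPrereqError message.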
13449 def _ConvertPlainToDrbd(self, feedback_fn):
13450 """Converts an instance from plain to drbd.
13453 feedback_fn("Converting template to drbd")
13454 instance = self.instance
13455 pnode = instance.primary_node
13456 snode = self.op.remote_node
13458 assert instance.disk_template == constants.DT_PLAIN
13460 # create a fake disk info for _GenerateDiskTemplate
13461 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13462 constants.IDISK_VG: d.logical_id[0]}
13463 for d in instance.disks]
13464 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13465 instance.name, pnode, [snode],
13466 disk_info, None, None, 0, feedback_fn,
13468 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13470 info = _GetInstanceInfoText(instance)
13471 feedback_fn("Creating additional volumes...")
13472 # first, create the missing data and meta devices
13473 for disk in anno_disks:
13474 # unfortunately this is... not too nice
13475 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13477 for child in disk.children:
13478 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13479 # at this stage, all new LVs have been created, we can rename the
13480 # old ones
13481 feedback_fn("Renaming original volumes...")
13482 rename_list = [(o, n.children[0].logical_id)
13483 for (o, n) in zip(instance.disks, new_disks)]
13484 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13485 result.Raise("Failed to rename original LVs")
13487 feedback_fn("Initializing DRBD devices...")
13488 # all child devices are in place, we can now create the DRBD devices
13489 for disk in anno_disks:
13490 for node in [pnode, snode]:
13491 f_create = node == pnode
13492 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13494 # at this point, the instance has been modified
13495 instance.disk_template = constants.DT_DRBD8
13496 instance.disks = new_disks
13497 self.cfg.Update(instance, feedback_fn)
13499 # Release node locks while waiting for sync
13500 _ReleaseLocks(self, locking.LEVEL_NODE)
13502 # disks are created, waiting for sync
13503 disk_abort = not _WaitForSync(self, instance,
13504 oneshot=not self.op.wait_for_sync)
13506 raise errors.OpExecError("There are some degraded disks for"
13507 " this instance, please cleanup manually")
13509 # Node resource locks will be released by caller
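# Illustrative note (assumption about the CLI front-end, not taken from this
# module): the conversion above backs an invocation such as
#
#   gnt-instance modify -t drbd -n <new-secondary-node> <instance>
#
# i.e. op.disk_template == constants.DT_DRBD8 with op.remote_node set, which
# selects the (DT_PLAIN, DT_DRBD8) entry of _DISK_CONVERSIONS below.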
13511 def _ConvertDrbdToPlain(self, feedback_fn):
13512 """Converts an instance from drbd to plain.
13515 instance = self.instance
13517 assert len(instance.secondary_nodes) == 1
13518 assert instance.disk_template == constants.DT_DRBD8
13520 pnode = instance.primary_node
13521 snode = instance.secondary_nodes[0]
13522 feedback_fn("Converting template to plain")
13524 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13525 new_disks = [d.children[0] for d in instance.disks]
13527 # copy over size and mode
13528 for parent, child in zip(old_disks, new_disks):
13529 child.size = parent.size
13530 child.mode = parent.mode
13532 # this is a DRBD disk, return its port to the pool
13533 # NOTE: this must be done right before the call to cfg.Update!
13534 for disk in old_disks:
13535 tcp_port = disk.logical_id[2]
13536 self.cfg.AddTcpUdpPort(tcp_port)
13538 # update instance structure
13539 instance.disks = new_disks
13540 instance.disk_template = constants.DT_PLAIN
13541 self.cfg.Update(instance, feedback_fn)
13543 # Release locks in case removing disks takes a while
13544 _ReleaseLocks(self, locking.LEVEL_NODE)
13546 feedback_fn("Removing volumes on the secondary node...")
13547 for disk in old_disks:
13548 self.cfg.SetDiskID(disk, snode)
13549 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13551 self.LogWarning("Could not remove block device %s on node %s,"
13552 " continuing anyway: %s", disk.iv_name, snode, msg)
13554 feedback_fn("Removing unneeded volumes on the primary node...")
13555 for idx, disk in enumerate(old_disks):
13556 meta = disk.children[1]
13557 self.cfg.SetDiskID(meta, pnode)
13558 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13560 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13561 " continuing anyway: %s", idx, pnode, msg)
13563 def _CreateNewDisk(self, idx, params, _):
13564 """Creates a new disk.
13567 instance = self.instance
13570 if instance.disk_template in constants.DTS_FILEBASED:
13571 (file_driver, file_path) = instance.disks[0].logical_id
13572 file_path = os.path.dirname(file_path)
13574 file_driver = file_path = None
13577 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13578 instance.primary_node, instance.secondary_nodes,
13579 [params], file_path, file_driver, idx,
13580 self.Log, self.diskparams)[0]
13582 info = _GetInstanceInfoText(instance)
13584 logging.info("Creating volume %s for instance %s",
13585 disk.iv_name, instance.name)
13586 # Note: this needs to be kept in sync with _CreateDisks
13588 for node in instance.all_nodes:
13589 f_create = (node == instance.primary_node)
13591 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13592 except errors.OpExecError, err:
13593 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13594 disk.iv_name, disk, node, err)
13597 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13601 def _ModifyDisk(idx, disk, params, _):
13602 """Modifies a disk.
13605 disk.mode = params[constants.IDISK_MODE]
13608 ("disk.mode/%d" % idx, disk.mode),
13611 def _RemoveDisk(self, idx, root, _):
13615 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13616 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13617 self.cfg.SetDiskID(disk, node)
13618 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13620 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13621 " continuing anyway", idx, node, msg)
13623 # if this is a DRBD disk, return its port to the pool
13624 if root.dev_type in constants.LDS_DRBD:
13625 self.cfg.AddTcpUdpPort(root.logical_id[2])
13628 def _CreateNewNic(idx, params, private):
13629 """Creates data structure for a new network interface.
13632 mac = params[constants.INIC_MAC]
13633 ip = params.get(constants.INIC_IP, None)
13634 net = params.get(constants.INIC_NETWORK, None)
13635 # TODO: not private.filled? Can a NIC have no nicparams?
13636 nicparams = private.filled
13638 return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13640 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13641 (mac, ip, private.filled[constants.NIC_MODE],
13642 private.filled[constants.NIC_LINK],
13647 def _ApplyNicMods(idx, nic, params, private):
13648 """Modifies a network interface.
13653 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13655 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13656 setattr(nic, key, params[key])
13659 nic.nicparams = private.filled
13661 for (key, val) in nic.nicparams.items():
13662 changes.append(("nic.%s/%d" % (key, idx), val))
13666 def Exec(self, feedback_fn):
13667 """Modifies an instance.
13669 All parameters take effect only at the next restart of the instance.
13672 # Process here the warnings from CheckPrereq, as we don't have a
13673 # feedback_fn there.
13674 # TODO: Replace with self.LogWarning
13675 for warn in self.warn:
13676 feedback_fn("WARNING: %s" % warn)
13678 assert ((self.op.disk_template is None) ^
13679 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13680 "Not owning any node resource locks"
13683 instance = self.instance
13686 if self.op.runtime_mem:
13687 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13689 self.op.runtime_mem)
13690 rpcres.Raise("Cannot modify instance runtime memory")
13691 result.append(("runtime_memory", self.op.runtime_mem))
13693 # Apply disk changes
13694 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13695 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13696 _UpdateIvNames(0, instance.disks)
13698 if self.op.disk_template:
13700 check_nodes = set(instance.all_nodes)
13701 if self.op.remote_node:
13702 check_nodes.add(self.op.remote_node)
13703 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13704 owned = self.owned_locks(level)
13705 assert not (check_nodes - owned), \
13706 ("Not owning the correct locks, owning %r, expected at least %r" %
13707 (owned, check_nodes))
13709 r_shut = _ShutdownInstanceDisks(self, instance)
13711 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13712 " proceed with disk template conversion")
13713 mode = (instance.disk_template, self.op.disk_template)
13715 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13717 self.cfg.ReleaseDRBDMinors(instance.name)
13719 result.append(("disk_template", self.op.disk_template))
13721 assert instance.disk_template == self.op.disk_template, \
13722 ("Expected disk template '%s', found '%s'" %
13723 (self.op.disk_template, instance.disk_template))
13725 # Release node and resource locks if there are any (they might already have
13726 # been released during disk conversion)
13727 _ReleaseLocks(self, locking.LEVEL_NODE)
13728 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13730 # Apply NIC changes
13731 if self._new_nics is not None:
13732 instance.nics = self._new_nics
13733 result.extend(self._nic_chgdesc)
13736 if self.op.hvparams:
13737 instance.hvparams = self.hv_inst
13738 for key, val in self.op.hvparams.iteritems():
13739 result.append(("hv/%s" % key, val))
13742 if self.op.beparams:
13743 instance.beparams = self.be_inst
13744 for key, val in self.op.beparams.iteritems():
13745 result.append(("be/%s" % key, val))
13748 if self.op.os_name:
13749 instance.os = self.op.os_name
13752 if self.op.osparams:
13753 instance.osparams = self.os_inst
13754 for key, val in self.op.osparams.iteritems():
13755 result.append(("os/%s" % key, val))
13757 if self.op.offline is None:
13760 elif self.op.offline:
13761 # Mark instance as offline
13762 self.cfg.MarkInstanceOffline(instance.name)
13763 result.append(("admin_state", constants.ADMINST_OFFLINE))
13765 # Mark instance as online, but stopped
13766 self.cfg.MarkInstanceDown(instance.name)
13767 result.append(("admin_state", constants.ADMINST_DOWN))
13769 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13771 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13772 self.owned_locks(locking.LEVEL_NODE)), \
13773 "All node locks should have been released by now"
13777 _DISK_CONVERSIONS = {
13778 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13779 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13783 class LUInstanceChangeGroup(LogicalUnit):
13784 HPATH = "instance-change-group"
13785 HTYPE = constants.HTYPE_INSTANCE
13788 def ExpandNames(self):
13789 self.share_locks = _ShareAll()
13791 self.needed_locks = {
13792 locking.LEVEL_NODEGROUP: [],
13793 locking.LEVEL_NODE: [],
13794 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
13797 self._ExpandAndLockInstance()
13799 if self.op.target_groups:
13800 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13801 self.op.target_groups)
13803 self.req_target_uuids = None
13805 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13807 def DeclareLocks(self, level):
13808 if level == locking.LEVEL_NODEGROUP:
13809 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13811 if self.req_target_uuids:
13812 lock_groups = set(self.req_target_uuids)
13814 # Lock all groups used by instance optimistically; this requires going
13815 # via the node before it's locked, requiring verification later on
13816 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13817 lock_groups.update(instance_groups)
13819 # No target groups, need to lock all of them
13820 lock_groups = locking.ALL_SET
13822 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13824 elif level == locking.LEVEL_NODE:
13825 if self.req_target_uuids:
13826 # Lock all nodes used by instances
13827 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13828 self._LockInstancesNodes()
13830 # Lock all nodes in all potential target groups
13831 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13832 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13833 member_nodes = [node_name
13834 for group in lock_groups
13835 for node_name in self.cfg.GetNodeGroup(group).members]
13836 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13838 # Lock all nodes as all groups are potential targets
13839 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13841 def CheckPrereq(self):
13842 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13843 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13844 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13846 assert (self.req_target_uuids is None or
13847 owned_groups.issuperset(self.req_target_uuids))
13848 assert owned_instances == set([self.op.instance_name])
13850 # Get instance information
13851 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13853 # Check if node groups for locked instance are still correct
13854 assert owned_nodes.issuperset(self.instance.all_nodes), \
13855 ("Instance %s's nodes changed while we kept the lock" %
13856 self.op.instance_name)
13858 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13861 if self.req_target_uuids:
13862 # User requested specific target groups
13863 self.target_uuids = frozenset(self.req_target_uuids)
13865 # All groups except those used by the instance are potential targets
13866 self.target_uuids = owned_groups - inst_groups
13868 conflicting_groups = self.target_uuids & inst_groups
13869 if conflicting_groups:
13870 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13871 " used by the instance '%s'" %
13872 (utils.CommaJoin(conflicting_groups),
13873 self.op.instance_name),
13874 errors.ECODE_INVAL)
13876 if not self.target_uuids:
13877 raise errors.OpPrereqError("There are no possible target groups",
13878 errors.ECODE_INVAL)
13880 def BuildHooksEnv(self):
13881 """Build hooks env.
13884 assert self.target_uuids
13887 "TARGET_GROUPS": " ".join(self.target_uuids),
13890 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13894 def BuildHooksNodes(self):
13895 """Build hooks nodes.
13898 mn = self.cfg.GetMasterNode()
13899 return ([mn], [mn])
13901 def Exec(self, feedback_fn):
13902 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13904 assert instances == [self.op.instance_name], "Instance not locked"
13906 req = iallocator.IAReqGroupChange(instances=instances,
13907 target_groups=list(self.target_uuids))
13908 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13910 ial.Run(self.op.iallocator)
13912 if not ial.success:
13913 raise errors.OpPrereqError("Can't compute solution for changing group of"
13914 " instance '%s' using iallocator '%s': %s" %
13915 (self.op.instance_name, self.op.iallocator,
13916 ial.info), errors.ECODE_NORES)
13918 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13920 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13921 " instance '%s'", len(jobs), self.op.instance_name)
13923 return ResultWithJobs(jobs)
13926 class LUBackupQuery(NoHooksLU):
13927 """Query the exports list
13932 def CheckArguments(self):
13933 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13934 ["node", "export"], self.op.use_locking)
13936 def ExpandNames(self):
13937 self.expq.ExpandNames(self)
13939 def DeclareLocks(self, level):
13940 self.expq.DeclareLocks(self, level)
13942 def Exec(self, feedback_fn):
13945 for (node, expname) in self.expq.OldStyleQuery(self):
13946 if expname is None:
13947 result[node] = False
13949 result.setdefault(node, []).append(expname)
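# Illustrative sketch (names made up): the old-style result built above maps
# each node name to either False (the node could not be queried) or the list
# of export names found on it, e.g.
#
#   {"node1.example.com": ["instance1.example.com"],
#    "node2.example.com": False}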
13954 class _ExportQuery(_QueryBase):
13955 FIELDS = query.EXPORT_FIELDS
13957 #: The node name is not a unique key for this query
13958 SORT_FIELD = "node"
13960 def ExpandNames(self, lu):
13961 lu.needed_locks = {}
13963 # The following variables interact with _QueryBase._GetNames
13965 self.wanted = _GetWantedNodes(lu, self.names)
13967 self.wanted = locking.ALL_SET
13969 self.do_locking = self.use_locking
13971 if self.do_locking:
13972 lu.share_locks = _ShareAll()
13973 lu.needed_locks = {
13974 locking.LEVEL_NODE: self.wanted,
13978 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
13980 def DeclareLocks(self, lu, level):
13983 def _GetQueryData(self, lu):
13984 """Computes the list of nodes and their attributes.
13987 # Locking is not used
13989 assert not (compat.any(lu.glm.is_owned(level)
13990 for level in locking.LEVELS
13991 if level != locking.LEVEL_CLUSTER) or
13992 self.do_locking or self.use_locking)
13994 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13998 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14000 result.append((node, None))
14002 result.extend((node, expname) for expname in nres.payload)
14007 class LUBackupPrepare(NoHooksLU):
14008 """Prepares an instance for an export and returns useful information.
14013 def ExpandNames(self):
14014 self._ExpandAndLockInstance()
14016 def CheckPrereq(self):
14017 """Check prerequisites.
14020 instance_name = self.op.instance_name
14022 self.instance = self.cfg.GetInstanceInfo(instance_name)
14023 assert self.instance is not None, \
14024 "Cannot retrieve locked instance %s" % self.op.instance_name
14025 _CheckNodeOnline(self, self.instance.primary_node)
14027 self._cds = _GetClusterDomainSecret()
14029 def Exec(self, feedback_fn):
14030 """Prepares an instance for an export.
14033 instance = self.instance
14035 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14036 salt = utils.GenerateSecret(8)
14038 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14039 result = self.rpc.call_x509_cert_create(instance.primary_node,
14040 constants.RIE_CERT_VALIDITY)
14041 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14043 (name, cert_pem) = result.payload
14045 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14049 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14050 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14052 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14058 class LUBackupExport(LogicalUnit):
14059 """Export an instance to an image in the cluster.
14062 HPATH = "instance-export"
14063 HTYPE = constants.HTYPE_INSTANCE
14066 def CheckArguments(self):
14067 """Check the arguments.
14070 self.x509_key_name = self.op.x509_key_name
14071 self.dest_x509_ca_pem = self.op.destination_x509_ca
14073 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14074 if not self.x509_key_name:
14075 raise errors.OpPrereqError("Missing X509 key name for encryption",
14076 errors.ECODE_INVAL)
14078 if not self.dest_x509_ca_pem:
14079 raise errors.OpPrereqError("Missing destination X509 CA",
14080 errors.ECODE_INVAL)
14082 def ExpandNames(self):
14083 self._ExpandAndLockInstance()
14085 # Lock all nodes for local exports
14086 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14087 # FIXME: lock only instance primary and destination node
14089 # Sad but true, for now we have to lock all nodes, as we don't know where
14090 # the previous export might be, and in this LU we search for it and
14091 # remove it from its current node. In the future we could fix this by:
14092 # - making a tasklet to search (share-lock all), then create the
14093 # new one, then remove the old one afterwards
14094 # - removing the removal operation altogether
14095 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14097 # Allocations should be stopped while this LU runs with node locks, but
14098 # it doesn't have to be exclusive
14099 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14100 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14102 def DeclareLocks(self, level):
14103 """Last minute lock declaration."""
14104 # All nodes are locked anyway, so nothing to do here.
14106 def BuildHooksEnv(self):
14107 """Build hooks env.
14109 This will run on the master, primary node and target node.
14113 "EXPORT_MODE": self.op.mode,
14114 "EXPORT_NODE": self.op.target_node,
14115 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14116 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14117 # TODO: Generic function for boolean env variables
14118 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14121 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14125 def BuildHooksNodes(self):
14126 """Build hooks nodes.
14129 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14131 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14132 nl.append(self.op.target_node)
14136 def CheckPrereq(self):
14137 """Check prerequisites.
14139 This checks that the instance and node names are valid.
14142 instance_name = self.op.instance_name
14144 self.instance = self.cfg.GetInstanceInfo(instance_name)
14145 assert self.instance is not None, \
14146 "Cannot retrieve locked instance %s" % self.op.instance_name
14147 _CheckNodeOnline(self, self.instance.primary_node)
14149 if (self.op.remove_instance and
14150 self.instance.admin_state == constants.ADMINST_UP and
14151 not self.op.shutdown):
14152 raise errors.OpPrereqError("Can not remove instance without shutting it"
14153 " down before", errors.ECODE_STATE)
14155 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14156 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14157 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14158 assert self.dst_node is not None
14160 _CheckNodeOnline(self, self.dst_node.name)
14161 _CheckNodeNotDrained(self, self.dst_node.name)
14164 self.dest_disk_info = None
14165 self.dest_x509_ca = None
14167 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14168 self.dst_node = None
14170 if len(self.op.target_node) != len(self.instance.disks):
14171 raise errors.OpPrereqError(("Received destination information for %s"
14172 " disks, but instance %s has %s disks") %
14173 (len(self.op.target_node), instance_name,
14174 len(self.instance.disks)),
14175 errors.ECODE_INVAL)
14177 cds = _GetClusterDomainSecret()
14179 # Check X509 key name
14181 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14182 except (TypeError, ValueError), err:
14183 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14184 errors.ECODE_INVAL)
14186 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14187 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14188 errors.ECODE_INVAL)
14190 # Load and verify CA
14192 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14193 except OpenSSL.crypto.Error, err:
14194 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14195 (err, ), errors.ECODE_INVAL)
14197 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14198 if errcode is not None:
14199 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14200 (msg, ), errors.ECODE_INVAL)
14202 self.dest_x509_ca = cert
14204 # Verify target information
14206 for idx, disk_data in enumerate(self.op.target_node):
14208 (host, port, magic) = \
14209 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14210 except errors.GenericError, err:
14211 raise errors.OpPrereqError("Target info for disk %s: %s" %
14212 (idx, err), errors.ECODE_INVAL)
14214 disk_info.append((host, port, magic))
14216 assert len(disk_info) == len(self.op.target_node)
14217 self.dest_disk_info = disk_info
14220 raise errors.ProgrammerError("Unhandled export mode %r" %
14223 # instance disk type verification
14224 # TODO: Implement export support for file-based disks
14225 for disk in self.instance.disks:
14226 if disk.dev_type == constants.LD_FILE:
14227 raise errors.OpPrereqError("Export not supported for instances with"
14228 " file-based disks", errors.ECODE_INVAL)
14230 def _CleanupExports(self, feedback_fn):
14231 """Removes exports of current instance from all other nodes.
14233 If an instance in a cluster with nodes A..D was exported to node C, its
14234 exports will be removed from the nodes A, B and D.
14237 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14239 nodelist = self.cfg.GetNodeList()
14240 nodelist.remove(self.dst_node.name)
14242 # on one-node clusters nodelist will be empty after the removal
14243 # if we proceed the backup would be removed because OpBackupQuery
14244 # substitutes an empty list with the full cluster node list.
14245 iname = self.instance.name
14247 feedback_fn("Removing old exports for instance %s" % iname)
14248 exportlist = self.rpc.call_export_list(nodelist)
14249 for node in exportlist:
14250 if exportlist[node].fail_msg:
14252 if iname in exportlist[node].payload:
14253 msg = self.rpc.call_export_remove(node, iname).fail_msg
14255 self.LogWarning("Could not remove older export for instance %s"
14256 " on node %s: %s", iname, node, msg)
14258 def Exec(self, feedback_fn):
14259 """Export an instance to an image in the cluster.
14262 assert self.op.mode in constants.EXPORT_MODES
14264 instance = self.instance
14265 src_node = instance.primary_node
14267 if self.op.shutdown:
14268 # shutdown the instance, but not the disks
14269 feedback_fn("Shutting down instance %s" % instance.name)
14270 result = self.rpc.call_instance_shutdown(src_node, instance,
14271 self.op.shutdown_timeout)
14272 # TODO: Maybe ignore failures if ignore_remove_failures is set
14273 result.Raise("Could not shutdown instance %s on"
14274 " node %s" % (instance.name, src_node))
14276 # set the disks ID correctly since call_instance_start needs the
14277 # correct drbd minor to create the symlinks
14278 for disk in instance.disks:
14279 self.cfg.SetDiskID(disk, src_node)
14281 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14284 # Activate the instance disks if we're exporting a stopped instance
14285 feedback_fn("Activating disks for %s" % instance.name)
14286 _StartInstanceDisks(self, instance, None)
14289 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14292 helper.CreateSnapshots()
14294 if (self.op.shutdown and
14295 instance.admin_state == constants.ADMINST_UP and
14296 not self.op.remove_instance):
14297 assert not activate_disks
14298 feedback_fn("Starting instance %s" % instance.name)
14299 result = self.rpc.call_instance_start(src_node,
14300 (instance, None, None), False)
14301 msg = result.fail_msg
14303 feedback_fn("Failed to start instance: %s" % msg)
14304 _ShutdownInstanceDisks(self, instance)
14305 raise errors.OpExecError("Could not start instance: %s" % msg)
14307 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14308 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14309 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14310 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14311 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14313 (key_name, _, _) = self.x509_key_name
14316 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14319 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14320 key_name, dest_ca_pem,
14325 # Check for backwards compatibility
14326 assert len(dresults) == len(instance.disks)
14327 assert compat.all(isinstance(i, bool) for i in dresults), \
14328 "Not all results are boolean: %r" % dresults
14332 feedback_fn("Deactivating disks for %s" % instance.name)
14333 _ShutdownInstanceDisks(self, instance)
14335 if not (compat.all(dresults) and fin_resu):
14338 failures.append("export finalization")
14339 if not compat.all(dresults):
14340 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14342 failures.append("disk export: disk(s) %s" % fdsk)
14344 raise errors.OpExecError("Export failed, errors in %s" %
14345 utils.CommaJoin(failures))
14347 # At this point, the export was successful, we can cleanup/finish
14349 # Remove instance if requested
14350 if self.op.remove_instance:
14351 feedback_fn("Removing instance %s" % instance.name)
14352 _RemoveInstance(self, feedback_fn, instance,
14353 self.op.ignore_remove_failures)
14355 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14356 self._CleanupExports(feedback_fn)
14358 return fin_resu, dresults
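# Illustrative sketch (values made up): the value returned above is a pair of
# the finalization status and one boolean per instance disk, e.g.
# (True, [True, True]) for a fully successful two-disk export.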
14361 class LUBackupRemove(NoHooksLU):
14362 """Remove exports related to the named instance.
14367 def ExpandNames(self):
14368 self.needed_locks = {
14369 # We need all nodes to be locked in order for RemoveExport to work, but
14370 # we don't need to lock the instance itself, as nothing will happen to it
14371 # (and we can remove exports also for a removed instance)
14372 locking.LEVEL_NODE: locking.ALL_SET,
14374 # Removing backups is quick, so blocking allocations is justified
14375 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14378 # Allocations should be stopped while this LU runs with node locks, but it
14379 # doesn't have to be exclusive
14380 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14382 def Exec(self, feedback_fn):
14383 """Remove any export.
14386 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14387 # If the instance was not found we'll try with the name that was passed in.
14388 # This will only work if it was an FQDN, though.
14390 if not instance_name:
14392 instance_name = self.op.instance_name
14394 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14395 exportlist = self.rpc.call_export_list(locked_nodes)
14397 for node in exportlist:
14398 msg = exportlist[node].fail_msg
14400 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14402 if instance_name in exportlist[node].payload:
14404 result = self.rpc.call_export_remove(node, instance_name)
14405 msg = result.fail_msg
14407 logging.error("Could not remove export for instance %s"
14408 " on node %s: %s", instance_name, node, msg)
14410 if fqdn_warn and not found:
14411 feedback_fn("Export not found. If trying to remove an export belonging"
14412 " to a deleted instance please use its Fully Qualified"
14416 class LUGroupAdd(LogicalUnit):
14417 """Logical unit for creating node groups.
14420 HPATH = "group-add"
14421 HTYPE = constants.HTYPE_GROUP
14424 def ExpandNames(self):
14425 # We need the new group's UUID here so that we can create and acquire the
14426 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14427 # that it should not check whether the UUID exists in the configuration.
14428 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14429 self.needed_locks = {}
14430 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14432 def CheckPrereq(self):
14433 """Check prerequisites.
14435 This checks that the given group name is not an existing node group
14436 already.
14440 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14441 except errors.OpPrereqError:
14444 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14445 " node group (UUID: %s)" %
14446 (self.op.group_name, existing_uuid),
14447 errors.ECODE_EXISTS)
14449 if self.op.ndparams:
14450 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14452 if self.op.hv_state:
14453 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14455 self.new_hv_state = None
14457 if self.op.disk_state:
14458 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14460 self.new_disk_state = None
14462 if self.op.diskparams:
14463 for templ in constants.DISK_TEMPLATES:
14464 if templ in self.op.diskparams:
14465 utils.ForceDictType(self.op.diskparams[templ],
14466 constants.DISK_DT_TYPES)
14467 self.new_diskparams = self.op.diskparams
14469 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14470 except errors.OpPrereqError, err:
14471 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14472 errors.ECODE_INVAL)
14474 self.new_diskparams = {}
14476 if self.op.ipolicy:
14477 cluster = self.cfg.GetClusterInfo()
14478 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14480 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14481 except errors.ConfigurationError, err:
14482 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14483 errors.ECODE_INVAL)
14485 def BuildHooksEnv(self):
14486 """Build hooks env.
14490 "GROUP_NAME": self.op.group_name,
14493 def BuildHooksNodes(self):
14494 """Build hooks nodes.
14497 mn = self.cfg.GetMasterNode()
14498 return ([mn], [mn])
14500 def Exec(self, feedback_fn):
14501 """Add the node group to the cluster.
14504 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14505 uuid=self.group_uuid,
14506 alloc_policy=self.op.alloc_policy,
14507 ndparams=self.op.ndparams,
14508 diskparams=self.new_diskparams,
14509 ipolicy=self.op.ipolicy,
14510 hv_state_static=self.new_hv_state,
14511 disk_state_static=self.new_disk_state)
14513 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14514 del self.remove_locks[locking.LEVEL_NODEGROUP]
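# Illustrative sketch (not part of the LU above): clients typically reach
# LUGroupAdd by submitting an OpGroupAdd opcode; only parameters referenced
# in CheckPrereq/Exec are shown and the values are assumptions for the
# example.
#
#   op = opcodes.OpGroupAdd(group_name="rack1",
#                           alloc_policy=constants.ALLOC_POLICY_PREFERRED,
#                           ndparams={}, diskparams={})
#
# Exec() then builds the objects.NodeGroup shown above and registers it with
# check_uuid=False, because the UUID was already generated in ExpandNames.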
14517 class LUGroupAssignNodes(NoHooksLU):
14518 """Logical unit for assigning nodes to groups.
14523 def ExpandNames(self):
14524 # These raise errors.OpPrereqError on their own:
14525 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14526 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14528 # We want to lock all the affected nodes and groups. We have readily
14529 # available the list of nodes, and the *destination* group. To gather the
14530 # list of "source" groups, we need to fetch node information later on.
14531 self.needed_locks = {
14532 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14533 locking.LEVEL_NODE: self.op.nodes,
14536 def DeclareLocks(self, level):
14537 if level == locking.LEVEL_NODEGROUP:
14538 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14540 # Try to get all affected nodes' groups without having the group or node
14541 # lock yet. Needs verification later in the code flow.
14542 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14544 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14546 def CheckPrereq(self):
14547 """Check prerequisites.
14550 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14551 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14552 frozenset(self.op.nodes))
14554 expected_locks = (set([self.group_uuid]) |
14555 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14556 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14557 if actual_locks != expected_locks:
14558 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14559 " current groups are '%s', used to be '%s'" %
14560 (utils.CommaJoin(expected_locks),
14561 utils.CommaJoin(actual_locks)))
14563 self.node_data = self.cfg.GetAllNodesInfo()
14564 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14565 instance_data = self.cfg.GetAllInstancesInfo()
14567 if self.group is None:
14568 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14569 (self.op.group_name, self.group_uuid))
14571 (new_splits, previous_splits) = \
14572 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14573 for node in self.op.nodes],
14574 self.node_data, instance_data)
14577 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14579 if not self.op.force:
14580 raise errors.OpExecError("The following instances get split by this"
14581 " change and --force was not given: %s" %
14584 self.LogWarning("This operation will split the following instances: %s",
14587 if previous_splits:
14588 self.LogWarning("In addition, these already-split instances continue"
14589 " to be split across groups: %s",
14590 utils.CommaJoin(utils.NiceSort(previous_splits)))
14592 def Exec(self, feedback_fn):
14593 """Assign nodes to a new group.
14596 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14598 self.cfg.AssignGroupNodes(mods)
14601 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14602 """Check for split instances after a node assignment.
14604 This method considers a series of node assignments as an atomic operation,
14605 and returns information about split instances after applying the set of changes.
14608 In particular, it returns information about newly split instances, and
14609 instances that were already split, and remain so after the change.
14611 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
14614 @type changes: list of (node_name, new_group_uuid) pairs.
14615 @param changes: list of node assignments to consider.
14616 @param node_data: a dict with data for all nodes
14617 @param instance_data: a dict with all instances to consider
14618 @rtype: a two-tuple
14619 @return: a list of instances that were previously okay but become split as a
14620 consequence of this change, and a list of instances that were already
14621 split and that this change does not fix.
14624 changed_nodes = dict((node, group) for node, group in changes
14625 if node_data[node].group != group)
14627 all_split_instances = set()
14628 previously_split_instances = set()
14630 def InstanceNodes(instance):
14631 return [instance.primary_node] + list(instance.secondary_nodes)
14633 for inst in instance_data.values():
14634 if inst.disk_template not in constants.DTS_INT_MIRROR:
14637 instance_nodes = InstanceNodes(inst)
14639 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14640 previously_split_instances.add(inst.name)
14642 if len(set(changed_nodes.get(node, node_data[node].group)
14643 for node in instance_nodes)) > 1:
14644 all_split_instances.add(inst.name)
14646 return (list(all_split_instances - previously_split_instances),
14647 list(previously_split_instances & all_split_instances))
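# A minimal sketch of how the helper above behaves, using stand-in objects
# that only provide the attributes it actually reads (group, primary_node,
# secondary_nodes, disk_template, name); the fakes and names are assumptions
# for the example, not real configuration objects.
#
#   import collections
#   FakeNode = collections.namedtuple("FakeNode", ["group"])
#   FakeInst = collections.namedtuple(
#     "FakeInst", ["name", "primary_node", "secondary_nodes", "disk_template"])
#   node_data = {"n1": FakeNode("g1"), "n2": FakeNode("g1")}
#   instance_data = {"i1": FakeInst("i1", "n1", ["n2"], constants.DT_DRBD8)}
#   # Moving only n2 to group "g2" would newly split i1 across two groups:
#   LUGroupAssignNodes.CheckAssignmentForSplitInstances(
#     [("n2", "g2")], node_data, instance_data)
#   # -> (["i1"], [])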
14650 class _GroupQuery(_QueryBase):
14651 FIELDS = query.GROUP_FIELDS
14653 def ExpandNames(self, lu):
14654 lu.needed_locks = {}
14656 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14657 self._cluster = lu.cfg.GetClusterInfo()
14658 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14661 self.wanted = [name_to_uuid[name]
14662 for name in utils.NiceSort(name_to_uuid.keys())]
14664 # Accept names to be either names or UUIDs.
14667 all_uuid = frozenset(self._all_groups.keys())
14669 for name in self.names:
14670 if name in all_uuid:
14671 self.wanted.append(name)
14672 elif name in name_to_uuid:
14673 self.wanted.append(name_to_uuid[name])
14675 missing.append(name)
14678 raise errors.OpPrereqError("Some groups do not exist: %s" %
14679 utils.CommaJoin(missing),
14680 errors.ECODE_NOENT)
14682 def DeclareLocks(self, lu, level):
14685 def _GetQueryData(self, lu):
14686 """Computes the list of node groups and their attributes.
14689 do_nodes = query.GQ_NODE in self.requested_data
14690 do_instances = query.GQ_INST in self.requested_data
14692 group_to_nodes = None
14693 group_to_instances = None
14695 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14696 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14697 # latter GetAllInstancesInfo() is not enough, for we have to go through
14698 # instance->node. Hence, we will need to process nodes even if we only need
14699 # instance information.
14700 if do_nodes or do_instances:
14701 all_nodes = lu.cfg.GetAllNodesInfo()
14702 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14705 for node in all_nodes.values():
14706 if node.group in group_to_nodes:
14707 group_to_nodes[node.group].append(node.name)
14708 node_to_group[node.name] = node.group
14711 all_instances = lu.cfg.GetAllInstancesInfo()
14712 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14714 for instance in all_instances.values():
14715 node = instance.primary_node
14716 if node in node_to_group:
14717 group_to_instances[node_to_group[node]].append(instance.name)
14720 # Do not pass on node information if it was not requested.
14721 group_to_nodes = None
14723 return query.GroupQueryData(self._cluster,
14724 [self._all_groups[uuid]
14725 for uuid in self.wanted],
14726 group_to_nodes, group_to_instances,
14727 query.GQ_DISKPARAMS in self.requested_data)
14730 class LUGroupQuery(NoHooksLU):
14731 """Logical unit for querying node groups.
14736 def CheckArguments(self):
14737 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14738 self.op.output_fields, False)
14740 def ExpandNames(self):
14741 self.gq.ExpandNames(self)
14743 def DeclareLocks(self, level):
14744 self.gq.DeclareLocks(self, level)
14746 def Exec(self, feedback_fn):
14747 return self.gq.OldStyleQuery(self)
14750 class LUGroupSetParams(LogicalUnit):
14751 """Modifies the parameters of a node group.
14754 HPATH = "group-modify"
14755 HTYPE = constants.HTYPE_GROUP
14758 def CheckArguments(self):
14761 self.op.diskparams,
14762 self.op.alloc_policy,
14764 self.op.disk_state,
14768 if all_changes.count(None) == len(all_changes):
14769 raise errors.OpPrereqError("Please pass at least one modification",
14770 errors.ECODE_INVAL)
14772 def ExpandNames(self):
14773 # This raises errors.OpPrereqError on its own:
14774 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14776 self.needed_locks = {
14777 locking.LEVEL_INSTANCE: [],
14778 locking.LEVEL_NODEGROUP: [self.group_uuid],
14781 self.share_locks[locking.LEVEL_INSTANCE] = 1
14783 def DeclareLocks(self, level):
14784 if level == locking.LEVEL_INSTANCE:
14785 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14787 # Lock instances optimistically; needs verification once the group lock has been acquired
14789 self.needed_locks[locking.LEVEL_INSTANCE] = \
14790 self.cfg.GetNodeGroupInstances(self.group_uuid)
14793 def _UpdateAndVerifyDiskParams(old, new):
14794 """Updates and verifies disk parameters.
14797 new_params = _GetUpdatedParams(old, new)
14798 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
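# A minimal sketch of the update step above (assumption: _GetUpdatedParams
# copies the old dict, overrides keys present in the update, and drops keys
# whose new value is constants.VALUE_DEFAULT). The key name is hypothetical,
# used only for illustration:
#
#   old = {"resync-rate": 1024}
#   new = {"resync-rate": constants.VALUE_DEFAULT}
#   _GetUpdatedParams(old, new)   # -> {}, i.e. back to the cluster default
#
# utils.ForceDictType then verifies that every remaining key/value pair has
# the type declared in constants.DISK_DT_TYPES.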
14801 def CheckPrereq(self):
14802 """Check prerequisites.
14805 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14807 # Check if locked instances are still correct
14808 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14810 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14811 cluster = self.cfg.GetClusterInfo()
14813 if self.group is None:
14814 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14815 (self.op.group_name, self.group_uuid))
14817 if self.op.ndparams:
14818 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14819 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14820 self.new_ndparams = new_ndparams
14822 if self.op.diskparams:
14823 diskparams = self.group.diskparams
14824 uavdp = self._UpdateAndVerifyDiskParams
14825 # For each disktemplate subdict update and verify the values
14826 new_diskparams = dict((dt,
14827 uavdp(diskparams.get(dt, {}),
14828 self.op.diskparams[dt]))
14829 for dt in constants.DISK_TEMPLATES
14830 if dt in self.op.diskparams)
14831 # Now that all subdicts of diskparams are ready, let's merge the actual
14832 # dict with all updated subdicts
14833 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14835 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14836 except errors.OpPrereqError, err:
14837 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14838 errors.ECODE_INVAL)
14840 if self.op.hv_state:
14841 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14842 self.group.hv_state_static)
14844 if self.op.disk_state:
14845 self.new_disk_state = \
14846 _MergeAndVerifyDiskState(self.op.disk_state,
14847 self.group.disk_state_static)
14849 if self.op.ipolicy:
14850 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14854 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14855 inst_filter = lambda inst: inst.name in owned_instances
14856 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14857 gmi = ganeti.masterd.instance
14859 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14861 new_ipolicy, instances)
14864 self.LogWarning("After the ipolicy change the following instances"
14865 " violate them: %s",
14866 utils.CommaJoin(violations))
14868 def BuildHooksEnv(self):
14869 """Build hooks env.
14873 "GROUP_NAME": self.op.group_name,
14874 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14877 def BuildHooksNodes(self):
14878 """Build hooks nodes.
14881 mn = self.cfg.GetMasterNode()
14882 return ([mn], [mn])
14884 def Exec(self, feedback_fn):
14885 """Modifies the node group.
14890 if self.op.ndparams:
14891 self.group.ndparams = self.new_ndparams
14892 result.append(("ndparams", str(self.group.ndparams)))
14894 if self.op.diskparams:
14895 self.group.diskparams = self.new_diskparams
14896 result.append(("diskparams", str(self.group.diskparams)))
14898 if self.op.alloc_policy:
14899 self.group.alloc_policy = self.op.alloc_policy
14901 if self.op.hv_state:
14902 self.group.hv_state_static = self.new_hv_state
14904 if self.op.disk_state:
14905 self.group.disk_state_static = self.new_disk_state
14907 if self.op.ipolicy:
14908 self.group.ipolicy = self.new_ipolicy
14910 self.cfg.Update(self.group, feedback_fn)
14914 class LUGroupRemove(LogicalUnit):
14915 HPATH = "group-remove"
14916 HTYPE = constants.HTYPE_GROUP
14919 def ExpandNames(self):
14920 # This will raise errors.OpPrereqError on its own:
14921 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14922 self.needed_locks = {
14923 locking.LEVEL_NODEGROUP: [self.group_uuid],
14926 def CheckPrereq(self):
14927 """Check prerequisites.
14929 This checks that the given group name exists as a node group, that it is
14930 empty (i.e., contains no nodes), and that it is not the last group in the cluster.
14934 # Verify that the group is empty.
14935 group_nodes = [node.name
14936 for node in self.cfg.GetAllNodesInfo().values()
14937 if node.group == self.group_uuid]
14940 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14942 (self.op.group_name,
14943 utils.CommaJoin(utils.NiceSort(group_nodes))),
14944 errors.ECODE_STATE)
14946 # Verify the cluster would not be left group-less.
14947 if len(self.cfg.GetNodeGroupList()) == 1:
14948 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14949 " removed" % self.op.group_name,
14950 errors.ECODE_STATE)
14952 def BuildHooksEnv(self):
14953 """Build hooks env.
14957 "GROUP_NAME": self.op.group_name,
14960 def BuildHooksNodes(self):
14961 """Build hooks nodes.
14964 mn = self.cfg.GetMasterNode()
14965 return ([mn], [mn])
14967 def Exec(self, feedback_fn):
14968 """Remove the node group.
14972 self.cfg.RemoveNodeGroup(self.group_uuid)
14973 except errors.ConfigurationError:
14974 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14975 (self.op.group_name, self.group_uuid))
14977 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14980 class LUGroupRename(LogicalUnit):
14981 HPATH = "group-rename"
14982 HTYPE = constants.HTYPE_GROUP
14985 def ExpandNames(self):
14986 # This raises errors.OpPrereqError on its own:
14987 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14989 self.needed_locks = {
14990 locking.LEVEL_NODEGROUP: [self.group_uuid],
14993 def CheckPrereq(self):
14994 """Check prerequisites.
14996 Ensures requested new name is not yet used.
15000 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15001 except errors.OpPrereqError:
15004 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15005 " node group (UUID: %s)" %
15006 (self.op.new_name, new_name_uuid),
15007 errors.ECODE_EXISTS)
15009 def BuildHooksEnv(self):
15010 """Build hooks env.
15014 "OLD_NAME": self.op.group_name,
15015 "NEW_NAME": self.op.new_name,
15018 def BuildHooksNodes(self):
15019 """Build hooks nodes.
15022 mn = self.cfg.GetMasterNode()
15024 all_nodes = self.cfg.GetAllNodesInfo()
15025 all_nodes.pop(mn, None)
15028 run_nodes.extend(node.name for node in all_nodes.values()
15029 if node.group == self.group_uuid)
15031 return (run_nodes, run_nodes)
15033 def Exec(self, feedback_fn):
15034 """Rename the node group.
15037 group = self.cfg.GetNodeGroup(self.group_uuid)
15040 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15041 (self.op.group_name, self.group_uuid))
15043 group.name = self.op.new_name
15044 self.cfg.Update(group, feedback_fn)
15046 return self.op.new_name
15049 class LUGroupEvacuate(LogicalUnit):
15050 HPATH = "group-evacuate"
15051 HTYPE = constants.HTYPE_GROUP
15054 def ExpandNames(self):
15055 # This raises errors.OpPrereqError on its own:
15056 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15058 if self.op.target_groups:
15059 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15060 self.op.target_groups)
15062 self.req_target_uuids = []
15064 if self.group_uuid in self.req_target_uuids:
15065 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15066 " as a target group (targets are %s)" %
15068 utils.CommaJoin(self.req_target_uuids)),
15069 errors.ECODE_INVAL)
15071 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15073 self.share_locks = _ShareAll()
15074 self.needed_locks = {
15075 locking.LEVEL_INSTANCE: [],
15076 locking.LEVEL_NODEGROUP: [],
15077 locking.LEVEL_NODE: [],
15080 def DeclareLocks(self, level):
15081 if level == locking.LEVEL_INSTANCE:
15082 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15084 # Lock instances optimistically, needs verification once node and group
15085 # locks have been acquired
15086 self.needed_locks[locking.LEVEL_INSTANCE] = \
15087 self.cfg.GetNodeGroupInstances(self.group_uuid)
15089 elif level == locking.LEVEL_NODEGROUP:
15090 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15092 if self.req_target_uuids:
15093 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15095 # Lock all groups used by instances optimistically; this requires going
15096 # via the node before it's locked, requiring verification later on
15097 lock_groups.update(group_uuid
15098 for instance_name in
15099 self.owned_locks(locking.LEVEL_INSTANCE)
15101 self.cfg.GetInstanceNodeGroups(instance_name))
15103 # No target groups, need to lock all of them
15104 lock_groups = locking.ALL_SET
15106 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15108 elif level == locking.LEVEL_NODE:
15109 # This will only lock the nodes in the group to be evacuated which
15110 # contain actual instances
15111 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15112 self._LockInstancesNodes()
15114 # Lock all nodes in group to be evacuated and target groups
15115 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15116 assert self.group_uuid in owned_groups
15117 member_nodes = [node_name
15118 for group in owned_groups
15119 for node_name in self.cfg.GetNodeGroup(group).members]
15120 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15122 def CheckPrereq(self):
15123 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15124 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15125 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15127 assert owned_groups.issuperset(self.req_target_uuids)
15128 assert self.group_uuid in owned_groups
15130 # Check if locked instances are still correct
15131 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15133 # Get instance information
15134 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15136 # Check if node groups for locked instances are still correct
15137 _CheckInstancesNodeGroups(self.cfg, self.instances,
15138 owned_groups, owned_nodes, self.group_uuid)
15140 if self.req_target_uuids:
15141 # User requested specific target groups
15142 self.target_uuids = self.req_target_uuids
15144 # All groups except the one to be evacuated are potential targets
15145 self.target_uuids = [group_uuid for group_uuid in owned_groups
15146 if group_uuid != self.group_uuid]
15148 if not self.target_uuids:
15149 raise errors.OpPrereqError("There are no possible target groups",
15150 errors.ECODE_INVAL)
15152 def BuildHooksEnv(self):
15153 """Build hooks env.
15157 "GROUP_NAME": self.op.group_name,
15158 "TARGET_GROUPS": " ".join(self.target_uuids),
15161 def BuildHooksNodes(self):
15162 """Build hooks nodes.
15165 mn = self.cfg.GetMasterNode()
15167 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15169 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15171 return (run_nodes, run_nodes)
15173 def Exec(self, feedback_fn):
15174 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15176 assert self.group_uuid not in self.target_uuids
15178 req = iallocator.IAReqGroupChange(instances=instances,
15179 target_groups=self.target_uuids)
15180 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15182 ial.Run(self.op.iallocator)
15184 if not ial.success:
15185 raise errors.OpPrereqError("Can't compute group evacuation using"
15186 " iallocator '%s': %s" %
15187 (self.op.iallocator, ial.info),
15188 errors.ECODE_NORES)
15190 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15192 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15193 len(jobs), self.op.group_name)
15195 return ResultWithJobs(jobs)
15198 class TagsLU(NoHooksLU): # pylint: disable=W0223
15199 """Generic tags LU.
15201 This is an abstract class which is the parent of all the other tags LUs.
15204 def ExpandNames(self):
15205 self.group_uuid = None
15206 self.needed_locks = {}
15208 if self.op.kind == constants.TAG_NODE:
15209 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15210 lock_level = locking.LEVEL_NODE
15211 lock_name = self.op.name
15212 elif self.op.kind == constants.TAG_INSTANCE:
15213 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15214 lock_level = locking.LEVEL_INSTANCE
15215 lock_name = self.op.name
15216 elif self.op.kind == constants.TAG_NODEGROUP:
15217 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15218 lock_level = locking.LEVEL_NODEGROUP
15219 lock_name = self.group_uuid
15220 elif self.op.kind == constants.TAG_NETWORK:
15221 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15222 lock_level = locking.LEVEL_NETWORK
15223 lock_name = self.network_uuid
15228 if lock_level and getattr(self.op, "use_locking", True):
15229 self.needed_locks[lock_level] = lock_name
15231 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15232 # not possible to acquire the BGL based on opcode parameters)
15234 def CheckPrereq(self):
15235 """Check prerequisites.
15238 if self.op.kind == constants.TAG_CLUSTER:
15239 self.target = self.cfg.GetClusterInfo()
15240 elif self.op.kind == constants.TAG_NODE:
15241 self.target = self.cfg.GetNodeInfo(self.op.name)
15242 elif self.op.kind == constants.TAG_INSTANCE:
15243 self.target = self.cfg.GetInstanceInfo(self.op.name)
15244 elif self.op.kind == constants.TAG_NODEGROUP:
15245 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15246 elif self.op.kind == constants.TAG_NETWORK:
15247 self.target = self.cfg.GetNetwork(self.network_uuid)
15249 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15250 str(self.op.kind), errors.ECODE_INVAL)
15253 class LUTagsGet(TagsLU):
15254 """Returns the tags of a given object.
15259 def ExpandNames(self):
15260 TagsLU.ExpandNames(self)
15262 # Share locks as this is only a read operation
15263 self.share_locks = _ShareAll()
15265 def Exec(self, feedback_fn):
15266 """Returns the tag list.
15269 return list(self.target.GetTags())
15272 class LUTagsSearch(NoHooksLU):
15273 """Searches the tags for a given pattern.
15278 def ExpandNames(self):
15279 self.needed_locks = {}
15281 def CheckPrereq(self):
15282 """Check prerequisites.
15284 This checks the pattern passed for validity by compiling it.
15288 self.re = re.compile(self.op.pattern)
15289 except re.error, err:
15290 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15291 (self.op.pattern, err), errors.ECODE_INVAL)
15293 def Exec(self, feedback_fn):
15294 """Returns the tag list.
15298 tgts = [("/cluster", cfg.GetClusterInfo())]
15299 ilist = cfg.GetAllInstancesInfo().values()
15300 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15301 nlist = cfg.GetAllNodesInfo().values()
15302 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15303 tgts.extend(("/nodegroup/%s" % n.name, n)
15304 for n in cfg.GetAllNodeGroupsInfo().values())
15306 for path, target in tgts:
15307 for tag in target.GetTags():
15308 if self.re.search(tag):
15309 results.append((path, tag))
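# The result is a list of (path, tag) pairs, for example (illustrative values
# only, assuming an instance "web1" carries the tag "frontend"):
#
#   [("/instances/web1", "frontend")]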
15313 class LUTagsSet(TagsLU):
15314 """Sets a tag on a given object.
15319 def CheckPrereq(self):
15320 """Check prerequisites.
15322 This checks the type and length of the tag name and value.
15325 TagsLU.CheckPrereq(self)
15326 for tag in self.op.tags:
15327 objects.TaggableObject.ValidateTag(tag)
15329 def Exec(self, feedback_fn):
15334 for tag in self.op.tags:
15335 self.target.AddTag(tag)
15336 except errors.TagError, err:
15337 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15338 self.cfg.Update(self.target, feedback_fn)
15341 class LUTagsDel(TagsLU):
15342 """Delete a list of tags from a given object.
15347 def CheckPrereq(self):
15348 """Check prerequisites.
15350 This checks that we have the given tag.
15353 TagsLU.CheckPrereq(self)
15354 for tag in self.op.tags:
15355 objects.TaggableObject.ValidateTag(tag)
15356 del_tags = frozenset(self.op.tags)
15357 cur_tags = self.target.GetTags()
15359 diff_tags = del_tags - cur_tags
15361 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15362 raise errors.OpPrereqError("Tag(s) %s not found" %
15363 (utils.CommaJoin(diff_names), ),
15364 errors.ECODE_NOENT)
15366 def Exec(self, feedback_fn):
15367 """Remove the tag from the object.
15370 for tag in self.op.tags:
15371 self.target.RemoveTag(tag)
15372 self.cfg.Update(self.target, feedback_fn)
15375 class LUTestDelay(NoHooksLU):
15376 """Sleep for a specified amount of time.
15378 This LU sleeps on the master and/or nodes for a specified amount of time.
15384 def ExpandNames(self):
15385 """Expand names and set required locks.
15387 This expands the node list, if any.
15390 self.needed_locks = {}
15391 if self.op.on_nodes:
15392 # _GetWantedNodes can be used here, but is not always appropriate to use
15393 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15394 # more information.
15395 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15396 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15398 def _TestDelay(self):
15399 """Do the actual sleep.
15402 if self.op.on_master:
15403 if not utils.TestDelay(self.op.duration):
15404 raise errors.OpExecError("Error during master delay test")
15405 if self.op.on_nodes:
15406 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15407 for node, node_result in result.items():
15408 node_result.Raise("Failure during rpc call to node %s" % node)
15410 def Exec(self, feedback_fn):
15411 """Execute the test delay opcode, with the wanted repetitions.
15414 if self.op.repeat == 0:
15417 top_value = self.op.repeat - 1
15418 for i in range(self.op.repeat):
15419 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15423 class LURestrictedCommand(NoHooksLU):
15424 """Logical unit for executing restricted commands.
15429 def ExpandNames(self):
15431 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15433 self.needed_locks = {
15434 locking.LEVEL_NODE: self.op.nodes,
15436 self.share_locks = {
15437 locking.LEVEL_NODE: not self.op.use_locking,
15440 def CheckPrereq(self):
15441 """Check prerequisites.
15445 def Exec(self, feedback_fn):
15446 """Execute restricted command and return output.
15449 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15451 # Check if correct locks are held
15452 assert set(self.op.nodes).issubset(owned_nodes)
15454 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15458 for node_name in self.op.nodes:
15459 nres = rpcres[node_name]
15461 msg = ("Command '%s' on node '%s' failed: %s" %
15462 (self.op.command, node_name, nres.fail_msg))
15463 result.append((False, msg))
15465 result.append((True, nres.payload))
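# The value built above is a list with one (success, payload-or-message)
# tuple per requested node, in self.op.nodes order; for example (illustrative
# values only):
#
#   [(True, "command output from n1"),
#    (False, "Command 'foo' on node 'n2' failed: ...")]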
15470 class LUTestJqueue(NoHooksLU):
15471 """Utility LU to test some aspects of the job queue.
15476 # Must be lower than default timeout for WaitForJobChange to see whether it
15477 # notices changed jobs
15478 _CLIENT_CONNECT_TIMEOUT = 20.0
15479 _CLIENT_CONFIRM_TIMEOUT = 60.0
15482 def _NotifyUsingSocket(cls, cb, errcls):
15483 """Opens a Unix socket and waits for another program to connect.
15486 @param cb: Callback to send socket name to client
15487 @type errcls: class
15488 @param errcls: Exception class to use for errors
15491 # Using a temporary directory as there's no easy way to create temporary
15492 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
15494 tmpdir = tempfile.mkdtemp()
15496 tmpsock = utils.PathJoin(tmpdir, "sock")
15498 logging.debug("Creating temporary socket at %s", tmpsock)
15499 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15504 # Send details to client
15507 # Wait for client to connect before continuing
15508 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15510 (conn, _) = sock.accept()
15511 except socket.error, err:
15512 raise errcls("Client didn't connect in time (%s)" % err)
15516 # Remove as soon as client is connected
15517 shutil.rmtree(tmpdir)
15519 # Wait for client to close
15522 # pylint: disable=E1101
15523 # Instance of '_socketobject' has no ... member
15524 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15526 except socket.error, err:
15527 raise errcls("Client failed to confirm notification (%s)" % err)
15531 def _SendNotification(self, test, arg, sockname):
15532 """Sends a notification to the client.
15535 @param test: Test name
15536 @param arg: Test argument (depends on test)
15537 @type sockname: string
15538 @param sockname: Socket path
15541 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15543 def _Notify(self, prereq, test, arg):
15544 """Notifies the client of a test.
15547 @param prereq: Whether this is a prereq-phase test
15549 @param test: Test name
15550 @param arg: Test argument (depends on test)
15554 errcls = errors.OpPrereqError
15556 errcls = errors.OpExecError
15558 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15562 def CheckArguments(self):
15563 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15564 self.expandnames_calls = 0
15566 def ExpandNames(self):
15567 checkargs_calls = getattr(self, "checkargs_calls", 0)
15568 if checkargs_calls < 1:
15569 raise errors.ProgrammerError("CheckArguments was not called")
15571 self.expandnames_calls += 1
15573 if self.op.notify_waitlock:
15574 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15576 self.LogInfo("Expanding names")
15578 # Get lock on master node (just to get a lock, not for a particular reason)
15579 self.needed_locks = {
15580 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15583 def Exec(self, feedback_fn):
15584 if self.expandnames_calls < 1:
15585 raise errors.ProgrammerError("ExpandNames was not called")
15587 if self.op.notify_exec:
15588 self._Notify(False, constants.JQT_EXEC, None)
15590 self.LogInfo("Executing")
15592 if self.op.log_messages:
15593 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15594 for idx, msg in enumerate(self.op.log_messages):
15595 self.LogInfo("Sending log message %s", idx + 1)
15596 feedback_fn(constants.JQT_MSGPREFIX + msg)
15597 # Report how many test messages have been sent
15598 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15601 raise errors.OpExecError("Opcode failure was requested")
15606 class LUTestAllocator(NoHooksLU):
15607 """Run allocator tests.
15609 This LU runs the allocator tests
15612 def CheckPrereq(self):
15613 """Check prerequisites.
15615 This checks the opcode parameters depending on the direction and mode of the test.
15618 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15619 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15620 for attr in ["memory", "disks", "disk_template",
15621 "os", "tags", "nics", "vcpus"]:
15622 if not hasattr(self.op, attr):
15623 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15624 attr, errors.ECODE_INVAL)
15625 iname = self.cfg.ExpandInstanceName(self.op.name)
15626 if iname is not None:
15627 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15628 iname, errors.ECODE_EXISTS)
15629 if not isinstance(self.op.nics, list):
15630 raise errors.OpPrereqError("Invalid parameter 'nics'",
15631 errors.ECODE_INVAL)
15632 if not isinstance(self.op.disks, list):
15633 raise errors.OpPrereqError("Invalid parameter 'disks'",
15634 errors.ECODE_INVAL)
15635 for row in self.op.disks:
15636 if (not isinstance(row, dict) or
15637 constants.IDISK_SIZE not in row or
15638 not isinstance(row[constants.IDISK_SIZE], int) or
15639 constants.IDISK_MODE not in row or
15640 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15641 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15642 " parameter", errors.ECODE_INVAL)
15643 if self.op.hypervisor is None:
15644 self.op.hypervisor = self.cfg.GetHypervisorType()
15645 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15646 fname = _ExpandInstanceName(self.cfg, self.op.name)
15647 self.op.name = fname
15648 self.relocate_from = \
15649 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15650 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15651 constants.IALLOCATOR_MODE_NODE_EVAC):
15652 if not self.op.instances:
15653 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15654 self.op.instances = _GetWantedInstances(self, self.op.instances)
15656 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15657 self.op.mode, errors.ECODE_INVAL)
15659 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15660 if self.op.iallocator is None:
15661 raise errors.OpPrereqError("Missing allocator name",
15662 errors.ECODE_INVAL)
15663 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15664 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15665 self.op.direction, errors.ECODE_INVAL)
15667 def Exec(self, feedback_fn):
15668 """Run the allocator test.
15671 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15672 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15673 memory=self.op.memory,
15674 disks=self.op.disks,
15675 disk_template=self.op.disk_template,
15679 vcpus=self.op.vcpus,
15680 spindle_use=self.op.spindle_use,
15681 hypervisor=self.op.hypervisor)
15682 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15683 req = iallocator.IAReqRelocate(name=self.op.name,
15684 relocate_from=list(self.relocate_from))
15685 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15686 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15687 target_groups=self.op.target_groups)
15688 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15689 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15690 evac_mode=self.op.evac_mode)
15691 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15692 disk_template = self.op.disk_template
15693 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15694 memory=self.op.memory,
15695 disks=self.op.disks,
15696 disk_template=disk_template,
15700 vcpus=self.op.vcpus,
15701 spindle_use=self.op.spindle_use,
15702 hypervisor=self.op.hypervisor)
15703 for idx in range(self.op.count)]
15704 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15706 raise errors.ProgrammerError("Unhandled mode %s in"
15707 " LUTestAllocator.Exec" % self.op.mode)
15709 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15710 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15711 result = ial.in_text
15713 ial.Run(self.op.iallocator, validate=False)
15714 result = ial.out_text
15718 class LUNetworkAdd(LogicalUnit):
15719 """Logical unit for creating networks.
15722 HPATH = "network-add"
15723 HTYPE = constants.HTYPE_NETWORK
15726 def BuildHooksNodes(self):
15727 """Build hooks nodes.
15730 mn = self.cfg.GetMasterNode()
15731 return ([mn], [mn])
15733 def CheckArguments(self):
15734 if self.op.mac_prefix:
15735 self.op.mac_prefix = \
15736 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15738 def ExpandNames(self):
15739 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15741 if self.op.conflicts_check:
15742 self.share_locks[locking.LEVEL_NODE] = 1
15743 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
15744 self.needed_locks = {
15745 locking.LEVEL_NODE: locking.ALL_SET,
15746 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
15749 self.needed_locks = {}
15751 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15753 def CheckPrereq(self):
15754 if self.op.network is None:
15755 raise errors.OpPrereqError("Network must be given",
15756 errors.ECODE_INVAL)
15758 uuid = self.cfg.LookupNetwork(self.op.network_name)
15761 raise errors.OpPrereqError("Network '%s' already defined" %
15762 self.op.network_name, errors.ECODE_EXISTS)
15764 # Check tag validity
15765 for tag in self.op.tags:
15766 objects.TaggableObject.ValidateTag(tag)
15768 def BuildHooksEnv(self):
15769 """Build hooks env.
15773 "name": self.op.network_name,
15774 "subnet": self.op.network,
15775 "gateway": self.op.gateway,
15776 "network6": self.op.network6,
15777 "gateway6": self.op.gateway6,
15778 "mac_prefix": self.op.mac_prefix,
15779 "network_type": self.op.network_type,
15780 "tags": self.op.tags,
15782 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15784 def Exec(self, feedback_fn):
15785 """Add the ip pool to the cluster.
15788 nobj = objects.Network(name=self.op.network_name,
15789 network=self.op.network,
15790 gateway=self.op.gateway,
15791 network6=self.op.network6,
15792 gateway6=self.op.gateway6,
15793 mac_prefix=self.op.mac_prefix,
15794 network_type=self.op.network_type,
15795 uuid=self.network_uuid,
15796 family=constants.IP4_VERSION)
15797 # Initialize the associated address pool
15799 pool = network.AddressPool.InitializeNetwork(nobj)
15800 except errors.AddressPoolError, e:
15801 raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
15803 # Check if we need to reserve the nodes and the cluster master IP
15804 # These may not be allocated to any instances in routed mode, as
15805 # they wouldn't function anyway.
15806 if self.op.conflicts_check:
15807 for node in self.cfg.GetAllNodesInfo().values():
15808 for ip in [node.primary_ip, node.secondary_ip]:
15810 if pool.Contains(ip):
15812 self.LogInfo("Reserved IP address of node '%s' (%s)",
15814 except errors.AddressPoolError:
15815 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
15818 master_ip = self.cfg.GetClusterInfo().master_ip
15820 if pool.Contains(master_ip):
15821 pool.Reserve(master_ip)
15822 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
15823 except errors.AddressPoolError:
15824 self.LogWarning("Cannot reserve cluster master IP address (%s)",
15827 if self.op.add_reserved_ips:
15828 for ip in self.op.add_reserved_ips:
15830 pool.Reserve(ip, external=True)
15831 except errors.AddressPoolError, e:
15832 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15835 for tag in self.op.tags:
15838 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15839 del self.remove_locks[locking.LEVEL_NETWORK]
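# Illustrative sketch of the corresponding opcode (only parameters referenced
# above are shown; the addresses are example values, not defaults):
#
#   op = opcodes.OpNetworkAdd(network_name="net1",
#                             network="192.0.2.0/24",
#                             gateway="192.0.2.1",
#                             add_reserved_ips=["192.0.2.10"],
#                             conflicts_check=True,
#                             tags=[])
#
# Exec() then creates the Network object, initializes its AddressPool and,
# with conflicts_check set, reserves node and master IPs that fall inside the
# pool, plus the explicitly requested reservations.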
15842 class LUNetworkRemove(LogicalUnit):
15843 HPATH = "network-remove"
15844 HTYPE = constants.HTYPE_NETWORK
15847 def ExpandNames(self):
15848 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15850 if not self.network_uuid:
15851 raise errors.OpPrereqError(("Network '%s' not found" %
15852 self.op.network_name),
15853 errors.ECODE_INVAL)
15855 self.share_locks[locking.LEVEL_NODEGROUP] = 1
15856 self.needed_locks = {
15857 locking.LEVEL_NETWORK: [self.network_uuid],
15858 locking.LEVEL_NODEGROUP: locking.ALL_SET,
15861 def CheckPrereq(self):
15862 """Check prerequisites.
15864 This checks that the given network exists and that it is not connected
15865 to (i.e., in use by) any node group.
15869 # Verify that the network is not connected to any node group.
15870 node_groups = [group.name
15871 for group in self.cfg.GetAllNodeGroupsInfo().values()
15872 if self.network_uuid in group.networks]
15875 self.LogWarning("Network '%s' is connected to the following"
15876 " node groups: %s" %
15877 (self.op.network_name,
15878 utils.CommaJoin(utils.NiceSort(node_groups))))
15879 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
15881 def BuildHooksEnv(self):
15882 """Build hooks env.
15886 "NETWORK_NAME": self.op.network_name,
15889 def BuildHooksNodes(self):
15890 """Build hooks nodes.
15893 mn = self.cfg.GetMasterNode()
15894 return ([mn], [mn])
15896 def Exec(self, feedback_fn):
15897 """Remove the network.
15901 self.cfg.RemoveNetwork(self.network_uuid)
15902 except errors.ConfigurationError:
15903 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15904 (self.op.network_name, self.network_uuid))
15907 class LUNetworkSetParams(LogicalUnit):
15908 """Modifies the parameters of a network.
15911 HPATH = "network-modify"
15912 HTYPE = constants.HTYPE_NETWORK
15915 def CheckArguments(self):
15916 if (self.op.gateway and
15917 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15918 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15919 " at once", errors.ECODE_INVAL)
15921 def ExpandNames(self):
15922 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15923 if self.network_uuid is None:
15924 raise errors.OpPrereqError(("Network '%s' not found" %
15925 self.op.network_name),
15926 errors.ECODE_INVAL)
15928 self.needed_locks = {
15929 locking.LEVEL_NETWORK: [self.network_uuid],
15932 def CheckPrereq(self):
15933 """Check prerequisites.
15936 self.network = self.cfg.GetNetwork(self.network_uuid)
15937 self.gateway = self.network.gateway
15938 self.network_type = self.network.network_type
15939 self.mac_prefix = self.network.mac_prefix
15940 self.network6 = self.network.network6
15941 self.gateway6 = self.network.gateway6
15942 self.tags = self.network.tags
15944 self.pool = network.AddressPool(self.network)
15946 if self.op.gateway:
15947 if self.op.gateway == constants.VALUE_NONE:
15948 self.gateway = None
15950 self.gateway = self.op.gateway
15951 if self.pool.IsReserved(self.gateway):
15952 raise errors.OpPrereqError("%s is already reserved" %
15953 self.gateway, errors.ECODE_INVAL)
15955 if self.op.network_type:
15956 if self.op.network_type == constants.VALUE_NONE:
15957 self.network_type = None
15959 self.network_type = self.op.network_type
15961 if self.op.mac_prefix:
15962 if self.op.mac_prefix == constants.VALUE_NONE:
15963 self.mac_prefix = None
15965 self.mac_prefix = \
15966 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15968 if self.op.gateway6:
15969 if self.op.gateway6 == constants.VALUE_NONE:
15970 self.gateway6 = None
15972 self.gateway6 = self.op.gateway6
15974 if self.op.network6:
15975 if self.op.network6 == constants.VALUE_NONE:
15976 self.network6 = None
15978 self.network6 = self.op.network6
15980 def BuildHooksEnv(self):
15981 """Build hooks env.
15985 "name": self.op.network_name,
15986 "subnet": self.network.network,
15987 "gateway": self.gateway,
15988 "network6": self.network6,
15989 "gateway6": self.gateway6,
15990 "mac_prefix": self.mac_prefix,
15991 "network_type": self.network_type,
15994 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15996 def BuildHooksNodes(self):
15997 """Build hooks nodes.
16000 mn = self.cfg.GetMasterNode()
16001 return ([mn], [mn])
16003 def Exec(self, feedback_fn):
16004 """Modifies the network.
16007 #TODO: reserve/release via temporary reservation manager
16008 # extend cfg.ReserveIp/ReleaseIp with the external flag
16009 if self.op.gateway:
16010 if self.gateway == self.network.gateway:
16011 self.LogWarning("Gateway is already %s", self.gateway)
16014 self.pool.Reserve(self.gateway, external=True)
16015 if self.network.gateway:
16016 self.pool.Release(self.network.gateway, external=True)
16017 self.network.gateway = self.gateway
16019 if self.op.add_reserved_ips:
16020 for ip in self.op.add_reserved_ips:
16022 if self.pool.IsReserved(ip):
16023 self.LogWarning("IP address %s is already reserved", ip)
16025 self.pool.Reserve(ip, external=True)
16026 except errors.AddressPoolError, err:
16027 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16029 if self.op.remove_reserved_ips:
16030 for ip in self.op.remove_reserved_ips:
16031 if ip == self.network.gateway:
16032 self.LogWarning("Cannot unreserve Gateway's IP")
16035 if not self.pool.IsReserved(ip):
16036 self.LogWarning("IP address %s is already unreserved", ip)
16038 self.pool.Release(ip, external=True)
16039 except errors.AddressPoolError, err:
16040 self.LogWarning("Cannot release IP address %s: %s", ip, err)
16042 if self.op.mac_prefix:
16043 self.network.mac_prefix = self.mac_prefix
16045 if self.op.network6:
16046 self.network.network6 = self.network6
16048 if self.op.gateway6:
16049 self.network.gateway6 = self.gateway6
16051 if self.op.network_type:
16052 self.network.network_type = self.network_type
16054 self.pool.Validate()
16056 self.cfg.Update(self.network, feedback_fn)
16059 class _NetworkQuery(_QueryBase):
16060 FIELDS = query.NETWORK_FIELDS
16062 def ExpandNames(self, lu):
16063 lu.needed_locks = {}
16065 self._all_networks = lu.cfg.GetAllNetworksInfo()
16066 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
16069 self.wanted = [name_to_uuid[name]
16070 for name in utils.NiceSort(name_to_uuid.keys())]
16072 # Accept names to be either names or UUIDs.
16075 all_uuid = frozenset(self._all_networks.keys())
16077 for name in self.names:
16078 if name in all_uuid:
16079 self.wanted.append(name)
16080 elif name in name_to_uuid:
16081 self.wanted.append(name_to_uuid[name])
16083 missing.append(name)
16086 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16087 errors.ECODE_NOENT)
16089 def DeclareLocks(self, lu, level):
16092 def _GetQueryData(self, lu):
16093 """Computes the list of networks and their attributes.
16096 do_instances = query.NETQ_INST in self.requested_data
16097 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
16098 do_stats = query.NETQ_STATS in self.requested_data
16100 network_to_groups = None
16101 network_to_instances = None
16104 # For NETQ_GROUP, we need to map network->[groups]
16106 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16107 network_to_groups = dict((uuid, []) for uuid in self.wanted)
16110 all_instances = lu.cfg.GetAllInstancesInfo()
16111 all_nodes = lu.cfg.GetAllNodesInfo()
16112 network_to_instances = dict((uuid, []) for uuid in self.wanted)
16114 for group in all_groups.values():
16116 group_nodes = [node.name for node in all_nodes.values() if
16117 node.group == group.uuid]
16118 group_instances = [instance for instance in all_instances.values()
16119 if instance.primary_node in group_nodes]
16121 for net_uuid in group.networks.keys():
16122 if net_uuid in network_to_groups:
16123 netparams = group.networks[net_uuid]
16124 mode = netparams[constants.NIC_MODE]
16125 link = netparams[constants.NIC_LINK]
16126 info = group.name + "(" + mode + ", " + link + ")"
16127 network_to_groups[net_uuid].append(info)
16130 for instance in group_instances:
16131 for nic in instance.nics:
16132 if nic.network == self._all_networks[net_uuid].name:
16133 network_to_instances[net_uuid].append(instance.name)
16138 for uuid, net in self._all_networks.items():
16139 if uuid in self.wanted:
16140 pool = network.AddressPool(net)
16142 "free_count": pool.GetFreeCount(),
16143 "reserved_count": pool.GetReservedCount(),
16144 "map": pool.GetMap(),
16145 "external_reservations":
16146 utils.CommaJoin(pool.GetExternalReservations()),
16149 return query.NetworkQueryData([self._all_networks[uuid]
16150 for uuid in self.wanted],
16152 network_to_instances,
16156 class LUNetworkQuery(NoHooksLU):
16157 """Logical unit for querying networks.
16162 def CheckArguments(self):
16163 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16164 self.op.output_fields, False)
16166 def ExpandNames(self):
16167 self.nq.ExpandNames(self)
16169 def Exec(self, feedback_fn):
16170 return self.nq.OldStyleQuery(self)
16173 class LUNetworkConnect(LogicalUnit):
16174 """Connect a network to a nodegroup
16177 HPATH = "network-connect"
16178 HTYPE = constants.HTYPE_NETWORK
16181 def ExpandNames(self):
16182 self.network_name = self.op.network_name
16183 self.group_name = self.op.group_name
16184 self.network_mode = self.op.network_mode
16185 self.network_link = self.op.network_link
16187 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16188 if self.network_uuid is None:
16189 raise errors.OpPrereqError("Network %s does not exist" %
16190 self.network_name, errors.ECODE_INVAL)
16192 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16193 if self.group_uuid is None:
16194 raise errors.OpPrereqError("Group %s does not exist" %
16195 self.group_name, errors.ECODE_INVAL)
16197 self.needed_locks = {
16198 locking.LEVEL_INSTANCE: [],
16199 locking.LEVEL_NODEGROUP: [self.group_uuid],
16201 self.share_locks[locking.LEVEL_INSTANCE] = 1
16203 if self.op.conflicts_check:
16204 self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16205 self.share_locks[locking.LEVEL_NETWORK] = 1
16207 def DeclareLocks(self, level):
16208 if level == locking.LEVEL_INSTANCE:
16209 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16211 # Lock instances optimistically; needs verification once the group lock has been acquired
16213 if self.op.conflicts_check:
16214 self.needed_locks[locking.LEVEL_INSTANCE] = \
16215 self.cfg.GetNodeGroupInstances(self.group_uuid)
16217 def BuildHooksEnv(self):
16219 "GROUP_NAME": self.group_name,
16220 "GROUP_NETWORK_MODE": self.network_mode,
16221 "GROUP_NETWORK_LINK": self.network_link,
16225 def BuildHooksNodes(self):
16226 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16227 return (nodes, nodes)
16229 def CheckPrereq(self):
16230 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16232 assert self.group_uuid in owned_groups
16235 constants.NIC_MODE: self.network_mode,
16236 constants.NIC_LINK: self.network_link,
16238 objects.NIC.CheckParameterSyntax(self.netparams)
16240 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16241 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16242 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16243 self.connected = False
16244 if self.network_uuid in self.group.networks:
16245 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16246 (self.network_name, self.group.name))
16247 self.connected = True
16250 if self.op.conflicts_check:
16251 pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))
16253 _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
16256 def Exec(self, feedback_fn):
16260 self.group.networks[self.network_uuid] = self.netparams
16261 self.cfg.Update(self.group, feedback_fn)
16264 def _NetworkConflictCheck(lu, check_fn, action):
16265 """Checks for network interface conflicts with a network.
16267 @type lu: L{LogicalUnit}
16268 @type check_fn: callable receiving one parameter (L{objects.NIC}) and
16270 @param check_fn: Function checking for conflict
16271 @type action: string
16272 @param action: Part of error message (see code)
16273 @raise errors.OpPrereqError: If conflicting IP addresses are found.
16276 # Check if locked instances are still correct
16277 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
16278 _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)
16282 for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
16283 instconflicts = [(idx, nic.ip)
16284 for (idx, nic) in enumerate(instance.nics)
16288 conflicts.append((instance.name, instconflicts))
16291 lu.LogWarning("IP addresses from network '%s', which is about to %s"
16292 " node group '%s', are in use: %s" %
16293 (lu.network_name, action, lu.group.name,
16294 utils.CommaJoin(("%s: %s" %
16295 (name, _FmtNetworkConflict(details)))
16296 for (name, details) in conflicts)))
16298 raise errors.OpPrereqError("Conflicting IP addresses found; "
16299 " remove/modify the corresponding network"
16300 " interfaces", errors.ECODE_INVAL)
16303 def _FmtNetworkConflict(details):
16304 """Utility for L{_NetworkConflictCheck}.
16307 return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
16308 for (idx, ipaddr) in details)
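# For example (sketch, with made-up addresses):
#
#   _FmtNetworkConflict([(0, "192.0.2.10"), (2, "192.0.2.12")])
#   # -> "nic0/192.0.2.10, nic2/192.0.2.12"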
16311 class LUNetworkDisconnect(LogicalUnit):
16312 """Disconnect a network to a nodegroup
16315 HPATH = "network-disconnect"
16316 HTYPE = constants.HTYPE_NETWORK
16319 def ExpandNames(self):
16320 self.network_name = self.op.network_name
16321 self.group_name = self.op.group_name
16323 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16324 if self.network_uuid is None:
16325 raise errors.OpPrereqError("Network %s does not exist" %
16326 self.network_name, errors.ECODE_INVAL)
16328 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16329 if self.group_uuid is None:
16330 raise errors.OpPrereqError("Group %s does not exist" %
16331 self.group_name, errors.ECODE_INVAL)
16333 self.needed_locks = {
16334 locking.LEVEL_INSTANCE: [],
16335 locking.LEVEL_NODEGROUP: [self.group_uuid],
16337 self.share_locks[locking.LEVEL_INSTANCE] = 1
16339 def DeclareLocks(self, level):
16340 if level == locking.LEVEL_INSTANCE:
16341 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16343 # Lock instances optimistically; needs verification once the group lock has been acquired
16345 if self.op.conflicts_check:
16346 self.needed_locks[locking.LEVEL_INSTANCE] = \
16347 self.cfg.GetNodeGroupInstances(self.group_uuid)
16349 def BuildHooksEnv(self):
16351 "GROUP_NAME": self.group_name,
16355 def BuildHooksNodes(self):
16356 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16357 return (nodes, nodes)
16359 def CheckPrereq(self):
16360 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16362 assert self.group_uuid in owned_groups
16364 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16365 self.connected = True
16366 if self.network_uuid not in self.group.networks:
16367 self.LogWarning("Network '%s' is not mapped to group '%s'",
16368 self.network_name, self.group.name)
16369 self.connected = False
16372 if self.op.conflicts_check:
16373 _NetworkConflictCheck(self, lambda nic: nic.network == self.network_name,
16376 def Exec(self, feedback_fn):
16377 if not self.connected:
16380 del self.group.networks[self.network_uuid]
16381 self.cfg.Update(self.group, feedback_fn)
16384 #: Query type implementations
16386 constants.QR_CLUSTER: _ClusterQuery,
16387 constants.QR_INSTANCE: _InstanceQuery,
16388 constants.QR_NODE: _NodeQuery,
16389 constants.QR_GROUP: _GroupQuery,
16390 constants.QR_NETWORK: _NetworkQuery,
16391 constants.QR_OS: _OsQuery,
16392 constants.QR_EXPORT: _ExportQuery,
16395 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16398 def _GetQueryImplementation(name):
16399 """Returns the implemtnation for a query type.
16401 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16405 return _QUERY_IMPL[name]
16407 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16408 errors.ECODE_INVAL)
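# For example (sketch):
#
#   _GetQueryImplementation(constants.QR_GROUP)   # -> _GroupQuery (see above)
#   _GetQueryImplementation("no-such-resource")   # raises OpPrereqError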
16411 def _CheckForConflictingIp(lu, ip, node):
16412 """In case of conflicting ip raise error.
16415 @param ip: ip address
16417 @param node: node name
16420 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16421 if conf_net is not None:
16422 raise errors.OpPrereqError("Conflicting IP found:"
16423 " %s <> %s." % (ip, conf_net),
16424 errors.ECODE_INVAL)
16426 return (None, None)