# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar

# C0302: since we have waaaay too many lines in this module
# Standard-library and third-party modules used further below in this module
import copy
import itertools
import logging

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode

  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    self.proc = processor
    self.cfg = context.cfg
    self.glm = context.glm
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner

    # Dictionaries used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
    self.remove_locks = {}

    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possible

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.
  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      self.needed_locks = {} # No, you can't leave it to the default value None
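      # A sketch, not part of the original examples: acquire all node locks,
      # but in shared rather than exclusive mode (see share_locks above)
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        }
      self.share_locks[locking.LEVEL_NODE] = 1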
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # self.needed_locks = {} # Exclusive LUs don't need locks.
    raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
  def Exec(self, feedback_fn):

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in

    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function

    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function

    raise NotImplementedError
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable=W0613,R0201
  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done

    if self.needed_locks is None:
      self.needed_locks = {}

    assert locking.LEVEL_INSTANCE not in self.needed_locks, \
      "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.
    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    raise AssertionError("BuildHooksNodes called for NoHooksLU")


  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq

  def __init__(self, lu):

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or

    raise NotImplementedError
  """Base for query utility classes.

  #: Attribute holding field definitions

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield=self.SORT_FIELD)
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

      names = lu.owned_locks(lock_level)

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order

    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


  """Returns a dict declaring all lock levels shared.

  return dict.fromkeys(locking.LEVELS, 1)
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  if names == locking.ALL_SET:
    return locking.ALL_SET


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param nodes: list of node names or None for all nodes
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
  @return: the new parameter dictionary

  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      params_copy[key] = val
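
# Illustrative sketch, not part of the original module: how the merge
# semantics of _GetUpdatedParams work out in practice; the parameter names
# and values below are made up for the example.
def _ExampleGetUpdatedParams():
  old = {
    "kernel_path": "/boot/vmlinuz",
    "root_path": "/dev/xvda1",
    }
  update = {
    "root_path": constants.VALUE_DEFAULT,  # reset: the key is dropped
    "serial_console": True,                # new key: added
    }
  # Result: {"kernel_path": "/boot/vmlinuz", "serial_console": True}
  return _GetUpdatedParams(old, update)
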
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_default=use_default)
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
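
# Illustrative sketch, not part of the original module: merging per-node
# sub-dicts with a type check; the key names and values are made up.
def _ExampleUpdateAndVerifySubDict():
  base = {"node1": {"cpu_node": 1}, "node2": {"cpu_node": 2}}
  updates = {"node2": {"cpu_node": 4}, "node3": {"cpu_node": 8}}
  type_check = {"cpu_node": constants.VTYPE_INT}
  # Result: node1 kept as-is, node2 updated, node3 added; every merged
  # sub-dict is verified against type_check
  return _UpdateAndVerifySubDict(base, updates, type_check)
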
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with that of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

    invalid_hvs = set(op_input) - constants.HYPER_TYPES
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
    if obj_input is None:
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with that of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                for key, value in op_input.items())
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
    should_release = lambda name: name not in keep
    should_release = None

  owned = lu.owned_locks(level)
    # Not owning any lock at this level, do nothing

    # Determine which locks to release
      if should_release(name):

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)

    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
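
# Illustrative sketch, not part of the original module: typical call
# patterns for _ReleaseLocks inside an LU; "lu" and "node_to_keep" are
# hypothetical parameters.
def _ExampleReleaseLocks(lu, node_to_keep):
  # Keep only the node lock named by node_to_keep, releasing all other
  # node locks owned by this LU
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[node_to_keep])
  # Release every instance lock still owned at this point
  _ReleaseLocks(lu, locking.LEVEL_INSTANCE)
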
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
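
# Illustrative sketch, not part of the original module: the returned dict is
# keyed by (node name, volume name) tuples, which allows a reverse lookup of
# the owning instance; the function and argument names here are hypothetical.
def _ExampleLookupDiskOwner(instances, node_name, vol_name):
  # Which instance owns logical volume vol_name on node_name?
  disk_map = _MapInstanceDisksToNodes(instances)
  return disk_map.get((node_name, vol_name))
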
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  hm = lu.proc.BuildHooksManager(lu)
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  delta = f.NonMatching(selected)
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of

  used_globals = constants.HVC_GLOBALS.intersection(params)
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
      lu.LogWarning("Primary node offline, ignoring check that instance"
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  if value in [None, constants.VALUE_AUTO]:
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
      fqn = "%s/%s" % (name, qualifier)
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
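
# Illustrative sketch, not part of the original module: checking a single
# value against an ipolicy; the concrete policy limits are assumed here.
def _ExampleComputeMinMaxSpec(ipolicy):
  # With, say, a policy allowing 512..4096 MB of memory, a value within the
  # range makes the function return None, while a value outside it returns
  # a human-readable "not in range" message.
  return _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 8192)
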
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @param ipolicy: The ipolicy
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  assert disk_count == len(disk_sizes)

    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))
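
# Illustrative sketch, not part of the original module: verifying a
# hypothetical instance spec against an ipolicy; all numbers are made up.
def _ExampleComputeIPolicySpecViolation(ipolicy):
  # An empty result means the given specs fit the policy; otherwise the
  # human-readable violation messages are returned.
  return _ComputeIPolicySpecViolation(ipolicy, mem_size=1024, cpu_count=2,
                                      disk_count=2, nic_count=1,
                                      disk_sizes=[10240, 20480],
                                      spindle_use=2)
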
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyInstanceSpecViolation(
    ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  if current_group == target_group:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but

  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         network_type, mac_prefix, tags):
  """Builds network related env variables for hooks

  This builds the hook environment from individual variables.

  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type network_type: string
  @param network_type: the type of the network
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @param tags: the tags of the network

    env["NETWORK_NAME"] = name
    env["NETWORK_SUBNET"] = subnet
    env["NETWORK_GATEWAY"] = gateway
    env["NETWORK_SUBNET6"] = network6
    env["NETWORK_GATEWAY6"] = gateway6
    env["NETWORK_MAC_PREFIX"] = mac_prefix
    env["NETWORK_TYPE"] = network_type
    env["NETWORK_TAGS"] = " ".join(tags)
def _BuildNetworkHookEnvByObject(net):
  """Builds network related env variables for hooks

  @type net: L{objects.Network}
  @param net: the network object

    "subnet": net.network,
    "gateway": net.gateway,
    "network6": net.network6,
    "gateway6": net.gateway6,
    "network_type": net.network_type,
    "mac_prefix": net.mac_prefix,

  return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link, network) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @param tags: list of instance tags as strings
  @return: the hook environment for this instance

    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,

    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
        env["INSTANCE_NIC%d_NETWORK" % idx] = net
          nobj = objects.Network.FromDict(netinfo)
            env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
            env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
            env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
            env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
            env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
          if nobj.network_type:
            env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
            env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link

  env["INSTANCE_NIC_COUNT"] = nic_count

    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode

  env["INSTANCE_DISK_COUNT"] = disk_count

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value
def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
    net_uuid = lu.cfg.LookupNetwork(net)
      nobj = lu.cfg.GetNetwork(net_uuid)
      netinfo = objects.Network.ToDict(nobj)
  return (ip, mac, mode, link, net, netinfo)
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

    hooks_nics.append(_NICToTuple(lu, nic))
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
  @type override: dict
  @param override: dictionary with key/values that will override
  @return: the hook environment dictionary

  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes the set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @param name: OS name passed by the user, to check for validity

  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
    raise errors.OpPrereqError("OS name must include a variant",

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
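
# Illustrative sketch, not part of the original module: how an LU would
# typically use _CheckIAllocatorOrNode from its CheckArguments, assuming
# the opcode has "iallocator" and "node" slots.
def _ExampleCheckIAllocatorOrNode(lu):
  # After this call at most one of lu.op.iallocator / lu.op.node is set;
  # if neither was given, the cluster-wide default iallocator is filled in
  # (or an error is raised when no default exists).
  _CheckIAllocatorOrNode(lu, "iallocator", "node")
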
def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @return: Iallocator name

    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
      "OP_TARGET": self.cfg.GetClusterName(),

  def BuildHooksNodes(self):
    """Build hooks nodes.

    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
      "OP_TARGET": self.cfg.GetClusterName(),

  def BuildHooksNodes(self):
    """Build hooks nodes.

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
    instancelist = self.cfg.GetInstanceList()
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
      self.LogWarning("Error disabling the master IP address: %s",

    return master_params.name
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

    fnamemsg = "While verifying %s: %s" % (filename, msg)

    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1934 def _GetAllHypervisorParameters(cluster, instances):
1935 """Compute the set of all hypervisor parameters.
1937 @type cluster: L{objects.Cluster}
1938 @param cluster: the cluster object
1939 @param instances: list of L{objects.Instance}
1940 @param instances: additional instances from which to obtain parameters
1941 @rtype: list of (origin, hypervisor, parameters)
1942 @return: a list with all parameters found, indicating the hypervisor they
1943 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1948 for hv_name in cluster.enabled_hypervisors:
1949 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1951 for os_name, os_hvp in cluster.os_hvp.items():
1952 for hv_name, hv_params in os_hvp.items():
1954 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1955 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1957 # TODO: collapse identical parameter values in a single one
1958 for instance in instances:
1959 if instance.hvparams:
1960 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1961 cluster.FillHV(instance)))
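# Illustrative sketch (assumption, not from the original source): with one
# enabled hypervisor, one OS-level override and one instance, the hvp_data
# list built above would look roughly like
#
#   [("cluster", "xen-pvm", {...cluster-level defaults...}),
#    ("os debian-image", "xen-pvm", {...defaults merged with os_hvp...}),
#    ("instance inst1.example.com", "xen-pvm", {...fully filled params...})]
#
# i.e. one (origin, hypervisor, parameters) tuple per parameter source.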
1966 class _VerifyErrors(object):
1967 """Mix-in for cluster/group verify LUs.
1969 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1970 self.op and self._feedback_fn to be available.)
1974 ETYPE_FIELD = "code"
1975 ETYPE_ERROR = "ERROR"
1976 ETYPE_WARNING = "WARNING"
1978 def _Error(self, ecode, item, msg, *args, **kwargs):
1979 """Format an error message.
1981 Based on the opcode's error_codes parameter, either format a
1982 parseable error code, or a simpler error string.
1984 This must be called only from Exec and functions called from Exec.
1987 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1988 itype, etxt, _ = ecode
1989 # first complete the msg
1992 # then format the whole message
1993 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1994 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2000 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2001 # and finally report it via the feedback_fn
2002 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
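# Illustrative example (hypothetical values) of the two message formats
# produced above: with op.error_codes set, the line sent to feedback_fn is
# machine-parseable; otherwise it is formatted for humans:
#
#   " - ERROR:ENODESSH:node:node2.example.com:ssh communication failed"
#   " - ERROR: node node2.example.com: ssh communication failed"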
2004 def _ErrorIf(self, cond, ecode, *args, **kwargs):
2005 """Log an error message if the passed condition is True.
2009 or self.op.debug_simulate_errors) # pylint: disable=E1101
2011 # If the error code is in the list of ignored errors, demote the error to a warning
2013 (_, etxt, _) = ecode
2014 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2015 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2018 self._Error(ecode, *args, **kwargs)
2020 # do not mark the operation as failed for WARN cases only
2021 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2022 self.bad = self.bad or cond
2025 class LUClusterVerify(NoHooksLU):
2026 """Submits all jobs necessary to verify the cluster.
2031 def ExpandNames(self):
2032 self.needed_locks = {}
2034 def Exec(self, feedback_fn):
2037 if self.op.group_name:
2038 groups = [self.op.group_name]
2039 depends_fn = lambda: None
2041 groups = self.cfg.GetNodeGroupList()
2043 # Verify global configuration
2045 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2048 # Always depend on global verification
2049 depends_fn = lambda: [(-len(jobs), [])]
2052 [opcodes.OpClusterVerifyGroup(group_name=group,
2053 ignore_errors=self.op.ignore_errors,
2054 depends=depends_fn())]
2055 for group in groups)
2057 # Fix up all parameters
2058 for op in itertools.chain(*jobs): # pylint: disable=W0142
2059 op.debug_simulate_errors = self.op.debug_simulate_errors
2060 op.verbose = self.op.verbose
2061 op.error_codes = self.op.error_codes
2063 op.skip_checks = self.op.skip_checks
2064 except AttributeError:
2065 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2067 return ResultWithJobs(jobs)
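# Illustrative sketch (assumption): for a cluster with two node groups and no
# group_name restriction, the job list submitted above is roughly
#
#   jobs = [
#     [OpClusterVerifyConfig(...)],                  # job 0: global config check
#     [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])], ...)],
#     [OpClusterVerifyGroup(group_name="group2", depends=[(-1, [])], ...)],
#   ]
#
# The relative dependency (-len(jobs), []) makes every per-group job wait for
# the configuration check submitted in the same batch.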
2070 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2071 """Verifies the cluster config.
2076 def _VerifyHVP(self, hvp_data):
2077 """Verifies locally the syntax of the hypervisor parameters.
2080 for item, hv_name, hv_params in hvp_data:
2081 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2084 hv_class = hypervisor.GetHypervisor(hv_name)
2085 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2086 hv_class.CheckParameterSyntax(hv_params)
2087 except errors.GenericError, err:
2088 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2090 def ExpandNames(self):
2091 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2092 self.share_locks = _ShareAll()
2094 def CheckPrereq(self):
2095 """Check prerequisites.
2098 # Retrieve all information
2099 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2100 self.all_node_info = self.cfg.GetAllNodesInfo()
2101 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2103 def Exec(self, feedback_fn):
2104 """Verify integrity of cluster, performing various test on nodes.
2108 self._feedback_fn = feedback_fn
2110 feedback_fn("* Verifying cluster config")
2112 for msg in self.cfg.VerifyConfig():
2113 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2115 feedback_fn("* Verifying cluster certificate files")
2117 for cert_filename in pathutils.ALL_CERT_FILES:
2118 (errcode, msg) = _VerifyCertificate(cert_filename)
2119 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2121 feedback_fn("* Verifying hypervisor parameters")
2123 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2124 self.all_inst_info.values()))
2126 feedback_fn("* Verifying all nodes belong to an existing group")
2128 # We do this verification here because, should this bogus circumstance
2129 # occur, it would never be caught by VerifyGroup, which only acts on
2130 # nodes/instances reachable from existing node groups.
2132 dangling_nodes = set(node.name for node in self.all_node_info.values()
2133 if node.group not in self.all_group_info)
2135 dangling_instances = {}
2136 no_node_instances = []
2138 for inst in self.all_inst_info.values():
2139 if inst.primary_node in dangling_nodes:
2140 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2141 elif inst.primary_node not in self.all_node_info:
2142 no_node_instances.append(inst.name)
2147 utils.CommaJoin(dangling_instances.get(node.name,
2149 for node in dangling_nodes]
2151 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2153 "the following nodes (and their instances) belong to a non"
2154 " existing group: %s", utils.CommaJoin(pretty_dangling))
2156 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2158 "the following instances have a non-existing primary-node:"
2159 " %s", utils.CommaJoin(no_node_instances))
2164 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2165 """Verifies the status of a node group.
2168 HPATH = "cluster-verify"
2169 HTYPE = constants.HTYPE_CLUSTER
2172 _HOOKS_INDENT_RE = re.compile("^", re.M)
2174 class NodeImage(object):
2175 """A class representing the logical and physical status of a node.
2178 @ivar name: the node name to which this object refers
2179 @ivar volumes: a structure as returned from
2180 L{ganeti.backend.GetVolumeList} (runtime)
2181 @ivar instances: a list of running instances (runtime)
2182 @ivar pinst: list of configured primary instances (config)
2183 @ivar sinst: list of configured secondary instances (config)
2184 @ivar sbp: dictionary of {primary-node: list of instances} for all
2185 instances for which this node is secondary (config)
2186 @ivar mfree: free memory, as reported by hypervisor (runtime)
2187 @ivar dfree: free disk, as reported by the node (runtime)
2188 @ivar offline: the offline status (config)
2189 @type rpc_fail: boolean
2190 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2191 not whether the individual keys were correct) (runtime)
2192 @type lvm_fail: boolean
2193 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2194 @type hyp_fail: boolean
2195 @ivar hyp_fail: whether the RPC call didn't return the instance list
2196 @type ghost: boolean
2197 @ivar ghost: whether this is a known node or not (config)
2198 @type os_fail: boolean
2199 @ivar os_fail: whether the RPC call didn't return valid OS data
2201 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2202 @type vm_capable: boolean
2203 @ivar vm_capable: whether the node can host instances
2206 def __init__(self, offline=False, name=None, vm_capable=True):
2215 self.offline = offline
2216 self.vm_capable = vm_capable
2217 self.rpc_fail = False
2218 self.lvm_fail = False
2219 self.hyp_fail = False
2221 self.os_fail = False
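# Illustrative example (hypothetical values) of a NodeImage after the runtime
# data has been gathered in Exec():
#
#   nimg.name = "node1.example.com"
#   nimg.volumes = {...}                     # as returned by backend.GetVolumeList
#   nimg.instances = ["inst1.example.com"]   # instances reported as running
#   nimg.pinst = ["inst1.example.com"]       # configured primary instances
#   nimg.sinst = []                          # configured secondary instances
#   nimg.mfree = 2048                        # MiB free, per the hypervisor
#   nimg.rpc_fail = False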
2224 def ExpandNames(self):
2225 # This raises errors.OpPrereqError on its own:
2226 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2228 # Get instances in node group; this is unsafe and needs verification later
2230 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2232 self.needed_locks = {
2233 locking.LEVEL_INSTANCE: inst_names,
2234 locking.LEVEL_NODEGROUP: [self.group_uuid],
2235 locking.LEVEL_NODE: [],
2238 self.share_locks = _ShareAll()
2240 def DeclareLocks(self, level):
2241 if level == locking.LEVEL_NODE:
2242 # Get members of node group; this is unsafe and needs verification later
2243 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2245 all_inst_info = self.cfg.GetAllInstancesInfo()
2247 # In Exec(), we warn about mirrored instances that have primary and
2248 # secondary living in separate node groups. To fully verify that
2249 # volumes for these instances are healthy, we will need to do an
2250 # extra call to their secondaries. We ensure here those nodes will be locked.
2252 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2253 # Important: access only the instances whose lock is owned
2254 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2255 nodes.update(all_inst_info[inst].secondary_nodes)
2257 self.needed_locks[locking.LEVEL_NODE] = nodes
2259 def CheckPrereq(self):
2260 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2261 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2263 group_nodes = set(self.group_info.members)
2265 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2268 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2270 unlocked_instances = \
2271 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2274 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2275 utils.CommaJoin(unlocked_nodes),
2278 if unlocked_instances:
2279 raise errors.OpPrereqError("Missing lock for instances: %s" %
2280 utils.CommaJoin(unlocked_instances),
2283 self.all_node_info = self.cfg.GetAllNodesInfo()
2284 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2286 self.my_node_names = utils.NiceSort(group_nodes)
2287 self.my_inst_names = utils.NiceSort(group_instances)
2289 self.my_node_info = dict((name, self.all_node_info[name])
2290 for name in self.my_node_names)
2292 self.my_inst_info = dict((name, self.all_inst_info[name])
2293 for name in self.my_inst_names)
2295 # We detect here the nodes that will need the extra RPC calls for verifying
2296 # split LV volumes; they should be locked.
2297 extra_lv_nodes = set()
2299 for inst in self.my_inst_info.values():
2300 if inst.disk_template in constants.DTS_INT_MIRROR:
2301 for nname in inst.all_nodes:
2302 if self.all_node_info[nname].group != self.group_uuid:
2303 extra_lv_nodes.add(nname)
2305 unlocked_lv_nodes = \
2306 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2308 if unlocked_lv_nodes:
2309 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2310 utils.CommaJoin(unlocked_lv_nodes),
2312 self.extra_lv_nodes = list(extra_lv_nodes)
2314 def _VerifyNode(self, ninfo, nresult):
2315 """Perform some basic validation on data returned from a node.
2317 - check the result data structure is well formed and has all the required fields
2319 - check ganeti version
2321 @type ninfo: L{objects.Node}
2322 @param ninfo: the node to check
2323 @param nresult: the results from the node
2325 @return: whether overall this call was successful (and we can expect
2326 reasonable values in the response)
2330 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2332 # main result, nresult should be a non-empty dict
2333 test = not nresult or not isinstance(nresult, dict)
2334 _ErrorIf(test, constants.CV_ENODERPC, node,
2335 "unable to verify node: no data returned")
2339 # compares ganeti version
2340 local_version = constants.PROTOCOL_VERSION
2341 remote_version = nresult.get("version", None)
2342 test = not (remote_version and
2343 isinstance(remote_version, (list, tuple)) and
2344 len(remote_version) == 2)
2345 _ErrorIf(test, constants.CV_ENODERPC, node,
2346 "connection to node returned invalid data")
2350 test = local_version != remote_version[0]
2351 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2352 "incompatible protocol versions: master %s,"
2353 " node %s", local_version, remote_version[0])
2357 # node seems compatible, we can actually try to look into its results
2359 # full package version
2360 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2361 constants.CV_ENODEVERSION, node,
2362 "software version mismatch: master %s, node %s",
2363 constants.RELEASE_VERSION, remote_version[1],
2364 code=self.ETYPE_WARNING)
2366 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2367 if ninfo.vm_capable and isinstance(hyp_result, dict):
2368 for hv_name, hv_result in hyp_result.iteritems():
2369 test = hv_result is not None
2370 _ErrorIf(test, constants.CV_ENODEHV, node,
2371 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2373 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2374 if ninfo.vm_capable and isinstance(hvp_result, list):
2375 for item, hv_name, hv_result in hvp_result:
2376 _ErrorIf(True, constants.CV_ENODEHV, node,
2377 "hypervisor %s parameter verify failure (source %s): %s",
2378 hv_name, item, hv_result)
2380 test = nresult.get(constants.NV_NODESETUP,
2381 ["Missing NODESETUP results"])
2382 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2387 def _VerifyNodeTime(self, ninfo, nresult,
2388 nvinfo_starttime, nvinfo_endtime):
2389 """Check the node time.
2391 @type ninfo: L{objects.Node}
2392 @param ninfo: the node to check
2393 @param nresult: the remote results for the node
2394 @param nvinfo_starttime: the start time of the RPC call
2395 @param nvinfo_endtime: the end time of the RPC call
2399 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2401 ntime = nresult.get(constants.NV_TIME, None)
2403 ntime_merged = utils.MergeTime(ntime)
2404 except (ValueError, TypeError):
2405 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2408 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2409 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2410 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2411 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2415 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2416 "Node time diverges by at least %s from master node time",
2419 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2420 """Check the node LVM results.
2422 @type ninfo: L{objects.Node}
2423 @param ninfo: the node to check
2424 @param nresult: the remote results for the node
2425 @param vg_name: the configured VG name
2432 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2434 # checks vg existence and size > 20G
2435 vglist = nresult.get(constants.NV_VGLIST, None)
2437 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2439 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2440 constants.MIN_VG_SIZE)
2441 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2444 pvlist = nresult.get(constants.NV_PVLIST, None)
2445 test = pvlist is None
2446 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2448 # check that ':' is not present in PV names, since it's a
2449 # special character for lvcreate (denotes the range of PEs to allocate on)
2451 for _, pvname, owner_vg in pvlist:
2452 test = ":" in pvname
2453 _ErrorIf(test, constants.CV_ENODELVM, node,
2454 "Invalid character ':' in PV '%s' of VG '%s'",
2457 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2458 """Check the node bridges.
2460 @type ninfo: L{objects.Node}
2461 @param ninfo: the node to check
2462 @param nresult: the remote results for the node
2463 @param bridges: the expected list of bridges
2470 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2472 missing = nresult.get(constants.NV_BRIDGES, None)
2473 test = not isinstance(missing, list)
2474 _ErrorIf(test, constants.CV_ENODENET, node,
2475 "did not return valid bridge information")
2477 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2478 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2480 def _VerifyNodeUserScripts(self, ninfo, nresult):
2481 """Check the results of user scripts presence and executability on the node
2483 @type ninfo: L{objects.Node}
2484 @param ninfo: the node to check
2485 @param nresult: the remote results for the node
2490 test = constants.NV_USERSCRIPTS not in nresult
2491 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2492 "did not return user scripts information")
2494 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2496 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2497 "user scripts not present or not executable: %s" %
2498 utils.CommaJoin(sorted(broken_scripts)))
2500 def _VerifyNodeNetwork(self, ninfo, nresult):
2501 """Check the node network connectivity results.
2503 @type ninfo: L{objects.Node}
2504 @param ninfo: the node to check
2505 @param nresult: the remote results for the node
2509 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2511 test = constants.NV_NODELIST not in nresult
2512 _ErrorIf(test, constants.CV_ENODESSH, node,
2513 "node hasn't returned node ssh connectivity data")
2515 if nresult[constants.NV_NODELIST]:
2516 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2517 _ErrorIf(True, constants.CV_ENODESSH, node,
2518 "ssh communication with node '%s': %s", a_node, a_msg)
2520 test = constants.NV_NODENETTEST not in nresult
2521 _ErrorIf(test, constants.CV_ENODENET, node,
2522 "node hasn't returned node tcp connectivity data")
2524 if nresult[constants.NV_NODENETTEST]:
2525 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2527 _ErrorIf(True, constants.CV_ENODENET, node,
2528 "tcp communication with node '%s': %s",
2529 anode, nresult[constants.NV_NODENETTEST][anode])
2531 test = constants.NV_MASTERIP not in nresult
2532 _ErrorIf(test, constants.CV_ENODENET, node,
2533 "node hasn't returned node master IP reachability data")
2535 if not nresult[constants.NV_MASTERIP]:
2536 if node == self.master_node:
2537 msg = "the master node cannot reach the master IP (not configured?)"
2539 msg = "cannot reach the master IP"
2540 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2542 def _VerifyInstance(self, instance, instanceconfig, node_image,
2544 """Verify an instance.
2546 This function checks to see if the required block devices are
2547 available on the instance's node.
2550 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2551 node_current = instanceconfig.primary_node
2553 node_vol_should = {}
2554 instanceconfig.MapLVsByNode(node_vol_should)
2556 cluster = self.cfg.GetClusterInfo()
2557 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2559 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2560 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2561 code=self.ETYPE_WARNING)
2563 for node in node_vol_should:
2564 n_img = node_image[node]
2565 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2566 # ignore missing volumes on offline or broken nodes
2568 for volume in node_vol_should[node]:
2569 test = volume not in n_img.volumes
2570 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2571 "volume %s missing on node %s", volume, node)
2573 if instanceconfig.admin_state == constants.ADMINST_UP:
2574 pri_img = node_image[node_current]
2575 test = instance not in pri_img.instances and not pri_img.offline
2576 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2577 "instance not running on its primary node %s",
2580 diskdata = [(nname, success, status, idx)
2581 for (nname, disks) in diskstatus.items()
2582 for idx, (success, status) in enumerate(disks)]
2584 for nname, success, bdev_status, idx in diskdata:
2585 # the 'ghost node' construction in Exec() ensures that we have a
2587 snode = node_image[nname]
2588 bad_snode = snode.ghost or snode.offline
2589 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2590 not success and not bad_snode,
2591 constants.CV_EINSTANCEFAULTYDISK, instance,
2592 "couldn't retrieve status for disk/%s on %s: %s",
2593 idx, nname, bdev_status)
2594 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2595 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2596 constants.CV_EINSTANCEFAULTYDISK, instance,
2597 "disk/%s on %s is faulty", idx, nname)
2599 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2600 """Verify if there are any unknown volumes in the cluster.
2602 The .os, .swap and backup volumes are ignored. All other volumes are
2603 reported as unknown.
2605 @type reserved: L{ganeti.utils.FieldSet}
2606 @param reserved: a FieldSet of reserved volume names
2609 for node, n_img in node_image.items():
2610 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2611 self.all_node_info[node].group != self.group_uuid):
2612 # skip non-healthy nodes
2614 for volume in n_img.volumes:
2615 test = ((node not in node_vol_should or
2616 volume not in node_vol_should[node]) and
2617 not reserved.Matches(volume))
2618 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2619 "volume %s is unknown", volume)
2621 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2622 """Verify N+1 Memory Resilience.
2624 Check that if one single node dies we can still start all the
2625 instances it was primary for.
2628 cluster_info = self.cfg.GetClusterInfo()
2629 for node, n_img in node_image.items():
2630 # This code checks that every node which is now listed as
2631 # secondary has enough memory to host all instances it is
2632 # supposed to, should a single other node in the cluster fail.
2633 # FIXME: not ready for failover to an arbitrary node
2634 # FIXME: does not support file-backed instances
2635 # WARNING: we currently take into account down instances as well
2636 # as up ones, considering that even if they're down someone
2637 # might want to start them even in the event of a node failure.
2638 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2639 # we're skipping nodes marked offline and nodes in other groups from
2640 # the N+1 warning, since most likely we don't have good memory
2641 # information from them; we already list instances living on such
2642 # nodes, and that's enough warning
2644 #TODO(dynmem): also consider ballooning out other instances
2645 for prinode, instances in n_img.sbp.items():
2647 for instance in instances:
2648 bep = cluster_info.FillBE(instance_cfg[instance])
2649 if bep[constants.BE_AUTO_BALANCE]:
2650 needed_mem += bep[constants.BE_MINMEM]
2651 test = n_img.mfree < needed_mem
2652 self._ErrorIf(test, constants.CV_ENODEN1, node,
2653 "not enough memory to accomodate instance failovers"
2654 " should node %s fail (%dMiB needed, %dMiB available)",
2655 prinode, needed_mem, n_img.mfree)
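# Worked example (hypothetical numbers) of the N+1 check above: if this node
# is secondary for two instances of primary node P that have auto-balance
# enabled and minimum memory of 1024 and 2048 MiB respectively, then
#
#   needed_mem = 1024 + 2048 = 3072
#
# and an error is raised if the node's reported free memory (mfree) is below
# 3072 MiB, i.e. it could not host P's instances should P fail.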
2658 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2659 (files_all, files_opt, files_mc, files_vm)):
2660 """Verifies file checksums collected from all nodes.
2662 @param errorif: Callback for reporting errors
2663 @param nodeinfo: List of L{objects.Node} objects
2664 @param master_node: Name of master node
2665 @param all_nvinfo: RPC results
2668 # Define functions determining which nodes to consider for a file
2671 (files_mc, lambda node: (node.master_candidate or
2672 node.name == master_node)),
2673 (files_vm, lambda node: node.vm_capable),
2676 # Build mapping from filename to list of nodes which should have the file
2678 for (files, fn) in files2nodefn:
2680 filenodes = nodeinfo
2682 filenodes = filter(fn, nodeinfo)
2683 nodefiles.update((filename,
2684 frozenset(map(operator.attrgetter("name"), filenodes)))
2685 for filename in files)
2687 assert set(nodefiles) == (files_all | files_mc | files_vm)
2689 fileinfo = dict((filename, {}) for filename in nodefiles)
2690 ignore_nodes = set()
2692 for node in nodeinfo:
2694 ignore_nodes.add(node.name)
2697 nresult = all_nvinfo[node.name]
2699 if nresult.fail_msg or not nresult.payload:
2702 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2703 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2704 for (key, value) in fingerprints.items())
2707 test = not (node_files and isinstance(node_files, dict))
2708 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2709 "Node did not return file checksum data")
2711 ignore_nodes.add(node.name)
2714 # Build per-checksum mapping from filename to nodes having it
2715 for (filename, checksum) in node_files.items():
2716 assert filename in nodefiles
2717 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2719 for (filename, checksums) in fileinfo.items():
2720 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2722 # Nodes having the file
2723 with_file = frozenset(node_name
2724 for nodes in fileinfo[filename].values()
2725 for node_name in nodes) - ignore_nodes
2727 expected_nodes = nodefiles[filename] - ignore_nodes
2729 # Nodes missing file
2730 missing_file = expected_nodes - with_file
2732 if filename in files_opt:
2734 errorif(missing_file and missing_file != expected_nodes,
2735 constants.CV_ECLUSTERFILECHECK, None,
2736 "File %s is optional, but it must exist on all or no"
2737 " nodes (not found on %s)",
2738 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2740 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2741 "File %s is missing from node(s) %s", filename,
2742 utils.CommaJoin(utils.NiceSort(missing_file)))
2744 # Warn if a node has a file it shouldn't
2745 unexpected = with_file - expected_nodes
2747 constants.CV_ECLUSTERFILECHECK, None,
2748 "File %s should not exist on node(s) %s",
2749 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2751 # See if there are multiple versions of the file
2752 test = len(checksums) > 1
2754 variants = ["variant %s on %s" %
2755 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2756 for (idx, (checksum, nodes)) in
2757 enumerate(sorted(checksums.items()))]
2761 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2762 "File %s found with %s different checksums (%s)",
2763 filename, len(checksums), "; ".join(variants))
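# Illustrative sketch (assumption) of the structures built above: nodefiles
# maps each checked file to the set of nodes expected to have it, while
# fileinfo maps the file to the checksums actually reported, e.g.
#
#   nodefiles["/var/lib/ganeti/ssconf_cluster_name"] =
#       frozenset(["node1", "node2", "node3"])
#   fileinfo["/var/lib/ganeti/ssconf_cluster_name"] =
#       {"3b1b...": set(["node1", "node2"]), "9f04...": set(["node3"])}
#
# Two different checksums for the same file trigger the "different checksums"
# error reported just above.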
2765 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2767 """Verifies and the node DRBD status.
2769 @type ninfo: L{objects.Node}
2770 @param ninfo: the node to check
2771 @param nresult: the remote results for the node
2772 @param instanceinfo: the dict of instances
2773 @param drbd_helper: the configured DRBD usermode helper
2774 @param drbd_map: the DRBD map as returned by
2775 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2779 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2782 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2783 test = (helper_result is None)
2784 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2785 "no drbd usermode helper returned")
2787 status, payload = helper_result
2789 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2790 "drbd usermode helper check unsuccessful: %s", payload)
2791 test = status and (payload != drbd_helper)
2792 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2793 "wrong drbd usermode helper: %s", payload)
2795 # compute the DRBD minors
2797 for minor, instance in drbd_map[node].items():
2798 test = instance not in instanceinfo
2799 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2800 "ghost instance '%s' in temporary DRBD map", instance)
2801 # ghost instance should not be running, but otherwise we
2802 # don't give double warnings (both ghost instance and
2803 # unallocated minor in use)
2805 node_drbd[minor] = (instance, False)
2807 instance = instanceinfo[instance]
2808 node_drbd[minor] = (instance.name,
2809 instance.admin_state == constants.ADMINST_UP)
2811 # and now check them
2812 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2813 test = not isinstance(used_minors, (tuple, list))
2814 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2815 "cannot parse drbd status file: %s", str(used_minors))
2817 # we cannot check drbd status
2820 for minor, (iname, must_exist) in node_drbd.items():
2821 test = minor not in used_minors and must_exist
2822 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2823 "drbd minor %d of instance %s is not active", minor, iname)
2824 for minor in used_minors:
2825 test = minor not in node_drbd
2826 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2827 "unallocated drbd minor %d is in use", minor)
2829 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2830 """Builds the node OS structures.
2832 @type ninfo: L{objects.Node}
2833 @param ninfo: the node to check
2834 @param nresult: the remote results for the node
2835 @param nimg: the node image object
2839 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2841 remote_os = nresult.get(constants.NV_OSLIST, None)
2842 test = (not isinstance(remote_os, list) or
2843 not compat.all(isinstance(v, list) and len(v) == 7
2844 for v in remote_os))
2846 _ErrorIf(test, constants.CV_ENODEOS, node,
2847 "node hasn't returned valid OS data")
2856 for (name, os_path, status, diagnose,
2857 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2859 if name not in os_dict:
2862 # parameters is a list of lists instead of list of tuples due to
2863 # JSON lacking a real tuple type, fix it:
2864 parameters = [tuple(v) for v in parameters]
2865 os_dict[name].append((os_path, status, diagnose,
2866 set(variants), set(parameters), set(api_ver)))
2868 nimg.oslist = os_dict
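# Illustrative shape (assumption, hypothetical values) of the structure
# stored above:
#
#   nimg.oslist = {
#     "debian-image": [("/srv/ganeti/os/debian-image",  # path
#                       True,                           # status
#                       "",                             # diagnose message
#                       set(["default"]),               # variants
#                       set([("dhcp", "...")]),         # parameters
#                       set([20]))],                    # API versions
#   }
#
# A name mapping to more than one entry means duplicate OS definitions and is
# reported by _VerifyNodeOS.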
2870 def _VerifyNodeOS(self, ninfo, nimg, base):
2871 """Verifies the node OS list.
2873 @type ninfo: L{objects.Node}
2874 @param ninfo: the node to check
2875 @param nimg: the node image object
2876 @param base: the 'template' node we match against (e.g. from the master)
2880 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2882 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2884 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2885 for os_name, os_data in nimg.oslist.items():
2886 assert os_data, "Empty OS status for OS %s?!" % os_name
2887 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2888 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2889 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2890 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2891 "OS '%s' has multiple entries (first one shadows the rest): %s",
2892 os_name, utils.CommaJoin([v[0] for v in os_data]))
2893 # comparisons with the 'base' image
2894 test = os_name not in base.oslist
2895 _ErrorIf(test, constants.CV_ENODEOS, node,
2896 "Extra OS %s not present on reference node (%s)",
2900 assert base.oslist[os_name], "Base node has empty OS status?"
2901 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2903 # base OS is invalid, skipping
2905 for kind, a, b in [("API version", f_api, b_api),
2906 ("variants list", f_var, b_var),
2907 ("parameters", beautify_params(f_param),
2908 beautify_params(b_param))]:
2909 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2910 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2911 kind, os_name, base.name,
2912 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2914 # check any missing OSes
2915 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2916 _ErrorIf(missing, constants.CV_ENODEOS, node,
2917 "OSes present on reference node %s but missing on this node: %s",
2918 base.name, utils.CommaJoin(missing))
2920 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2921 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2923 @type ninfo: L{objects.Node}
2924 @param ninfo: the node to check
2925 @param nresult: the remote results for the node
2926 @type is_master: bool
2927 @param is_master: Whether node is the master node
2933 (constants.ENABLE_FILE_STORAGE or
2934 constants.ENABLE_SHARED_FILE_STORAGE)):
2936 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2938 # This should never happen
2939 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2940 "Node did not return forbidden file storage paths")
2942 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2943 "Found forbidden file storage paths: %s",
2944 utils.CommaJoin(fspaths))
2946 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2947 constants.CV_ENODEFILESTORAGEPATHS, node,
2948 "Node should not have returned forbidden file storage"
2951 def _VerifyOob(self, ninfo, nresult):
2952 """Verifies out of band functionality of a node.
2954 @type ninfo: L{objects.Node}
2955 @param ninfo: the node to check
2956 @param nresult: the remote results for the node
2960 # We just have to verify the paths on master and/or master candidates
2961 # as the oob helper is invoked on the master
2962 if ((ninfo.master_candidate or ninfo.master_capable) and
2963 constants.NV_OOB_PATHS in nresult):
2964 for path_result in nresult[constants.NV_OOB_PATHS]:
2965 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2967 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2968 """Verifies and updates the node volume data.
2970 This function will update a L{NodeImage}'s internal structures
2971 with data from the remote call.
2973 @type ninfo: L{objects.Node}
2974 @param ninfo: the node to check
2975 @param nresult: the remote results for the node
2976 @param nimg: the node image object
2977 @param vg_name: the configured VG name
2981 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2983 nimg.lvm_fail = True
2984 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2987 elif isinstance(lvdata, basestring):
2988 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2989 utils.SafeEncode(lvdata))
2990 elif not isinstance(lvdata, dict):
2991 _ErrorIf(True, constants.CV_ENODELVM, node,
2992 "rpc call to node failed (lvlist)")
2994 nimg.volumes = lvdata
2995 nimg.lvm_fail = False
2997 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2998 """Verifies and updates the node instance list.
3000 If the listing was successful, then updates this node's instance
3001 list. Otherwise, it marks the RPC call as failed for the instance
3004 @type ninfo: L{objects.Node}
3005 @param ninfo: the node to check
3006 @param nresult: the remote results for the node
3007 @param nimg: the node image object
3010 idata = nresult.get(constants.NV_INSTANCELIST, None)
3011 test = not isinstance(idata, list)
3012 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3013 "rpc call to node failed (instancelist): %s",
3014 utils.SafeEncode(str(idata)))
3016 nimg.hyp_fail = True
3018 nimg.instances = idata
3020 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3021 """Verifies and computes a node information map
3023 @type ninfo: L{objects.Node}
3024 @param ninfo: the node to check
3025 @param nresult: the remote results for the node
3026 @param nimg: the node image object
3027 @param vg_name: the configured VG name
3031 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3033 # try to read free memory (from the hypervisor)
3034 hv_info = nresult.get(constants.NV_HVINFO, None)
3035 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3036 _ErrorIf(test, constants.CV_ENODEHV, node,
3037 "rpc call to node failed (hvinfo)")
3040 nimg.mfree = int(hv_info["memory_free"])
3041 except (ValueError, TypeError):
3042 _ErrorIf(True, constants.CV_ENODERPC, node,
3043 "node returned invalid nodeinfo, check hypervisor")
3045 # FIXME: devise a free space model for file based instances as well
3046 if vg_name is not None:
3047 test = (constants.NV_VGLIST not in nresult or
3048 vg_name not in nresult[constants.NV_VGLIST])
3049 _ErrorIf(test, constants.CV_ENODELVM, node,
3050 "node didn't return data for the volume group '%s'"
3051 " - it is either missing or broken", vg_name)
3054 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3055 except (ValueError, TypeError):
3056 _ErrorIf(True, constants.CV_ENODERPC, node,
3057 "node returned invalid LVM info, check LVM status")
3059 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3060 """Gets per-disk status information for all instances.
3062 @type nodelist: list of strings
3063 @param nodelist: Node names
3064 @type node_image: dict of (name, L{objects.Node})
3065 @param node_image: Node objects
3066 @type instanceinfo: dict of (name, L{objects.Instance})
3067 @param instanceinfo: Instance objects
3068 @rtype: {instance: {node: [(success, payload)]}}
3069 @return: a dictionary of per-instance dictionaries with nodes as
3070 keys and disk information as values; the disk information is a
3071 list of tuples (success, payload)
3074 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3077 node_disks_devonly = {}
3078 diskless_instances = set()
3079 diskless = constants.DT_DISKLESS
3081 for nname in nodelist:
3082 node_instances = list(itertools.chain(node_image[nname].pinst,
3083 node_image[nname].sinst))
3084 diskless_instances.update(inst for inst in node_instances
3085 if instanceinfo[inst].disk_template == diskless)
3086 disks = [(inst, disk)
3087 for inst in node_instances
3088 for disk in instanceinfo[inst].disks]
3091 # No need to collect data
3094 node_disks[nname] = disks
3096 # _AnnotateDiskParams already makes copies of the disks
3098 for (inst, dev) in disks:
3099 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3100 self.cfg.SetDiskID(anno_disk, nname)
3101 devonly.append(anno_disk)
3103 node_disks_devonly[nname] = devonly
3105 assert len(node_disks) == len(node_disks_devonly)
3107 # Collect data from all nodes with disks
3108 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3111 assert len(result) == len(node_disks)
3115 for (nname, nres) in result.items():
3116 disks = node_disks[nname]
3119 # No data from this node
3120 data = len(disks) * [(False, "node offline")]
3123 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3124 "while getting disk information: %s", msg)
3126 # No data from this node
3127 data = len(disks) * [(False, msg)]
3130 for idx, i in enumerate(nres.payload):
3131 if isinstance(i, (tuple, list)) and len(i) == 2:
3134 logging.warning("Invalid result from node %s, entry %d: %s",
3136 data.append((False, "Invalid result from the remote node"))
3138 for ((inst, _), status) in zip(disks, data):
3139 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3141 # Add empty entries for diskless instances.
3142 for inst in diskless_instances:
3143 assert inst not in instdisk
3146 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3147 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3148 compat.all(isinstance(s, (tuple, list)) and
3149 len(s) == 2 for s in statuses)
3150 for inst, nnames in instdisk.items()
3151 for nname, statuses in nnames.items())
3152 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
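# Illustrative shape (hypothetical values) of the mapping returned above:
#
#   instdisk = {
#     "inst1.example.com": {
#       "node1.example.com": [(True, <block device status>),
#                             (False, "node offline")],
#     },
#   }
#
# i.e. per instance and per node, one (success, payload) pair per disk, with
# diskless instances present but mapped to an empty dict.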
3157 def _SshNodeSelector(group_uuid, all_nodes):
3158 """Create endless iterators for all potential SSH check hosts.
3161 nodes = [node for node in all_nodes
3162 if (node.group != group_uuid and
3164 keyfunc = operator.attrgetter("group")
3166 return map(itertools.cycle,
3167 [sorted(map(operator.attrgetter("name"), names))
3168 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3172 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3173 """Choose which nodes should talk to which other nodes.
3175 We will make nodes contact all nodes in their group, and one node from every other node group.
3178 @warning: This algorithm has a known issue if one node group is much
3179 smaller than others (e.g. just one node). In such a case all other
3180 nodes will talk to the single node.
3183 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3184 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3186 return (online_nodes,
3187 dict((name, sorted([i.next() for i in sel]))
3188 for name in online_nodes))
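# Illustrative example (hypothetical groups/nodes): for a three-group cluster
# being verified one group at a time, the mapping returned above could be
#
#   (["node1", "node2"],                    # online nodes of this group
#    {"node1": ["node3", "node5"],          # one target per *other* group
#     "node2": ["node4", "node6"]})
#
# The itertools.cycle iterators built in _SshNodeSelector spread the SSH
# checks over the other groups' nodes in a round-robin fashion.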
3190 def BuildHooksEnv(self):
3193 Cluster-Verify hooks run only in the post phase; if they fail, their
3194 output is logged in the verify output and the verification fails.
3198 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3201 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3202 for node in self.my_node_info.values())
3206 def BuildHooksNodes(self):
3207 """Build hooks nodes.
3210 return ([], self.my_node_names)
3212 def Exec(self, feedback_fn):
3213 """Verify integrity of the node group, performing various test on nodes.
3216 # This method has too many local variables. pylint: disable=R0914
3217 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3219 if not self.my_node_names:
3221 feedback_fn("* Empty node group, skipping verification")
3225 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3226 verbose = self.op.verbose
3227 self._feedback_fn = feedback_fn
3229 vg_name = self.cfg.GetVGName()
3230 drbd_helper = self.cfg.GetDRBDHelper()
3231 cluster = self.cfg.GetClusterInfo()
3232 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3233 hypervisors = cluster.enabled_hypervisors
3234 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3236 i_non_redundant = [] # Non redundant instances
3237 i_non_a_balanced = [] # Non auto-balanced instances
3238 i_offline = 0 # Count of offline instances
3239 n_offline = 0 # Count of offline nodes
3240 n_drained = 0 # Count of nodes being drained
3241 node_vol_should = {}
3243 # FIXME: verify OS list
3246 filemap = _ComputeAncillaryFiles(cluster, False)
3248 # do local checksums
3249 master_node = self.master_node = self.cfg.GetMasterNode()
3250 master_ip = self.cfg.GetMasterIP()
3252 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3255 if self.cfg.GetUseExternalMipScript():
3256 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3258 node_verify_param = {
3259 constants.NV_FILELIST:
3260 map(vcluster.MakeVirtualPath,
3261 utils.UniqueSequence(filename
3262 for files in filemap
3263 for filename in files)),
3264 constants.NV_NODELIST:
3265 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3266 self.all_node_info.values()),
3267 constants.NV_HYPERVISOR: hypervisors,
3268 constants.NV_HVPARAMS:
3269 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3270 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3271 for node in node_data_list
3272 if not node.offline],
3273 constants.NV_INSTANCELIST: hypervisors,
3274 constants.NV_VERSION: None,
3275 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3276 constants.NV_NODESETUP: None,
3277 constants.NV_TIME: None,
3278 constants.NV_MASTERIP: (master_node, master_ip),
3279 constants.NV_OSLIST: None,
3280 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3281 constants.NV_USERSCRIPTS: user_scripts,
3284 if vg_name is not None:
3285 node_verify_param[constants.NV_VGLIST] = None
3286 node_verify_param[constants.NV_LVLIST] = vg_name
3287 node_verify_param[constants.NV_PVLIST] = [vg_name]
3290 node_verify_param[constants.NV_DRBDLIST] = None
3291 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3293 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3294 # Load file storage paths only from master node
3295 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3298 # FIXME: this needs to be changed per node-group, not cluster-wide
3300 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3301 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3302 bridges.add(default_nicpp[constants.NIC_LINK])
3303 for instance in self.my_inst_info.values():
3304 for nic in instance.nics:
3305 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3306 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3307 bridges.add(full_nic[constants.NIC_LINK])
3310 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3312 # Build our expected cluster state
3313 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3315 vm_capable=node.vm_capable))
3316 for node in node_data_list)
3320 for node in self.all_node_info.values():
3321 path = _SupportsOob(self.cfg, node)
3322 if path and path not in oob_paths:
3323 oob_paths.append(path)
3326 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3328 for instance in self.my_inst_names:
3329 inst_config = self.my_inst_info[instance]
3330 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3333 for nname in inst_config.all_nodes:
3334 if nname not in node_image:
3335 gnode = self.NodeImage(name=nname)
3336 gnode.ghost = (nname not in self.all_node_info)
3337 node_image[nname] = gnode
3339 inst_config.MapLVsByNode(node_vol_should)
3341 pnode = inst_config.primary_node
3342 node_image[pnode].pinst.append(instance)
3344 for snode in inst_config.secondary_nodes:
3345 nimg = node_image[snode]
3346 nimg.sinst.append(instance)
3347 if pnode not in nimg.sbp:
3348 nimg.sbp[pnode] = []
3349 nimg.sbp[pnode].append(instance)
3351 # At this point, we have the in-memory data structures complete,
3352 # except for the runtime information, which we'll gather next
3354 # Due to the way our RPC system works, exact response times cannot be
3355 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3356 # time before and after executing the request, we can at least have a time
3358 nvinfo_starttime = time.time()
3359 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3361 self.cfg.GetClusterName())
3362 nvinfo_endtime = time.time()
3364 if self.extra_lv_nodes and vg_name is not None:
3366 self.rpc.call_node_verify(self.extra_lv_nodes,
3367 {constants.NV_LVLIST: vg_name},
3368 self.cfg.GetClusterName())
3370 extra_lv_nvinfo = {}
3372 all_drbd_map = self.cfg.ComputeDRBDMap()
3374 feedback_fn("* Gathering disk information (%s nodes)" %
3375 len(self.my_node_names))
3376 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3379 feedback_fn("* Verifying configuration file consistency")
3381 # If not all nodes are being checked, we need to make sure the master node
3382 # and a non-checked vm_capable node are in the list.
3383 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3385 vf_nvinfo = all_nvinfo.copy()
3386 vf_node_info = list(self.my_node_info.values())
3387 additional_nodes = []
3388 if master_node not in self.my_node_info:
3389 additional_nodes.append(master_node)
3390 vf_node_info.append(self.all_node_info[master_node])
3391 # Add the first vm_capable node we find which is not included,
3392 # excluding the master node (which we already have)
3393 for node in absent_nodes:
3394 nodeinfo = self.all_node_info[node]
3395 if (nodeinfo.vm_capable and not nodeinfo.offline and
3396 node != master_node):
3397 additional_nodes.append(node)
3398 vf_node_info.append(self.all_node_info[node])
3400 key = constants.NV_FILELIST
3401 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3402 {key: node_verify_param[key]},
3403 self.cfg.GetClusterName()))
3405 vf_nvinfo = all_nvinfo
3406 vf_node_info = self.my_node_info.values()
3408 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3410 feedback_fn("* Verifying node status")
3414 for node_i in node_data_list:
3416 nimg = node_image[node]
3420 feedback_fn("* Skipping offline node %s" % (node,))
3424 if node == master_node:
3426 elif node_i.master_candidate:
3427 ntype = "master candidate"
3428 elif node_i.drained:
3434 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3436 msg = all_nvinfo[node].fail_msg
3437 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3440 nimg.rpc_fail = True
3443 nresult = all_nvinfo[node].payload
3445 nimg.call_ok = self._VerifyNode(node_i, nresult)
3446 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3447 self._VerifyNodeNetwork(node_i, nresult)
3448 self._VerifyNodeUserScripts(node_i, nresult)
3449 self._VerifyOob(node_i, nresult)
3450 self._VerifyFileStoragePaths(node_i, nresult,
3451 node == master_node)
3454 self._VerifyNodeLVM(node_i, nresult, vg_name)
3455 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3458 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3459 self._UpdateNodeInstances(node_i, nresult, nimg)
3460 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3461 self._UpdateNodeOS(node_i, nresult, nimg)
3463 if not nimg.os_fail:
3464 if refos_img is None:
3466 self._VerifyNodeOS(node_i, nimg, refos_img)
3467 self._VerifyNodeBridges(node_i, nresult, bridges)
3469 # Check whether all running instances are primary for the node. (This
3470 # can no longer be done from _VerifyInstance below, since some of the
3471 # wrong instances could be from other node groups.)
3472 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3474 for inst in non_primary_inst:
3475 test = inst in self.all_inst_info
3476 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3477 "instance should not run on node %s", node_i.name)
3478 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3479 "node is running unknown instance %s", inst)
3481 for node, result in extra_lv_nvinfo.items():
3482 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3483 node_image[node], vg_name)
3485 feedback_fn("* Verifying instance status")
3486 for instance in self.my_inst_names:
3488 feedback_fn("* Verifying instance %s" % instance)
3489 inst_config = self.my_inst_info[instance]
3490 self._VerifyInstance(instance, inst_config, node_image,
3492 inst_nodes_offline = []
3494 pnode = inst_config.primary_node
3495 pnode_img = node_image[pnode]
3496 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3497 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3498 " primary node failed", instance)
3500 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3502 constants.CV_EINSTANCEBADNODE, instance,
3503 "instance is marked as running and lives on offline node %s",
3504 inst_config.primary_node)
3506 # If the instance is non-redundant we cannot survive losing its primary
3507 # node, so we are not N+1 compliant.
3508 if inst_config.disk_template not in constants.DTS_MIRRORED:
3509 i_non_redundant.append(instance)
3511 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3512 constants.CV_EINSTANCELAYOUT,
3513 instance, "instance has multiple secondary nodes: %s",
3514 utils.CommaJoin(inst_config.secondary_nodes),
3515 code=self.ETYPE_WARNING)
3517 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3518 pnode = inst_config.primary_node
3519 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3520 instance_groups = {}
3522 for node in instance_nodes:
3523 instance_groups.setdefault(self.all_node_info[node].group,
3527 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3528 # Sort so that we always list the primary node first.
3529 for group, nodes in sorted(instance_groups.items(),
3530 key=lambda (_, nodes): pnode in nodes,
3533 self._ErrorIf(len(instance_groups) > 1,
3534 constants.CV_EINSTANCESPLITGROUPS,
3535 instance, "instance has primary and secondary nodes in"
3536 " different groups: %s", utils.CommaJoin(pretty_list),
3537 code=self.ETYPE_WARNING)
3539 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3540 i_non_a_balanced.append(instance)
3542 for snode in inst_config.secondary_nodes:
3543 s_img = node_image[snode]
3544 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3545 snode, "instance %s, connection to secondary node failed",
3549 inst_nodes_offline.append(snode)
3551 # warn that the instance lives on offline nodes
3552 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3553 "instance has offline secondary node(s) %s",
3554 utils.CommaJoin(inst_nodes_offline))
3555 # ... or ghost/non-vm_capable nodes
3556 for node in inst_config.all_nodes:
3557 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3558 instance, "instance lives on ghost node %s", node)
3559 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3560 instance, "instance lives on non-vm_capable node %s", node)
3562 feedback_fn("* Verifying orphan volumes")
3563 reserved = utils.FieldSet(*cluster.reserved_lvs)
3565 # We will get spurious "unknown volume" warnings if any node of this group
3566 # is secondary for an instance whose primary is in another group. To avoid
3567 # them, we find these instances and add their volumes to node_vol_should.
3568 for inst in self.all_inst_info.values():
3569 for secondary in inst.secondary_nodes:
3570 if (secondary in self.my_node_info
3571 and inst.name not in self.my_inst_info):
3572 inst.MapLVsByNode(node_vol_should)
3575 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3577 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3578 feedback_fn("* Verifying N+1 Memory redundancy")
3579 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3581 feedback_fn("* Other Notes")
3583 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3584 % len(i_non_redundant))
3586 if i_non_a_balanced:
3587 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3588 % len(i_non_a_balanced))
3591 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3594 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3597 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3601 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3602 """Analyze the post-hooks' result
3604 This method analyses the hook result, handles it, and sends some
3605 nicely-formatted feedback back to the user.
3607 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3608 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3609 @param hooks_results: the results of the multi-node hooks rpc call
3610 @param feedback_fn: function used to send feedback back to the caller
3611 @param lu_result: previous Exec result
3612 @return: the new Exec result, based on the previous result
3616 # We only really run POST phase hooks, only for non-empty groups,
3617 # and are only interested in their results
3618 if not self.my_node_names:
3621 elif phase == constants.HOOKS_PHASE_POST:
3622 # Used to change hooks' output to proper indentation
3623 feedback_fn("* Hooks Results")
3624 assert hooks_results, "invalid result from hooks"
3626 for node_name in hooks_results:
3627 res = hooks_results[node_name]
3629 test = msg and not res.offline
3630 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3631 "Communication failure in hooks execution: %s", msg)
3632 if res.offline or msg:
3633 # No need to investigate payload if node is offline or gave
3636 for script, hkr, output in res.payload:
3637 test = hkr == constants.HKR_FAIL
3638 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3639 "Script %s failed, output:", script)
3641 output = self._HOOKS_INDENT_RE.sub(" ", output)
3642 feedback_fn("%s" % output)
3648 class LUClusterVerifyDisks(NoHooksLU):
3649 """Verifies the cluster disks status.
3654 def ExpandNames(self):
3655 self.share_locks = _ShareAll()
3656 self.needed_locks = {
3657 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3660 def Exec(self, feedback_fn):
3661 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3663 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3664 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3665 for group in group_names])
3668 class LUGroupVerifyDisks(NoHooksLU):
3669 """Verifies the status of all disks in a node group.
3674 def ExpandNames(self):
3675 # Raises errors.OpPrereqError on its own if group can't be found
3676 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3678 self.share_locks = _ShareAll()
3679 self.needed_locks = {
3680 locking.LEVEL_INSTANCE: [],
3681 locking.LEVEL_NODEGROUP: [],
3682 locking.LEVEL_NODE: [],
3685 def DeclareLocks(self, level):
3686 if level == locking.LEVEL_INSTANCE:
3687 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3689 # Lock instances optimistically, needs verification once node and group
3690 # locks have been acquired
3691 self.needed_locks[locking.LEVEL_INSTANCE] = \
3692 self.cfg.GetNodeGroupInstances(self.group_uuid)
3694 elif level == locking.LEVEL_NODEGROUP:
3695 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3697 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3698 set([self.group_uuid] +
3699 # Lock all groups used by instances optimistically; this requires
3700 # going via the node before it's locked, requiring verification
3703 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3704 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3706 elif level == locking.LEVEL_NODE:
3707 # This will only lock the nodes in the group to be verified which contain
3709 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3710 self._LockInstancesNodes()
3712 # Lock all nodes in group to be verified
3713 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3714 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3715 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3717 def CheckPrereq(self):
3718 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3719 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3720 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3722 assert self.group_uuid in owned_groups
3724 # Check if locked instances are still correct
3725 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3727 # Get instance information
3728 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3730 # Check if node groups for locked instances are still correct
3731 _CheckInstancesNodeGroups(self.cfg, self.instances,
3732 owned_groups, owned_nodes, self.group_uuid)
3734 def Exec(self, feedback_fn):
3735 """Verify integrity of cluster disks.
3737 @rtype: tuple of three items
3738 @return: a tuple of (dict of node-to-node_error, list of instances
3739 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3744 res_instances = set()
3747 nv_dict = _MapInstanceDisksToNodes(
3748 [inst for inst in self.instances.values()
3749 if inst.admin_state == constants.ADMINST_UP])
3752 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3753 set(self.cfg.GetVmCapableNodeList()))
3755 node_lvs = self.rpc.call_lv_list(nodes, [])
3757 for (node, node_res) in node_lvs.items():
3758 if node_res.offline:
3761 msg = node_res.fail_msg
3763 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3764 res_nodes[node] = msg
3767 for lv_name, (_, _, lv_online) in node_res.payload.items():
3768 inst = nv_dict.pop((node, lv_name), None)
3769 if not (lv_online or inst is None):
3770 res_instances.add(inst)
3772 # any leftover items in nv_dict are missing LVs, let's arrange the data better
3774 for key, inst in nv_dict.iteritems():
3775 res_missing.setdefault(inst, []).append(list(key))
3777 return (res_nodes, list(res_instances), res_missing)
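# Example of the return value with hypothetical data: if node2 could not be
# contacted, instance "web1" has an LV that is not online, and "db1" is
# missing a volume on node1, Exec would return something like
#   ({"node2": "Error while ..."}, ["web1"],
#    {"db1": [["node1", "xenvg/disk0"]]})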
3780 class LUClusterRepairDiskSizes(NoHooksLU):
3781 """Verifies the cluster disks sizes.
3786 def ExpandNames(self):
3787 if self.op.instances:
3788 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3789 self.needed_locks = {
3790 locking.LEVEL_NODE_RES: [],
3791 locking.LEVEL_INSTANCE: self.wanted_names,
3793 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3795 self.wanted_names = None
3796 self.needed_locks = {
3797 locking.LEVEL_NODE_RES: locking.ALL_SET,
3798 locking.LEVEL_INSTANCE: locking.ALL_SET,
3800 self.share_locks = {
3801 locking.LEVEL_NODE_RES: 1,
3802 locking.LEVEL_INSTANCE: 0,
3805 def DeclareLocks(self, level):
3806 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3807 self._LockInstancesNodes(primary_only=True, level=level)
3809 def CheckPrereq(self):
3810 """Check prerequisites.
3812 This only checks the optional instance list against the existing names.
3815 if self.wanted_names is None:
3816 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3818 self.wanted_instances = \
3819 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3821 def _EnsureChildSizes(self, disk):
3822 """Ensure children of the disk have the needed disk size.
3824 This is valid mainly for DRBD8 and fixes an issue where the
3825 children have a smaller disk size than the parent.
3827 @param disk: an L{ganeti.objects.Disk} object
3830 if disk.dev_type == constants.LD_DRBD8:
3831 assert disk.children, "Empty children for DRBD8?"
3832 fchild = disk.children[0]
3833 mismatch = fchild.size < disk.size
3835 self.LogInfo("Child disk has size %d, parent %d, fixing",
3836 fchild.size, disk.size)
3837 fchild.size = disk.size
3839 # and we recurse on this child only, not on the metadev
3840 return self._EnsureChildSizes(fchild) or mismatch
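# Illustrative example (hypothetical sizes): for a DRBD8 disk of size 10240
# whose data child reports 10200, the child is grown to 10240 and the method
# returns True, which makes Exec below write the updated configuration.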
3844 def Exec(self, feedback_fn):
3845 """Verify the size of cluster disks.
3848 # TODO: check child disks too
3849 # TODO: check differences in size between primary/secondary nodes
3851 for instance in self.wanted_instances:
3852 pnode = instance.primary_node
3853 if pnode not in per_node_disks:
3854 per_node_disks[pnode] = []
3855 for idx, disk in enumerate(instance.disks):
3856 per_node_disks[pnode].append((instance, idx, disk))
3858 assert not (frozenset(per_node_disks.keys()) -
3859 self.owned_locks(locking.LEVEL_NODE_RES)), \
3860 "Not owning correct locks"
3861 assert not self.owned_locks(locking.LEVEL_NODE)
3864 for node, dskl in per_node_disks.items():
3865 newl = [v[2].Copy() for v in dskl]
3867 self.cfg.SetDiskID(dsk, node)
3868 result = self.rpc.call_blockdev_getsize(node, newl)
3870 self.LogWarning("Failure in blockdev_getsize call to node"
3871 " %s, ignoring", node)
3873 if len(result.payload) != len(dskl):
3874 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3875 " result.payload=%s", node, len(dskl), result.payload)
3876 self.LogWarning("Invalid result from node %s, ignoring node results",
3879 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3881 self.LogWarning("Disk %d of instance %s did not return size"
3882 " information, ignoring", idx, instance.name)
3884 if not isinstance(size, (int, long)):
3885 self.LogWarning("Disk %d of instance %s did not return valid"
3886 " size information, ignoring", idx, instance.name)
3889 if size != disk.size:
3890 self.LogInfo("Disk %d of instance %s has mismatched size,"
3891 " correcting: recorded %d, actual %d", idx,
3892 instance.name, disk.size, size)
3894 self.cfg.Update(instance, feedback_fn)
3895 changed.append((instance.name, idx, size))
3896 if self._EnsureChildSizes(disk):
3897 self.cfg.Update(instance, feedback_fn)
3898 changed.append((instance.name, idx, disk.size))
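# For reference, the "changed" list collected above holds one
# (instance name, disk index, new size) tuple per corrected disk, e.g.
# [("web1", 0, 10240)] with hypothetical values.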
3902 class LUClusterRename(LogicalUnit):
3903 """Rename the cluster.
3906 HPATH = "cluster-rename"
3907 HTYPE = constants.HTYPE_CLUSTER
3909 def BuildHooksEnv(self):
3914 "OP_TARGET": self.cfg.GetClusterName(),
3915 "NEW_NAME": self.op.name,
3918 def BuildHooksNodes(self):
3919 """Build hooks nodes.
3922 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3924 def CheckPrereq(self):
3925 """Verify that the passed name is a valid one.
3928 hostname = netutils.GetHostname(name=self.op.name,
3929 family=self.cfg.GetPrimaryIPFamily())
3931 new_name = hostname.name
3932 self.ip = new_ip = hostname.ip
3933 old_name = self.cfg.GetClusterName()
3934 old_ip = self.cfg.GetMasterIP()
3935 if new_name == old_name and new_ip == old_ip:
3936 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3937 " cluster has changed",
3939 if new_ip != old_ip:
3940 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3941 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3942 " reachable on the network" %
3943 new_ip, errors.ECODE_NOTUNIQUE)
3945 self.op.name = new_name
3947 def Exec(self, feedback_fn):
3948 """Rename the cluster.
3951 clustername = self.op.name
3954 # shutdown the master IP
3955 master_params = self.cfg.GetMasterNetworkParameters()
3956 ems = self.cfg.GetUseExternalMipScript()
3957 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3959 result.Raise("Could not disable the master role")
3962 cluster = self.cfg.GetClusterInfo()
3963 cluster.cluster_name = clustername
3964 cluster.master_ip = new_ip
3965 self.cfg.Update(cluster, feedback_fn)
3967 # update the known hosts file
3968 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3969 node_list = self.cfg.GetOnlineNodeList()
3971 node_list.remove(master_params.name)
3974 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3976 master_params.ip = new_ip
3977 result = self.rpc.call_node_activate_master_ip(master_params.name,
3979 msg = result.fail_msg
3981 self.LogWarning("Could not re-enable the master role on"
3982 " the master, please restart manually: %s", msg)
3987 def _ValidateNetmask(cfg, netmask):
3988 """Checks if a netmask is valid.
3990 @type cfg: L{config.ConfigWriter}
3991 @param cfg: The cluster configuration
3993 @param netmask: the netmask to be verified
3994 @raise errors.OpPrereqError: if the validation fails
3997 ip_family = cfg.GetPrimaryIPFamily()
3999 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4000 except errors.ProgrammerError:
4001 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4002 ip_family, errors.ECODE_INVAL)
4003 if not ipcls.ValidateNetmask(netmask):
4004 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4005 (netmask), errors.ECODE_INVAL)
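# Usage sketch (illustrative values): for an IPv4 cluster a CIDR prefix such
# as 24 is accepted, while an out-of-range value raises OpPrereqError:
#   _ValidateNetmask(cfg, 24)   # passes
#   _ValidateNetmask(cfg, 99)   # raises errors.OpPrereqError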
4008 class LUClusterSetParams(LogicalUnit):
4009 """Change the parameters of the cluster.
4012 HPATH = "cluster-modify"
4013 HTYPE = constants.HTYPE_CLUSTER
4016 def CheckArguments(self):
4020 if self.op.uid_pool:
4021 uidpool.CheckUidPool(self.op.uid_pool)
4023 if self.op.add_uids:
4024 uidpool.CheckUidPool(self.op.add_uids)
4026 if self.op.remove_uids:
4027 uidpool.CheckUidPool(self.op.remove_uids)
4029 if self.op.master_netmask is not None:
4030 _ValidateNetmask(self.cfg, self.op.master_netmask)
4032 if self.op.diskparams:
4033 for dt_params in self.op.diskparams.values():
4034 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4036 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4037 except errors.OpPrereqError, err:
4038 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4041 def ExpandNames(self):
4042 # FIXME: in the future maybe other cluster params won't require checking on
4043 # all nodes to be modified.
4044 self.needed_locks = {
4045 locking.LEVEL_NODE: locking.ALL_SET,
4046 locking.LEVEL_INSTANCE: locking.ALL_SET,
4047 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4049 self.share_locks = {
4050 locking.LEVEL_NODE: 1,
4051 locking.LEVEL_INSTANCE: 1,
4052 locking.LEVEL_NODEGROUP: 1,
4055 def BuildHooksEnv(self):
4060 "OP_TARGET": self.cfg.GetClusterName(),
4061 "NEW_VG_NAME": self.op.vg_name,
4064 def BuildHooksNodes(self):
4065 """Build hooks nodes.
4068 mn = self.cfg.GetMasterNode()
4071 def CheckPrereq(self):
4072 """Check prerequisites.
4074 This checks that the given parameters don't conflict and
4075 that the given volume group is valid.
4078 if self.op.vg_name is not None and not self.op.vg_name:
4079 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4080 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4081 " instances exist", errors.ECODE_INVAL)
4083 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4084 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4085 raise errors.OpPrereqError("Cannot disable drbd helper while"
4086 " drbd-based instances exist",
4089 node_list = self.owned_locks(locking.LEVEL_NODE)
4091 # if vg_name not None, checks given volume group on all nodes
4093 vglist = self.rpc.call_vg_list(node_list)
4094 for node in node_list:
4095 msg = vglist[node].fail_msg
4097 # ignoring down node
4098 self.LogWarning("Error while gathering data on node %s"
4099 " (ignoring node): %s", node, msg)
4101 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4103 constants.MIN_VG_SIZE)
4105 raise errors.OpPrereqError("Error on node '%s': %s" %
4106 (node, vgstatus), errors.ECODE_ENVIRON)
4108 if self.op.drbd_helper:
4109 # checks given drbd helper on all nodes
4110 helpers = self.rpc.call_drbd_helper(node_list)
4111 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4113 self.LogInfo("Not checking drbd helper on offline node %s", node)
4115 msg = helpers[node].fail_msg
4117 raise errors.OpPrereqError("Error checking drbd helper on node"
4118 " '%s': %s" % (node, msg),
4119 errors.ECODE_ENVIRON)
4120 node_helper = helpers[node].payload
4121 if node_helper != self.op.drbd_helper:
4122 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4123 (node, node_helper), errors.ECODE_ENVIRON)
4125 self.cluster = cluster = self.cfg.GetClusterInfo()
4126 # validate params changes
4127 if self.op.beparams:
4128 objects.UpgradeBeParams(self.op.beparams)
4129 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4130 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4132 if self.op.ndparams:
4133 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4134 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4136 # TODO: we need a more general way to handle resetting
4137 # cluster-level parameters to default values
4138 if self.new_ndparams["oob_program"] == "":
4139 self.new_ndparams["oob_program"] = \
4140 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4142 if self.op.hv_state:
4143 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4144 self.cluster.hv_state_static)
4145 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4146 for hv, values in new_hv_state.items())
4148 if self.op.disk_state:
4149 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4150 self.cluster.disk_state_static)
4151 self.new_disk_state = \
4152 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4153 for name, values in svalues.items()))
4154 for storage, svalues in new_disk_state.items())
4157 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4160 all_instances = self.cfg.GetAllInstancesInfo().values()
4162 for group in self.cfg.GetAllNodeGroupsInfo().values():
4163 instances = frozenset([inst for inst in all_instances
4164 if compat.any(node in group.members
4165 for node in inst.all_nodes)])
4166 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4167 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4168 new = _ComputeNewInstanceViolations(ipol,
4169 new_ipolicy, instances)
4171 violations.update(new)
4174 self.LogWarning("After the ipolicy change the following instances"
4175 " violate them: %s",
4176 utils.CommaJoin(utils.NiceSort(violations)))
4178 if self.op.nicparams:
4179 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4180 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4181 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4184 # check all instances for consistency
4185 for instance in self.cfg.GetAllInstancesInfo().values():
4186 for nic_idx, nic in enumerate(instance.nics):
4187 params_copy = copy.deepcopy(nic.nicparams)
4188 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4190 # check parameter syntax
4192 objects.NIC.CheckParameterSyntax(params_filled)
4193 except errors.ConfigurationError, err:
4194 nic_errors.append("Instance %s, nic/%d: %s" %
4195 (instance.name, nic_idx, err))
4197 # if we're moving instances to routed, check that they have an ip
4198 target_mode = params_filled[constants.NIC_MODE]
4199 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4200 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4201 " address" % (instance.name, nic_idx))
4203 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4204 "\n".join(nic_errors), errors.ECODE_INVAL)
4206 # hypervisor list/parameters
4207 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4208 if self.op.hvparams:
4209 for hv_name, hv_dict in self.op.hvparams.items():
4210 if hv_name not in self.new_hvparams:
4211 self.new_hvparams[hv_name] = hv_dict
4213 self.new_hvparams[hv_name].update(hv_dict)
4215 # disk template parameters
4216 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4217 if self.op.diskparams:
4218 for dt_name, dt_params in self.op.diskparams.items():
4219 if dt_name not in self.new_diskparams:
4220 self.new_diskparams[dt_name] = dt_params
4222 self.new_diskparams[dt_name].update(dt_params)
4224 # os hypervisor parameters
4225 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4227 for os_name, hvs in self.op.os_hvp.items():
4228 if os_name not in self.new_os_hvp:
4229 self.new_os_hvp[os_name] = hvs
4231 for hv_name, hv_dict in hvs.items():
4232 if hv_name not in self.new_os_hvp[os_name]:
4233 self.new_os_hvp[os_name][hv_name] = hv_dict
4235 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4238 self.new_osp = objects.FillDict(cluster.osparams, {})
4239 if self.op.osparams:
4240 for os_name, osp in self.op.osparams.items():
4241 if os_name not in self.new_osp:
4242 self.new_osp[os_name] = {}
4244 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4247 if not self.new_osp[os_name]:
4248 # we removed all parameters
4249 del self.new_osp[os_name]
4251 # check the parameter validity (remote check)
4252 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4253 os_name, self.new_osp[os_name])
4255 # changes to the hypervisor list
4256 if self.op.enabled_hypervisors is not None:
4257 self.hv_list = self.op.enabled_hypervisors
4258 for hv in self.hv_list:
4259 # if the hypervisor doesn't already exist in the cluster
4260 # hvparams, we initialize it to empty, and then (in both
4261 # cases) we make sure to fill the defaults, as we might not
4262 # have a complete defaults list if the hypervisor wasn't enabled before
4264 if hv not in new_hvp:
4266 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4267 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4269 self.hv_list = cluster.enabled_hypervisors
4271 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4272 # either the enabled list has changed, or the parameters have, validate
4273 for hv_name, hv_params in self.new_hvparams.items():
4274 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4275 (self.op.enabled_hypervisors and
4276 hv_name in self.op.enabled_hypervisors)):
4277 # either this is a new hypervisor, or its parameters have changed
4278 hv_class = hypervisor.GetHypervisor(hv_name)
4279 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4280 hv_class.CheckParameterSyntax(hv_params)
4281 _CheckHVParams(self, node_list, hv_name, hv_params)
4284 # no need to check any newly-enabled hypervisors, since the
4285 # defaults have already been checked in the above code-block
4286 for os_name, os_hvp in self.new_os_hvp.items():
4287 for hv_name, hv_params in os_hvp.items():
4288 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4289 # we need to fill in the new os_hvp on top of the actual hv_p
4290 cluster_defaults = self.new_hvparams.get(hv_name, {})
4291 new_osp = objects.FillDict(cluster_defaults, hv_params)
4292 hv_class = hypervisor.GetHypervisor(hv_name)
4293 hv_class.CheckParameterSyntax(new_osp)
4294 _CheckHVParams(self, node_list, hv_name, new_osp)
4296 if self.op.default_iallocator:
4297 alloc_script = utils.FindFile(self.op.default_iallocator,
4298 constants.IALLOCATOR_SEARCH_PATH,
4300 if alloc_script is None:
4301 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4302 " specified" % self.op.default_iallocator,
4305 def Exec(self, feedback_fn):
4306 """Change the parameters of the cluster.
4309 if self.op.vg_name is not None:
4310 new_volume = self.op.vg_name
4313 if new_volume != self.cfg.GetVGName():
4314 self.cfg.SetVGName(new_volume)
4316 feedback_fn("Cluster LVM configuration already in desired"
4317 " state, not changing")
4318 if self.op.drbd_helper is not None:
4319 new_helper = self.op.drbd_helper
4322 if new_helper != self.cfg.GetDRBDHelper():
4323 self.cfg.SetDRBDHelper(new_helper)
4325 feedback_fn("Cluster DRBD helper already in desired state,"
4327 if self.op.hvparams:
4328 self.cluster.hvparams = self.new_hvparams
4330 self.cluster.os_hvp = self.new_os_hvp
4331 if self.op.enabled_hypervisors is not None:
4332 self.cluster.hvparams = self.new_hvparams
4333 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4334 if self.op.beparams:
4335 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4336 if self.op.nicparams:
4337 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4339 self.cluster.ipolicy = self.new_ipolicy
4340 if self.op.osparams:
4341 self.cluster.osparams = self.new_osp
4342 if self.op.ndparams:
4343 self.cluster.ndparams = self.new_ndparams
4344 if self.op.diskparams:
4345 self.cluster.diskparams = self.new_diskparams
4346 if self.op.hv_state:
4347 self.cluster.hv_state_static = self.new_hv_state
4348 if self.op.disk_state:
4349 self.cluster.disk_state_static = self.new_disk_state
4351 if self.op.candidate_pool_size is not None:
4352 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4353 # we need to update the pool size here, otherwise the save will fail
4354 _AdjustCandidatePool(self, [])
4356 if self.op.maintain_node_health is not None:
4357 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4358 feedback_fn("Note: CONFD was disabled at build time, node health"
4359 " maintenance is not useful (still enabling it)")
4360 self.cluster.maintain_node_health = self.op.maintain_node_health
4362 if self.op.prealloc_wipe_disks is not None:
4363 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4365 if self.op.add_uids is not None:
4366 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4368 if self.op.remove_uids is not None:
4369 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4371 if self.op.uid_pool is not None:
4372 self.cluster.uid_pool = self.op.uid_pool
4374 if self.op.default_iallocator is not None:
4375 self.cluster.default_iallocator = self.op.default_iallocator
4377 if self.op.reserved_lvs is not None:
4378 self.cluster.reserved_lvs = self.op.reserved_lvs
4380 if self.op.use_external_mip_script is not None:
4381 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4383 def helper_os(aname, mods, desc):
4385 lst = getattr(self.cluster, aname)
4386 for key, val in mods:
4387 if key == constants.DDM_ADD:
4389 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4392 elif key == constants.DDM_REMOVE:
4396 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4398 raise errors.ProgrammerError("Invalid modification '%s'" % key)
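# Sketch of the expected "mods" format (hypothetical values): hidden_os and
# blacklisted_os are lists of (action, OS name) pairs, e.g.
#   [(constants.DDM_ADD, "debootstrap"), (constants.DDM_REMOVE, "old-image")]
# which helper_os applies to cluster.hidden_os / cluster.blacklisted_os.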
4400 if self.op.hidden_os:
4401 helper_os("hidden_os", self.op.hidden_os, "hidden")
4403 if self.op.blacklisted_os:
4404 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4406 if self.op.master_netdev:
4407 master_params = self.cfg.GetMasterNetworkParameters()
4408 ems = self.cfg.GetUseExternalMipScript()
4409 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4410 self.cluster.master_netdev)
4411 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4413 result.Raise("Could not disable the master ip")
4414 feedback_fn("Changing master_netdev from %s to %s" %
4415 (master_params.netdev, self.op.master_netdev))
4416 self.cluster.master_netdev = self.op.master_netdev
4418 if self.op.master_netmask:
4419 master_params = self.cfg.GetMasterNetworkParameters()
4420 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4421 result = self.rpc.call_node_change_master_netmask(master_params.name,
4422 master_params.netmask,
4423 self.op.master_netmask,
4425 master_params.netdev)
4427 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4430 self.cluster.master_netmask = self.op.master_netmask
4432 self.cfg.Update(self.cluster, feedback_fn)
4434 if self.op.master_netdev:
4435 master_params = self.cfg.GetMasterNetworkParameters()
4436 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4437 self.op.master_netdev)
4438 ems = self.cfg.GetUseExternalMipScript()
4439 result = self.rpc.call_node_activate_master_ip(master_params.name,
4442 self.LogWarning("Could not re-enable the master ip on"
4443 " the master, please restart manually: %s",
4447 def _UploadHelper(lu, nodes, fname):
4448 """Helper for uploading a file and showing warnings.
4451 if os.path.exists(fname):
4452 result = lu.rpc.call_upload_file(nodes, fname)
4453 for to_node, to_result in result.items():
4454 msg = to_result.fail_msg
4456 msg = ("Copy of file %s to node %s failed: %s" %
4457 (fname, to_node, msg))
4461 def _ComputeAncillaryFiles(cluster, redist):
4462 """Compute files external to Ganeti which need to be consistent.
4464 @type redist: boolean
4465 @param redist: Whether to include files which need to be redistributed
4468 # Compute files for all nodes
4470 pathutils.SSH_KNOWN_HOSTS_FILE,
4471 pathutils.CONFD_HMAC_KEY,
4472 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4473 pathutils.SPICE_CERT_FILE,
4474 pathutils.SPICE_CACERT_FILE,
4475 pathutils.RAPI_USERS_FILE,
4479 # we need to ship at least the RAPI certificate
4480 files_all.add(pathutils.RAPI_CERT_FILE)
4482 files_all.update(pathutils.ALL_CERT_FILES)
4483 files_all.update(ssconf.SimpleStore().GetFileList())
4485 if cluster.modify_etc_hosts:
4486 files_all.add(pathutils.ETC_HOSTS)
4488 if cluster.use_external_mip_script:
4489 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4491 # Files which are optional, these must:
4492 # - be present in one other category as well
4493 # - either exist or not exist on all nodes of that category (mc, vm all)
4495 pathutils.RAPI_USERS_FILE,
4498 # Files which should only be on master candidates
4502 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4506 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4507 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4508 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4510 # Files which should only be on VM-capable nodes
4513 for hv_name in cluster.enabled_hypervisors
4514 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4518 for hv_name in cluster.enabled_hypervisors
4519 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4521 # Filenames in each category must be unique
4522 all_files_set = files_all | files_mc | files_vm
4523 assert (len(all_files_set) ==
4524 sum(map(len, [files_all, files_mc, files_vm]))), \
4525 "Found file listed in more than one file list"
4527 # Optional files must be present in one other category
4528 assert all_files_set.issuperset(files_opt), \
4529 "Optional file not in a different required list"
4531 # This one file should never ever be re-distributed via RPC
4532 assert not (redist and
4533 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4535 return (files_all, files_opt, files_mc, files_vm)
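# Rough shape of the result (actual contents depend on build options and
# cluster settings):
#   files_all - files every node must have, e.g. the ssh known_hosts file,
#               the confd HMAC key and the RAPI certificate
#   files_opt - files that may legitimately be absent, e.g. the RAPI users
#               file; each of them also appears in one of the other sets
#   files_mc  - files for master candidates only, e.g. the cluster config
#   files_vm  - hypervisor ancillary files for VM-capable nodes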
4538 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4539 """Distribute additional files which are part of the cluster configuration.
4541 ConfigWriter takes care of distributing the config and ssconf files, but
4542 there are more files which should be distributed to all nodes. This function
4543 makes sure those are copied.
4545 @param lu: calling logical unit
4546 @param additional_nodes: list of nodes not in the config to distribute to
4547 @type additional_vm: boolean
4548 @param additional_vm: whether the additional nodes are vm-capable or not
4551 # Gather target nodes
4552 cluster = lu.cfg.GetClusterInfo()
4553 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4555 online_nodes = lu.cfg.GetOnlineNodeList()
4556 online_set = frozenset(online_nodes)
4557 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4559 if additional_nodes is not None:
4560 online_nodes.extend(additional_nodes)
4562 vm_nodes.extend(additional_nodes)
4564 # Never distribute to master node
4565 for nodelist in [online_nodes, vm_nodes]:
4566 if master_info.name in nodelist:
4567 nodelist.remove(master_info.name)
4570 (files_all, _, files_mc, files_vm) = \
4571 _ComputeAncillaryFiles(cluster, True)
4573 # Never re-distribute configuration file from here
4574 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4575 pathutils.CLUSTER_CONF_FILE in files_vm)
4576 assert not files_mc, "Master candidates not handled in this function"
4579 (online_nodes, files_all),
4580 (vm_nodes, files_vm),
4584 for (node_list, files) in filemap:
4586 _UploadHelper(lu, node_list, fname)
4589 class LUClusterRedistConf(NoHooksLU):
4590 """Force the redistribution of cluster configuration.
4592 This is a very simple LU.
4597 def ExpandNames(self):
4598 self.needed_locks = {
4599 locking.LEVEL_NODE: locking.ALL_SET,
4601 self.share_locks[locking.LEVEL_NODE] = 1
4603 def Exec(self, feedback_fn):
4604 """Redistribute the configuration.
4607 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4608 _RedistributeAncillaryFiles(self)
4611 class LUClusterActivateMasterIp(NoHooksLU):
4612 """Activate the master IP on the master node.
4615 def Exec(self, feedback_fn):
4616 """Activate the master IP.
4619 master_params = self.cfg.GetMasterNetworkParameters()
4620 ems = self.cfg.GetUseExternalMipScript()
4621 result = self.rpc.call_node_activate_master_ip(master_params.name,
4623 result.Raise("Could not activate the master IP")
4626 class LUClusterDeactivateMasterIp(NoHooksLU):
4627 """Deactivate the master IP on the master node.
4630 def Exec(self, feedback_fn):
4631 """Deactivate the master IP.
4634 master_params = self.cfg.GetMasterNetworkParameters()
4635 ems = self.cfg.GetUseExternalMipScript()
4636 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4638 result.Raise("Could not deactivate the master IP")
4641 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4642 """Sleep and poll for an instance's disk to sync.
4645 if not instance.disks or disks is not None and not disks:
4648 disks = _ExpandCheckDisks(instance, disks)
4651 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4653 node = instance.primary_node
4656 lu.cfg.SetDiskID(dev, node)
4658 # TODO: Convert to utils.Retry
4661 degr_retries = 10 # in seconds, as we sleep 1 second each time
4665 cumul_degraded = False
4666 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4667 msg = rstats.fail_msg
4669 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4672 raise errors.RemoteError("Can't contact node %s for mirror data,"
4673 " aborting." % node)
4676 rstats = rstats.payload
4678 for i, mstat in enumerate(rstats):
4680 lu.LogWarning("Can't compute data for node %s/%s",
4681 node, disks[i].iv_name)
4684 cumul_degraded = (cumul_degraded or
4685 (mstat.is_degraded and mstat.sync_percent is None))
4686 if mstat.sync_percent is not None:
4688 if mstat.estimated_time is not None:
4689 rem_time = ("%s remaining (estimated)" %
4690 utils.FormatSeconds(mstat.estimated_time))
4691 max_time = mstat.estimated_time
4693 rem_time = "no time estimate"
4694 lu.LogInfo("- device %s: %5.2f%% done, %s",
4695 disks[i].iv_name, mstat.sync_percent, rem_time)
4697 # if we're done but degraded, let's do a few small retries, to
4698 # make sure we see a stable and not transient situation; therefore
4699 # we force restart of the loop
4700 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4701 logging.info("Degraded disks found, %d retries left", degr_retries)
4709 time.sleep(min(60, max_time))
4712 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4714 return not cumul_degraded
4717 def _BlockdevFind(lu, node, dev, instance):
4718 """Wrapper around call_blockdev_find to annotate diskparams.
4720 @param lu: A reference to the lu object
4721 @param node: The node to call out
4722 @param dev: The device to find
4723 @param instance: The instance object the device belongs to
4724 @return: the result of the RPC call
4727 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4728 return lu.rpc.call_blockdev_find(node, disk)
4731 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4732 """Wrapper around L{_CheckDiskConsistencyInner}.
4735 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4736 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4740 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4742 """Check that mirrors are not degraded.
4744 @attention: The device has to be annotated already.
4746 The ldisk parameter, if True, will change the test from the
4747 is_degraded attribute (which represents overall non-ok status for
4748 the device(s)) to the ldisk (representing the local storage status).
4751 lu.cfg.SetDiskID(dev, node)
4755 if on_primary or dev.AssembleOnSecondary():
4756 rstats = lu.rpc.call_blockdev_find(node, dev)
4757 msg = rstats.fail_msg
4759 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4761 elif not rstats.payload:
4762 lu.LogWarning("Can't find disk on node %s", node)
4766 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4768 result = result and not rstats.payload.is_degraded
4771 for child in dev.children:
4772 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4778 class LUOobCommand(NoHooksLU):
4779 """Logical unit for OOB handling.
4783 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4785 def ExpandNames(self):
4786 """Gather locks we need.
4789 if self.op.node_names:
4790 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4791 lock_names = self.op.node_names
4793 lock_names = locking.ALL_SET
4795 self.needed_locks = {
4796 locking.LEVEL_NODE: lock_names,
4799 def CheckPrereq(self):
4800 """Check prerequisites.
4803 - the node exists in the configuration
4806 Any errors are signaled by raising errors.OpPrereqError.
4810 self.master_node = self.cfg.GetMasterNode()
4812 assert self.op.power_delay >= 0.0
4814 if self.op.node_names:
4815 if (self.op.command in self._SKIP_MASTER and
4816 self.master_node in self.op.node_names):
4817 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4818 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4820 if master_oob_handler:
4821 additional_text = ("run '%s %s %s' if you want to operate on the"
4822 " master regardless") % (master_oob_handler,
4826 additional_text = "it does not support out-of-band operations"
4828 raise errors.OpPrereqError(("Operating on the master node %s is not"
4829 " allowed for %s; %s") %
4830 (self.master_node, self.op.command,
4831 additional_text), errors.ECODE_INVAL)
4833 self.op.node_names = self.cfg.GetNodeList()
4834 if self.op.command in self._SKIP_MASTER:
4835 self.op.node_names.remove(self.master_node)
4837 if self.op.command in self._SKIP_MASTER:
4838 assert self.master_node not in self.op.node_names
4840 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4842 raise errors.OpPrereqError("Node %s not found" % node_name,
4845 self.nodes.append(node)
4847 if (not self.op.ignore_status and
4848 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4849 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4850 " not marked offline") % node_name,
4853 def Exec(self, feedback_fn):
4854 """Execute OOB and return result if we expect any.
4857 master_node = self.master_node
4860 for idx, node in enumerate(utils.NiceSort(self.nodes,
4861 key=lambda node: node.name)):
4862 node_entry = [(constants.RS_NORMAL, node.name)]
4863 ret.append(node_entry)
4865 oob_program = _SupportsOob(self.cfg, node)
4868 node_entry.append((constants.RS_UNAVAIL, None))
4871 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4872 self.op.command, oob_program, node.name)
4873 result = self.rpc.call_run_oob(master_node, oob_program,
4874 self.op.command, node.name,
4878 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4879 node.name, result.fail_msg)
4880 node_entry.append((constants.RS_NODATA, None))
4883 self._CheckPayload(result)
4884 except errors.OpExecError, err:
4885 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4887 node_entry.append((constants.RS_NODATA, None))
4889 if self.op.command == constants.OOB_HEALTH:
4890 # For health we should log important events
4891 for item, status in result.payload:
4892 if status in [constants.OOB_STATUS_WARNING,
4893 constants.OOB_STATUS_CRITICAL]:
4894 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4895 item, node.name, status)
4897 if self.op.command == constants.OOB_POWER_ON:
4899 elif self.op.command == constants.OOB_POWER_OFF:
4900 node.powered = False
4901 elif self.op.command == constants.OOB_POWER_STATUS:
4902 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4903 if powered != node.powered:
4904 logging.warning(("Recorded power state (%s) of node '%s' does not"
4905 " match actual power state (%s)"), node.powered,
4908 # For configuration changing commands we should update the node
4909 if self.op.command in (constants.OOB_POWER_ON,
4910 constants.OOB_POWER_OFF):
4911 self.cfg.Update(node, feedback_fn)
4913 node_entry.append((constants.RS_NORMAL, result.payload))
4915 if (self.op.command == constants.OOB_POWER_ON and
4916 idx < len(self.nodes) - 1):
4917 time.sleep(self.op.power_delay)
4921 def _CheckPayload(self, result):
4922 """Checks if the payload is valid.
4924 @param result: RPC result
4925 @raises errors.OpExecError: If payload is not valid
4929 if self.op.command == constants.OOB_HEALTH:
4930 if not isinstance(result.payload, list):
4931 errs.append("command 'health' is expected to return a list but got %s" %
4932 type(result.payload))
4934 for item, status in result.payload:
4935 if status not in constants.OOB_STATUSES:
4936 errs.append("health item '%s' has invalid status '%s'" %
4939 if self.op.command == constants.OOB_POWER_STATUS:
4940 if not isinstance(result.payload, dict):
4941 errs.append("power-status is expected to return a dict but got %s" %
4942 type(result.payload))
4944 if self.op.command in [
4945 constants.OOB_POWER_ON,
4946 constants.OOB_POWER_OFF,
4947 constants.OOB_POWER_CYCLE,
4949 if result.payload is not None:
4950 errs.append("%s is not expected to return a payload but got '%s'" %
4951 (self.op.command, result.payload))
4954 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4955 utils.CommaJoin(errs))
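# Payload shapes checked above, shown with hypothetical values:
#   OOB_HEALTH:        [("temperature", constants.OOB_STATUS_WARNING), ...]
#   OOB_POWER_STATUS:  {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE: no payload (None) is expected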
4958 class _OsQuery(_QueryBase):
4959 FIELDS = query.OS_FIELDS
4961 def ExpandNames(self, lu):
4962 # Lock all nodes in shared mode
4963 # Temporary removal of locks, should be reverted later
4964 # TODO: reintroduce locks when they are lighter-weight
4965 lu.needed_locks = {}
4966 #self.share_locks[locking.LEVEL_NODE] = 1
4967 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4969 # The following variables interact with _QueryBase._GetNames
4971 self.wanted = self.names
4973 self.wanted = locking.ALL_SET
4975 self.do_locking = self.use_locking
4977 def DeclareLocks(self, lu, level):
4981 def _DiagnoseByOS(rlist):
4982 """Remaps a per-node return list into an a per-os per-node dictionary
4984 @param rlist: a map with node names as keys and OS objects as values
4987 @return: a dictionary with osnames as keys and as value another
4988 map, with nodes as keys and tuples of (path, status, diagnose,
4989 variants, parameters, api_versions) as values, eg::
4991 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4992 (/srv/..., False, "invalid api")],
4993 "node2": [(/srv/..., True, "", [], [])]}
4998 # we build here the list of nodes that didn't fail the RPC (at RPC
4999 # level), so that nodes with a non-responding node daemon don't
5000 # make all OSes invalid
5001 good_nodes = [node_name for node_name in rlist
5002 if not rlist[node_name].fail_msg]
5003 for node_name, nr in rlist.items():
5004 if nr.fail_msg or not nr.payload:
5006 for (name, path, status, diagnose, variants,
5007 params, api_versions) in nr.payload:
5008 if name not in all_os:
5009 # build a list of nodes for this os containing empty lists
5010 # for each node in node_list
5012 for nname in good_nodes:
5013 all_os[name][nname] = []
5014 # convert params from [name, help] to (name, help)
5015 params = [tuple(v) for v in params]
5016 all_os[name][node_name].append((path, status, diagnose,
5017 variants, params, api_versions))
5020 def _GetQueryData(self, lu):
5021 """Computes the list of nodes and their attributes.
5024 # Locking is not used
5025 assert not (compat.any(lu.glm.is_owned(level)
5026 for level in locking.LEVELS
5027 if level != locking.LEVEL_CLUSTER) or
5028 self.do_locking or self.use_locking)
5030 valid_nodes = [node.name
5031 for node in lu.cfg.GetAllNodesInfo().values()
5032 if not node.offline and node.vm_capable]
5033 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5034 cluster = lu.cfg.GetClusterInfo()
5038 for (os_name, os_data) in pol.items():
5039 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5040 hidden=(os_name in cluster.hidden_os),
5041 blacklisted=(os_name in cluster.blacklisted_os))
5045 api_versions = set()
5047 for idx, osl in enumerate(os_data.values()):
5048 info.valid = bool(info.valid and osl and osl[0][1])
5052 (node_variants, node_params, node_api) = osl[0][3:6]
5055 variants.update(node_variants)
5056 parameters.update(node_params)
5057 api_versions.update(node_api)
5059 # Filter out inconsistent values
5060 variants.intersection_update(node_variants)
5061 parameters.intersection_update(node_params)
5062 api_versions.intersection_update(node_api)
5064 info.variants = list(variants)
5065 info.parameters = list(parameters)
5066 info.api_versions = list(api_versions)
5068 data[os_name] = info
5070 # Prepare data in requested order
5071 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5075 class LUOsDiagnose(NoHooksLU):
5076 """Logical unit for OS diagnose/query.
5082 def _BuildFilter(fields, names):
5083 """Builds a filter for querying OSes.
5086 name_filter = qlang.MakeSimpleFilter("name", names)
5088 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5089 # respective field is not requested
5090 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5091 for fname in ["hidden", "blacklisted"]
5092 if fname not in fields]
5093 if "valid" not in fields:
5094 status_filter.append([qlang.OP_TRUE, "valid"])
5097 status_filter.insert(0, qlang.OP_AND)
5099 status_filter = None
5101 if name_filter and status_filter:
5102 return [qlang.OP_AND, name_filter, status_filter]
5106 return status_filter
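# Sketch (hypothetical arguments): _BuildFilter(["name", "valid"], ["lenny"])
# yields a filter roughly equivalent to
#   [qlang.OP_AND,
#    qlang.MakeSimpleFilter("name", ["lenny"]),
#    [qlang.OP_AND, [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]]]]
# i.e. hidden and blacklisted OSes stay filtered out unless those fields are
# requested explicitly.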
5108 def CheckArguments(self):
5109 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5110 self.op.output_fields, False)
5112 def ExpandNames(self):
5113 self.oq.ExpandNames(self)
5115 def Exec(self, feedback_fn):
5116 return self.oq.OldStyleQuery(self)
5119 class LUNodeRemove(LogicalUnit):
5120 """Logical unit for removing a node.
5123 HPATH = "node-remove"
5124 HTYPE = constants.HTYPE_NODE
5126 def BuildHooksEnv(self):
5131 "OP_TARGET": self.op.node_name,
5132 "NODE_NAME": self.op.node_name,
5135 def BuildHooksNodes(self):
5136 """Build hooks nodes.
5138 This doesn't run on the target node in the pre phase as a failed
5139 node would then be impossible to remove.
5142 all_nodes = self.cfg.GetNodeList()
5144 all_nodes.remove(self.op.node_name)
5147 return (all_nodes, all_nodes)
5149 def CheckPrereq(self):
5150 """Check prerequisites.
5153 - the node exists in the configuration
5154 - it does not have primary or secondary instances
5155 - it's not the master
5157 Any errors are signaled by raising errors.OpPrereqError.
5160 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5161 node = self.cfg.GetNodeInfo(self.op.node_name)
5162 assert node is not None
5164 masternode = self.cfg.GetMasterNode()
5165 if node.name == masternode:
5166 raise errors.OpPrereqError("Node is the master node, failover to another"
5167 " node is required", errors.ECODE_INVAL)
5169 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5170 if node.name in instance.all_nodes:
5171 raise errors.OpPrereqError("Instance %s is still running on the node,"
5172 " please remove first" % instance_name,
5174 self.op.node_name = node.name
5177 def Exec(self, feedback_fn):
5178 """Removes the node from the cluster.
5182 logging.info("Stopping the node daemon and removing configs from node %s",
5185 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5187 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5190 # Promote nodes to master candidate as needed
5191 _AdjustCandidatePool(self, exceptions=[node.name])
5192 self.context.RemoveNode(node.name)
5194 # Run post hooks on the node before it's removed
5195 _RunPostHook(self, node.name)
5197 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5198 msg = result.fail_msg
5200 self.LogWarning("Errors encountered on the remote node while leaving"
5201 " the cluster: %s", msg)
5203 # Remove node from our /etc/hosts
5204 if self.cfg.GetClusterInfo().modify_etc_hosts:
5205 master_node = self.cfg.GetMasterNode()
5206 result = self.rpc.call_etc_hosts_modify(master_node,
5207 constants.ETC_HOSTS_REMOVE,
5209 result.Raise("Can't update hosts file with new host data")
5210 _RedistributeAncillaryFiles(self)
5213 class _NodeQuery(_QueryBase):
5214 FIELDS = query.NODE_FIELDS
5216 def ExpandNames(self, lu):
5217 lu.needed_locks = {}
5218 lu.share_locks = _ShareAll()
5221 self.wanted = _GetWantedNodes(lu, self.names)
5223 self.wanted = locking.ALL_SET
5225 self.do_locking = (self.use_locking and
5226 query.NQ_LIVE in self.requested_data)
5229 # If any non-static field is requested we need to lock the nodes
5230 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5232 def DeclareLocks(self, lu, level):
5235 def _GetQueryData(self, lu):
5236 """Computes the list of nodes and their attributes.
5239 all_info = lu.cfg.GetAllNodesInfo()
5241 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5243 # Gather data as requested
5244 if query.NQ_LIVE in self.requested_data:
5245 # filter out non-vm_capable nodes
5246 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5248 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5249 [lu.cfg.GetHypervisorType()])
5250 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5251 for (name, nresult) in node_data.items()
5252 if not nresult.fail_msg and nresult.payload)
5256 if query.NQ_INST in self.requested_data:
5257 node_to_primary = dict([(name, set()) for name in nodenames])
5258 node_to_secondary = dict([(name, set()) for name in nodenames])
5260 inst_data = lu.cfg.GetAllInstancesInfo()
5262 for inst in inst_data.values():
5263 if inst.primary_node in node_to_primary:
5264 node_to_primary[inst.primary_node].add(inst.name)
5265 for secnode in inst.secondary_nodes:
5266 if secnode in node_to_secondary:
5267 node_to_secondary[secnode].add(inst.name)
5269 node_to_primary = None
5270 node_to_secondary = None
5272 if query.NQ_OOB in self.requested_data:
5273 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5274 for name, node in all_info.iteritems())
5278 if query.NQ_GROUP in self.requested_data:
5279 groups = lu.cfg.GetAllNodeGroupsInfo()
5283 return query.NodeQueryData([all_info[name] for name in nodenames],
5284 live_data, lu.cfg.GetMasterNode(),
5285 node_to_primary, node_to_secondary, groups,
5286 oob_support, lu.cfg.GetClusterInfo())
5289 class LUNodeQuery(NoHooksLU):
5290 """Logical unit for querying nodes.
5293 # pylint: disable=W0142
5296 def CheckArguments(self):
5297 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5298 self.op.output_fields, self.op.use_locking)
5300 def ExpandNames(self):
5301 self.nq.ExpandNames(self)
5303 def DeclareLocks(self, level):
5304 self.nq.DeclareLocks(self, level)
5306 def Exec(self, feedback_fn):
5307 return self.nq.OldStyleQuery(self)
5310 class LUNodeQueryvols(NoHooksLU):
5311 """Logical unit for getting volumes on node(s).
5315 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5316 _FIELDS_STATIC = utils.FieldSet("node")
5318 def CheckArguments(self):
5319 _CheckOutputFields(static=self._FIELDS_STATIC,
5320 dynamic=self._FIELDS_DYNAMIC,
5321 selected=self.op.output_fields)
5323 def ExpandNames(self):
5324 self.share_locks = _ShareAll()
5325 self.needed_locks = {}
5327 if not self.op.nodes:
5328 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5330 self.needed_locks[locking.LEVEL_NODE] = \
5331 _GetWantedNodes(self, self.op.nodes)
5333 def Exec(self, feedback_fn):
5334 """Computes the list of nodes and their attributes.
5337 nodenames = self.owned_locks(locking.LEVEL_NODE)
5338 volumes = self.rpc.call_node_volumes(nodenames)
5340 ilist = self.cfg.GetAllInstancesInfo()
5341 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5344 for node in nodenames:
5345 nresult = volumes[node]
5348 msg = nresult.fail_msg
5350 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5353 node_vols = sorted(nresult.payload,
5354 key=operator.itemgetter("dev"))
5356 for vol in node_vols:
5358 for field in self.op.output_fields:
5361 elif field == "phys":
5365 elif field == "name":
5367 elif field == "size":
5368 val = int(float(vol["size"]))
5369 elif field == "instance":
5370 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5372 raise errors.ParameterError(field)
5373 node_output.append(str(val))
5375 output.append(node_output)
5380 class LUNodeQueryStorage(NoHooksLU):
5381 """Logical unit for getting information on storage units on node(s).
5384 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5387 def CheckArguments(self):
5388 _CheckOutputFields(static=self._FIELDS_STATIC,
5389 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5390 selected=self.op.output_fields)
5392 def ExpandNames(self):
5393 self.share_locks = _ShareAll()
5396 self.needed_locks = {
5397 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5400 self.needed_locks = {
5401 locking.LEVEL_NODE: locking.ALL_SET,
5402 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5405 def Exec(self, feedback_fn):
5406 """Computes the list of nodes and their attributes.
5409 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5411 # Always get name to sort by
5412 if constants.SF_NAME in self.op.output_fields:
5413 fields = self.op.output_fields[:]
5415 fields = [constants.SF_NAME] + self.op.output_fields
5417 # Never ask for node or type as it's only known to the LU
5418 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5419 while extra in fields:
5420 fields.remove(extra)
5422 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5423 name_idx = field_idx[constants.SF_NAME]
5425 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5426 data = self.rpc.call_storage_list(self.nodes,
5427 self.op.storage_type, st_args,
5428 self.op.name, fields)
5432 for node in utils.NiceSort(self.nodes):
5433 nresult = data[node]
5437 msg = nresult.fail_msg
5439 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5442 rows = dict([(row[name_idx], row) for row in nresult.payload])
5444 for name in utils.NiceSort(rows.keys()):
5449 for field in self.op.output_fields:
5450 if field == constants.SF_NODE:
5452 elif field == constants.SF_TYPE:
5453 val = self.op.storage_type
5454 elif field in field_idx:
5455 val = row[field_idx[field]]
5457 raise errors.ParameterError(field)
5466 class _InstanceQuery(_QueryBase):
5467 FIELDS = query.INSTANCE_FIELDS
5469 def ExpandNames(self, lu):
5470 lu.needed_locks = {}
5471 lu.share_locks = _ShareAll()
5474 self.wanted = _GetWantedInstances(lu, self.names)
5476 self.wanted = locking.ALL_SET
5478 self.do_locking = (self.use_locking and
5479 query.IQ_LIVE in self.requested_data)
5481 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5482 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5483 lu.needed_locks[locking.LEVEL_NODE] = []
5484 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5486 self.do_grouplocks = (self.do_locking and
5487 query.IQ_NODES in self.requested_data)
5489 def DeclareLocks(self, lu, level):
5491 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5492 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5494 # Lock all groups used by instances optimistically; this requires going
5495 # via the node before it's locked, requiring verification later on
5496 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5498 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5499 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5500 elif level == locking.LEVEL_NODE:
5501 lu._LockInstancesNodes() # pylint: disable=W0212
5504 def _CheckGroupLocks(lu):
5505 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5506 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5508 # Check if node groups for locked instances are still correct
5509 for instance_name in owned_instances:
5510 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5512 def _GetQueryData(self, lu):
5513 """Computes the list of instances and their attributes.
5516 if self.do_grouplocks:
5517 self._CheckGroupLocks(lu)
5519 cluster = lu.cfg.GetClusterInfo()
5520 all_info = lu.cfg.GetAllInstancesInfo()
5522 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5524 instance_list = [all_info[name] for name in instance_names]
5525 nodes = frozenset(itertools.chain(*(inst.all_nodes
5526 for inst in instance_list)))
5527 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5530 wrongnode_inst = set()
5532 # Gather data as requested
5533 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5535 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5537 result = node_data[name]
5539 # offline nodes will be in both lists
5540 assert result.fail_msg
5541 offline_nodes.append(name)
5543 bad_nodes.append(name)
5544 elif result.payload:
5545 for inst in result.payload:
5546 if inst in all_info:
5547 if all_info[inst].primary_node == name:
5548 live_data.update(result.payload)
5550 wrongnode_inst.add(inst)
5552 # orphan instance; we don't list it here as we don't
5553 # handle this case yet in the output of instance listing
5554 logging.warning("Orphan instance '%s' found on node %s",
5556 # else no instance is alive
5560 if query.IQ_DISKUSAGE in self.requested_data:
5561 gmi = ganeti.masterd.instance
5562 disk_usage = dict((inst.name,
5563 gmi.ComputeDiskSize(inst.disk_template,
5564 [{constants.IDISK_SIZE: disk.size}
5565 for disk in inst.disks]))
5566 for inst in instance_list)
5570 if query.IQ_CONSOLE in self.requested_data:
5572 for inst in instance_list:
5573 if inst.name in live_data:
5574 # Instance is running
5575 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5577 consinfo[inst.name] = None
5578 assert set(consinfo.keys()) == set(instance_names)
5582 if query.IQ_NODES in self.requested_data:
5583 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5585 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5586 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5587 for uuid in set(map(operator.attrgetter("group"),
5593 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5594 disk_usage, offline_nodes, bad_nodes,
5595 live_data, wrongnode_inst, consinfo,
5599 class LUQuery(NoHooksLU):
5600 """Query for resources/items of a certain kind.
5603 # pylint: disable=W0142
5606 def CheckArguments(self):
5607 qcls = _GetQueryImplementation(self.op.what)
5609 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5611 def ExpandNames(self):
5612 self.impl.ExpandNames(self)
5614 def DeclareLocks(self, level):
5615 self.impl.DeclareLocks(self, level)
5617 def Exec(self, feedback_fn):
5618 return self.impl.NewStyleQuery(self)
5621 class LUQueryFields(NoHooksLU):
5622 """Query for resources/items of a certain kind.
5625 # pylint: disable=W0142
5628 def CheckArguments(self):
5629 self.qcls = _GetQueryImplementation(self.op.what)
5631 def ExpandNames(self):
5632 self.needed_locks = {}
5634 def Exec(self, feedback_fn):
5635 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5638 class LUNodeModifyStorage(NoHooksLU):
5639 """Logical unit for modifying a storage volume on a node.
5644 def CheckArguments(self):
5645 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5647 storage_type = self.op.storage_type
5650 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5652 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5653 " modified" % storage_type,
5656 diff = set(self.op.changes.keys()) - modifiable
5658 raise errors.OpPrereqError("The following fields can not be modified for"
5659 " storage units of type '%s': %r" %
5660 (storage_type, list(diff)),
5663 def ExpandNames(self):
5664 self.needed_locks = {
5665 locking.LEVEL_NODE: self.op.node_name,
5668 def Exec(self, feedback_fn):
5669 """Computes the list of nodes and their attributes.
5672 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5673 result = self.rpc.call_storage_modify(self.op.node_name,
5674 self.op.storage_type, st_args,
5675 self.op.name, self.op.changes)
5676 result.Raise("Failed to modify storage unit '%s' on %s" %
5677 (self.op.name, self.op.node_name))
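# Illustrative opcode usage (hypothetical values): marking an LVM physical
# volume as no longer allocatable could be expressed as
#   opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_PV,
#                               name="/dev/sda3",
#                               changes={constants.SF_ALLOCATABLE: False})
# Only fields listed in constants.MODIFIABLE_STORAGE_FIELDS for the storage
# type pass the CheckArguments validation above.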
5680 class LUNodeAdd(LogicalUnit):
5681 """Logical unit for adding node to the cluster.
5685 HTYPE = constants.HTYPE_NODE
5686 _NFLAGS = ["master_capable", "vm_capable"]
5688 def CheckArguments(self):
5689 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5690 # validate/normalize the node name
5691 self.hostname = netutils.GetHostname(name=self.op.node_name,
5692 family=self.primary_ip_family)
5693 self.op.node_name = self.hostname.name
5695 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5696 raise errors.OpPrereqError("Cannot readd the master node",
5699 if self.op.readd and self.op.group:
5700 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5701 " being readded", errors.ECODE_INVAL)
5703 def BuildHooksEnv(self):
5706 This will run on all nodes before, and on all nodes + the new node after.
5710 "OP_TARGET": self.op.node_name,
5711 "NODE_NAME": self.op.node_name,
5712 "NODE_PIP": self.op.primary_ip,
5713 "NODE_SIP": self.op.secondary_ip,
5714 "MASTER_CAPABLE": str(self.op.master_capable),
5715 "VM_CAPABLE": str(self.op.vm_capable),
5718 def BuildHooksNodes(self):
5719 """Build hooks nodes.
5722 # Exclude added node
5723 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5724 post_nodes = pre_nodes + [self.op.node_name, ]
5726 return (pre_nodes, post_nodes)
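# Example with hypothetical node names: adding "node3" to a cluster made of
# "node1" and "node2" makes pre_nodes contain only "node1" and "node2" while
# post_nodes additionally contains "node3", so the pre-add hooks skip the
# node being added and the post-add hooks include it.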
5728 def CheckPrereq(self):
5729 """Check prerequisites.
5732 - the new node is not already in the config
5734 - its parameters (single/dual homed) match the cluster
5736 Any errors are signaled by raising errors.OpPrereqError.
5740 hostname = self.hostname
5741 node = hostname.name
5742 primary_ip = self.op.primary_ip = hostname.ip
5743 if self.op.secondary_ip is None:
5744 if self.primary_ip_family == netutils.IP6Address.family:
5745 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5746 " IPv4 address must be given as secondary",
5748 self.op.secondary_ip = primary_ip
5750 secondary_ip = self.op.secondary_ip
5751 if not netutils.IP4Address.IsValid(secondary_ip):
5752 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5753 " address" % secondary_ip, errors.ECODE_INVAL)
5755 node_list = cfg.GetNodeList()
5756 if not self.op.readd and node in node_list:
5757 raise errors.OpPrereqError("Node %s is already in the configuration" %
5758 node, errors.ECODE_EXISTS)
5759 elif self.op.readd and node not in node_list:
5760 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5763 self.changed_primary_ip = False
5765 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5766 if self.op.readd and node == existing_node_name:
5767 if existing_node.secondary_ip != secondary_ip:
5768 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5769 " address configuration as before",
5771 if existing_node.primary_ip != primary_ip:
5772 self.changed_primary_ip = True
5776 if (existing_node.primary_ip == primary_ip or
5777 existing_node.secondary_ip == primary_ip or
5778 existing_node.primary_ip == secondary_ip or
5779 existing_node.secondary_ip == secondary_ip):
5780 raise errors.OpPrereqError("New node ip address(es) conflict with"
5781 " existing node %s" % existing_node.name,
5782 errors.ECODE_NOTUNIQUE)
5784 # After this 'if' block, None is no longer a valid value for the
5785 # _capable op attributes
5787 old_node = self.cfg.GetNodeInfo(node)
5788 assert old_node is not None, "Can't retrieve locked node %s" % node
5789 for attr in self._NFLAGS:
5790 if getattr(self.op, attr) is None:
5791 setattr(self.op, attr, getattr(old_node, attr))
5793 for attr in self._NFLAGS:
5794 if getattr(self.op, attr) is None:
5795 setattr(self.op, attr, True)
5797 if self.op.readd and not self.op.vm_capable:
5798 pri, sec = cfg.GetNodeInstances(node)
5800 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5801 " flag set to false, but it already holds"
5802 " instances" % node,
5805 # check that the type of the node (single versus dual homed) is the
5806 # same as for the master
5807 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5808 master_singlehomed = myself.secondary_ip == myself.primary_ip
5809 newbie_singlehomed = secondary_ip == primary_ip
5810 if master_singlehomed != newbie_singlehomed:
5811 if master_singlehomed:
5812 raise errors.OpPrereqError("The master has no secondary ip but the"
5813 " new node has one",
5816 raise errors.OpPrereqError("The master has a secondary ip but the"
5817 " new node doesn't have one",
5820 # checks reachability
5821 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5822 raise errors.OpPrereqError("Node not reachable by ping",
5823 errors.ECODE_ENVIRON)
5825 if not newbie_singlehomed:
5826 # check reachability from my secondary ip to newbie's secondary ip
5827 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5828 source=myself.secondary_ip):
5829 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5830 " based ping to node daemon port",
5831 errors.ECODE_ENVIRON)
5838 if self.op.master_capable:
5839 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5841 self.master_candidate = False
5844 self.new_node = old_node
5846 node_group = cfg.LookupNodeGroup(self.op.group)
5847 self.new_node = objects.Node(name=node,
5848 primary_ip=primary_ip,
5849 secondary_ip=secondary_ip,
5850 master_candidate=self.master_candidate,
5851 offline=False, drained=False,
5854 if self.op.ndparams:
5855 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5857 if self.op.hv_state:
5858 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5860 if self.op.disk_state:
5861 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5863 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5864 # it a property on the base class.
5865 result = rpc.DnsOnlyRunner().call_version([node])[node]
5866 result.Raise("Can't get version information from node %s" % node)
5867 if constants.PROTOCOL_VERSION == result.payload:
5868 logging.info("Communication to node %s fine, sw version %s match",
5869 node, result.payload)
5871 raise errors.OpPrereqError("Version mismatch master version %s,"
5872 " node version %s" %
5873 (constants.PROTOCOL_VERSION, result.payload),
5874 errors.ECODE_ENVIRON)
5876 def Exec(self, feedback_fn):
5877 """Adds the new node to the cluster.
5880 new_node = self.new_node
5881 node = new_node.name
5883 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5886 # We are adding a new node, so we assume it's powered
5887 new_node.powered = True
5889 # for re-adds, reset the offline/drained/master-candidate flags;
5890 # we need to reset here, otherwise offline would prevent RPC calls
5891 # later in the procedure; this also means that if the re-add
5892 # fails, we are left with a non-offlined, broken node
5894 new_node.drained = new_node.offline = False # pylint: disable=W0201
5895 self.LogInfo("Readding a node, the offline/drained flags were reset")
5896 # if we demote the node, we do cleanup later in the procedure
5897 new_node.master_candidate = self.master_candidate
5898 if self.changed_primary_ip:
5899 new_node.primary_ip = self.op.primary_ip
5901 # copy the master/vm_capable flags
5902 for attr in self._NFLAGS:
5903 setattr(new_node, attr, getattr(self.op, attr))
5905 # notify the user about any possible mc promotion
5906 if new_node.master_candidate:
5907 self.LogInfo("Node will be a master candidate")
5909 if self.op.ndparams:
5910 new_node.ndparams = self.op.ndparams
5912 new_node.ndparams = {}
5914 if self.op.hv_state:
5915 new_node.hv_state_static = self.new_hv_state
5917 if self.op.disk_state:
5918 new_node.disk_state_static = self.new_disk_state
5920 # Add node to our /etc/hosts, and add key to known_hosts
5921 if self.cfg.GetClusterInfo().modify_etc_hosts:
5922 master_node = self.cfg.GetMasterNode()
5923 result = self.rpc.call_etc_hosts_modify(master_node,
5924 constants.ETC_HOSTS_ADD,
5927 result.Raise("Can't update hosts file with new host data")
5929 if new_node.secondary_ip != new_node.primary_ip:
5930 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5933 node_verify_list = [self.cfg.GetMasterNode()]
5934 node_verify_param = {
5935 constants.NV_NODELIST: ([node], {}),
5936 # TODO: do a node-net-test as well?
5939 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5940 self.cfg.GetClusterName())
5941 for verifier in node_verify_list:
5942 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5943 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5945 for failed in nl_payload:
5946 feedback_fn("ssh/hostname verification failed"
5947 " (checking from %s): %s" %
5948 (verifier, nl_payload[failed]))
5949 raise errors.OpExecError("ssh/hostname verification failed")
5952 _RedistributeAncillaryFiles(self)
5953 self.context.ReaddNode(new_node)
5954 # make sure we redistribute the config
5955 self.cfg.Update(new_node, feedback_fn)
5956 # and make sure the new node will not have old files around
5957 if not new_node.master_candidate:
5958 result = self.rpc.call_node_demote_from_mc(new_node.name)
5959 msg = result.fail_msg
5961 self.LogWarning("Node failed to demote itself from master"
5962 " candidate status: %s" % msg)
5964 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5965 additional_vm=self.op.vm_capable)
5966 self.context.AddNode(new_node, self.proc.GetECId())
5969 class LUNodeSetParams(LogicalUnit):
5970 """Modifies the parameters of a node.
5972 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5973 to the node role (as _ROLE_*)
5974 @cvar _R2F: a dictionary from node role to tuples of flags
5975 @cvar _FLAGS: a list of attribute names corresponding to the flags
5978 HPATH = "node-modify"
5979 HTYPE = constants.HTYPE_NODE
5981 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5983 (True, False, False): _ROLE_CANDIDATE,
5984 (False, True, False): _ROLE_DRAINED,
5985 (False, False, True): _ROLE_OFFLINE,
5986 (False, False, False): _ROLE_REGULAR,
5988 _R2F = dict((v, k) for k, v in _F2R.items())
5989 _FLAGS = ["master_candidate", "drained", "offline"]
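# Worked example of the two mappings above: a drained node that is neither a
# master candidate nor offline has the flag tuple (master_candidate, drained,
# offline) == (False, True, False), so
#   _F2R[(False, True, False)] == _ROLE_DRAINED
#   _R2F[_ROLE_DRAINED] == (False, True, False)
# CheckPrereq and Exec use these dictionaries to translate between the
# per-flag view and the single-role view of a node.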
5991 def CheckArguments(self):
5992 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5993 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5994 self.op.master_capable, self.op.vm_capable,
5995 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5997 if all_mods.count(None) == len(all_mods):
5998 raise errors.OpPrereqError("Please pass at least one modification",
6000 if all_mods.count(True) > 1:
6001 raise errors.OpPrereqError("Can't set the node into more than one"
6002 " state at the same time",
6005 # Boolean value that tells us whether we might be demoting from MC
6006 self.might_demote = (self.op.master_candidate is False or
6007 self.op.offline is True or
6008 self.op.drained is True or
6009 self.op.master_capable is False)
6011 if self.op.secondary_ip:
6012 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6013 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6014 " address" % self.op.secondary_ip,
6017 self.lock_all = self.op.auto_promote and self.might_demote
6018 self.lock_instances = self.op.secondary_ip is not None
6020 def _InstanceFilter(self, instance):
6021 """Filter for getting affected instances.
6024 return (instance.disk_template in constants.DTS_INT_MIRROR and
6025 self.op.node_name in instance.all_nodes)
6027 def ExpandNames(self):
6029 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
6031 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
6033 # Since modifying a node can have severe effects on currently running
6034 # operations, the resource lock is at least acquired in shared mode
6035 self.needed_locks[locking.LEVEL_NODE_RES] = \
6036 self.needed_locks[locking.LEVEL_NODE]
6038 # Get node resource and instance locks in shared mode; they are not used
6039 # for anything but read-only access
6040 self.share_locks[locking.LEVEL_NODE_RES] = 1
6041 self.share_locks[locking.LEVEL_INSTANCE] = 1
6043 if self.lock_instances:
6044 self.needed_locks[locking.LEVEL_INSTANCE] = \
6045 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6047 def BuildHooksEnv(self):
6050 This runs on the master node.
6054 "OP_TARGET": self.op.node_name,
6055 "MASTER_CANDIDATE": str(self.op.master_candidate),
6056 "OFFLINE": str(self.op.offline),
6057 "DRAINED": str(self.op.drained),
6058 "MASTER_CAPABLE": str(self.op.master_capable),
6059 "VM_CAPABLE": str(self.op.vm_capable),
6062 def BuildHooksNodes(self):
6063 """Build hooks nodes.
6066 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6069 def CheckPrereq(self):
6070 """Check prerequisites.
6072 This only checks the instance list against the existing names.
6075 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6077 if self.lock_instances:
6078 affected_instances = \
6079 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6081 # Verify instance locks
6082 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6083 wanted_instances = frozenset(affected_instances.keys())
6084 if wanted_instances - owned_instances:
6085 raise errors.OpPrereqError("Instances affected by changing node %s's"
6086 " secondary IP address have changed since"
6087 " locks were acquired, wanted '%s', have"
6088 " '%s'; retry the operation" %
6090 utils.CommaJoin(wanted_instances),
6091 utils.CommaJoin(owned_instances)),
6094 affected_instances = None
6096 if (self.op.master_candidate is not None or
6097 self.op.drained is not None or
6098 self.op.offline is not None):
6099 # we can't change the master's node flags
6100 if self.op.node_name == self.cfg.GetMasterNode():
6101 raise errors.OpPrereqError("The master role can be changed"
6102 " only via master-failover",
6105 if self.op.master_candidate and not node.master_capable:
6106 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6107 " it a master candidate" % node.name,
6110 if self.op.vm_capable is False:
6111 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6113 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6114 " the vm_capable flag" % node.name,
6117 if node.master_candidate and self.might_demote and not self.lock_all:
6118 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6119 # check if after removing the current node, we're missing master
6121 (mc_remaining, mc_should, _) = \
6122 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6123 if mc_remaining < mc_should:
6124 raise errors.OpPrereqError("Not enough master candidates, please"
6125 " pass auto promote option to allow"
6126 " promotion (--auto-promote or RAPI"
6127 " auto_promote=True)", errors.ECODE_STATE)
6129 self.old_flags = old_flags = (node.master_candidate,
6130 node.drained, node.offline)
6131 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6132 self.old_role = old_role = self._F2R[old_flags]
6134 # Check for ineffective changes
6135 for attr in self._FLAGS:
6136 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6137 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6138 setattr(self.op, attr, None)
6140 # Past this point, any flag change to False means a transition
6141 # away from the respective state, as only real changes are kept
6143 # TODO: We might query the real power state if it supports OOB
6144 if _SupportsOob(self.cfg, node):
6145 if self.op.offline is False and not (node.powered or
6146 self.op.powered is True):
6147 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6148 " offline status can be reset") %
6149 self.op.node_name, errors.ECODE_STATE)
6150 elif self.op.powered is not None:
6151 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6152 " as it does not support out-of-band"
6153 " handling") % self.op.node_name,
6156 # If we're being deofflined/drained, we'll MC ourself if needed
6157 if (self.op.drained is False or self.op.offline is False or
6158 (self.op.master_capable and not node.master_capable)):
6159 if _DecideSelfPromotion(self):
6160 self.op.master_candidate = True
6161 self.LogInfo("Auto-promoting node to master candidate")
6163 # If we're no longer master capable, we'll demote ourselves from MC
6164 if self.op.master_capable is False and node.master_candidate:
6165 self.LogInfo("Demoting from master candidate")
6166 self.op.master_candidate = False
6169 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6170 if self.op.master_candidate:
6171 new_role = self._ROLE_CANDIDATE
6172 elif self.op.drained:
6173 new_role = self._ROLE_DRAINED
6174 elif self.op.offline:
6175 new_role = self._ROLE_OFFLINE
6176 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6177 # False is still in new flags, which means we're un-setting (the
6179 new_role = self._ROLE_REGULAR
6180 else: # no new flags, nothing, keep old role
6183 self.new_role = new_role
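# Worked example of the role computation above: draining a node that is
# currently a master candidate means old_flags == (True, False, False), hence
# old_role == _ROLE_CANDIDATE, while self.op.drained == True yields
# new_role == _ROLE_DRAINED; Exec() then asks the node to demote itself from
# master candidate and sets its flags to _R2F[_ROLE_DRAINED].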
6185 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6186 # Trying to transition out of offline status
6187 result = self.rpc.call_version([node.name])[node.name]
6189 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6190 " to report its version: %s" %
6191 (node.name, result.fail_msg),
6194 self.LogWarning("Transitioning node from offline to online state"
6195 " without using re-add. Please make sure the node"
6198 # When changing the secondary ip, verify if this is a single-homed to
6199 # multi-homed transition or vice versa, and apply the relevant
6201 if self.op.secondary_ip:
6202 # Ok even without locking, because this can't be changed by any LU
6203 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6204 master_singlehomed = master.secondary_ip == master.primary_ip
6205 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6206 if self.op.force and node.name == master.name:
6207 self.LogWarning("Transitioning from single-homed to multi-homed"
6208 " cluster; all nodes will require a secondary IP"
6211 raise errors.OpPrereqError("Changing the secondary ip on a"
6212 " single-homed cluster requires the"
6213 " --force option to be passed, and the"
6214 " target node to be the master",
6216 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6217 if self.op.force and node.name == master.name:
6218 self.LogWarning("Transitioning from multi-homed to single-homed"
6219 " cluster; secondary IP addresses will have to be"
6222 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6223 " same as the primary IP on a multi-homed"
6224 " cluster, unless the --force option is"
6225 " passed, and the target node is the"
6226 " master", errors.ECODE_INVAL)
6228 assert not (frozenset(affected_instances) -
6229 self.owned_locks(locking.LEVEL_INSTANCE))
6232 if affected_instances:
6233 msg = ("Cannot change secondary IP address: offline node has"
6234 " instances (%s) configured to use it" %
6235 utils.CommaJoin(affected_instances.keys()))
6236 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6238 # On online nodes, check that no instances are running, and that
6239 # the node has the new ip and we can reach it.
6240 for instance in affected_instances.values():
6241 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6242 msg="cannot change secondary ip")
6244 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6245 if master.name != node.name:
6246 # check reachability from master secondary ip to new secondary ip
6247 if not netutils.TcpPing(self.op.secondary_ip,
6248 constants.DEFAULT_NODED_PORT,
6249 source=master.secondary_ip):
6250 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6251 " based ping to node daemon port",
6252 errors.ECODE_ENVIRON)
6254 if self.op.ndparams:
6255 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6256 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6257 self.new_ndparams = new_ndparams
6259 if self.op.hv_state:
6260 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6261 self.node.hv_state_static)
6263 if self.op.disk_state:
6264 self.new_disk_state = \
6265 _MergeAndVerifyDiskState(self.op.disk_state,
6266 self.node.disk_state_static)
6268 def Exec(self, feedback_fn):
6273 old_role = self.old_role
6274 new_role = self.new_role
6278 if self.op.ndparams:
6279 node.ndparams = self.new_ndparams
6281 if self.op.powered is not None:
6282 node.powered = self.op.powered
6284 if self.op.hv_state:
6285 node.hv_state_static = self.new_hv_state
6287 if self.op.disk_state:
6288 node.disk_state_static = self.new_disk_state
6290 for attr in ["master_capable", "vm_capable"]:
6291 val = getattr(self.op, attr)
6293 setattr(node, attr, val)
6294 result.append((attr, str(val)))
6296 if new_role != old_role:
6297 # Tell the node to demote itself, if no longer MC and not offline
6298 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6299 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6301 self.LogWarning("Node failed to demote itself: %s", msg)
6303 new_flags = self._R2F[new_role]
6304 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6306 result.append((desc, str(nf)))
6307 (node.master_candidate, node.drained, node.offline) = new_flags
6309 # we locked all nodes, so we adjust the candidate pool before updating this node
6311 _AdjustCandidatePool(self, [node.name])
6313 if self.op.secondary_ip:
6314 node.secondary_ip = self.op.secondary_ip
6315 result.append(("secondary_ip", self.op.secondary_ip))
6317 # this will trigger configuration file update, if needed
6318 self.cfg.Update(node, feedback_fn)
6320 # this will trigger job queue propagation or cleanup if the mc
6322 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6323 self.context.ReaddNode(node)
6328 class LUNodePowercycle(NoHooksLU):
6329 """Powercycles a node.
6334 def CheckArguments(self):
6335 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6336 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6337 raise errors.OpPrereqError("The node is the master and the force"
6338 " parameter was not set",
6341 def ExpandNames(self):
6342 """Locking for PowercycleNode.
6344 This is a last-resort option and shouldn't block on other
6345 jobs. Therefore, we grab no locks.
6348 self.needed_locks = {}
6350 def Exec(self, feedback_fn):
6354 result = self.rpc.call_node_powercycle(self.op.node_name,
6355 self.cfg.GetHypervisorType())
6356 result.Raise("Failed to schedule the reboot")
6357 return result.payload
6360 class LUClusterQuery(NoHooksLU):
6361 """Query cluster configuration.
6366 def ExpandNames(self):
6367 self.needed_locks = {}
6369 def Exec(self, feedback_fn):
6370 """Return cluster config.
6373 cluster = self.cfg.GetClusterInfo()
6376 # Filter just for enabled hypervisors
6377 for os_name, hv_dict in cluster.os_hvp.items():
6378 os_hvp[os_name] = {}
6379 for hv_name, hv_params in hv_dict.items():
6380 if hv_name in cluster.enabled_hypervisors:
6381 os_hvp[os_name][hv_name] = hv_params
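# Example with hypothetical OS/hypervisor parameters: if cluster.os_hvp is
#   {"debian-image": {"kvm": {...}, "xen-pvm": {...}}}
# and only "kvm" is listed in cluster.enabled_hypervisors, the filtered
# result is os_hvp == {"debian-image": {"kvm": {...}}}.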
6383 # Convert ip_family to ip_version
6384 primary_ip_version = constants.IP4_VERSION
6385 if cluster.primary_ip_family == netutils.IP6Address.family:
6386 primary_ip_version = constants.IP6_VERSION
6389 "software_version": constants.RELEASE_VERSION,
6390 "protocol_version": constants.PROTOCOL_VERSION,
6391 "config_version": constants.CONFIG_VERSION,
6392 "os_api_version": max(constants.OS_API_VERSIONS),
6393 "export_version": constants.EXPORT_VERSION,
6394 "architecture": runtime.GetArchInfo(),
6395 "name": cluster.cluster_name,
6396 "master": cluster.master_node,
6397 "default_hypervisor": cluster.primary_hypervisor,
6398 "enabled_hypervisors": cluster.enabled_hypervisors,
6399 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6400 for hypervisor_name in cluster.enabled_hypervisors]),
6402 "beparams": cluster.beparams,
6403 "osparams": cluster.osparams,
6404 "ipolicy": cluster.ipolicy,
6405 "nicparams": cluster.nicparams,
6406 "ndparams": cluster.ndparams,
6407 "diskparams": cluster.diskparams,
6408 "candidate_pool_size": cluster.candidate_pool_size,
6409 "master_netdev": cluster.master_netdev,
6410 "master_netmask": cluster.master_netmask,
6411 "use_external_mip_script": cluster.use_external_mip_script,
6412 "volume_group_name": cluster.volume_group_name,
6413 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6414 "file_storage_dir": cluster.file_storage_dir,
6415 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6416 "maintain_node_health": cluster.maintain_node_health,
6417 "ctime": cluster.ctime,
6418 "mtime": cluster.mtime,
6419 "uuid": cluster.uuid,
6420 "tags": list(cluster.GetTags()),
6421 "uid_pool": cluster.uid_pool,
6422 "default_iallocator": cluster.default_iallocator,
6423 "reserved_lvs": cluster.reserved_lvs,
6424 "primary_ip_version": primary_ip_version,
6425 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6426 "hidden_os": cluster.hidden_os,
6427 "blacklisted_os": cluster.blacklisted_os,
6433 class LUClusterConfigQuery(NoHooksLU):
6434 """Return configuration values.
6439 def CheckArguments(self):
6440 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6442 def ExpandNames(self):
6443 self.cq.ExpandNames(self)
6445 def DeclareLocks(self, level):
6446 self.cq.DeclareLocks(self, level)
6448 def Exec(self, feedback_fn):
6449 result = self.cq.OldStyleQuery(self)
6451 assert len(result) == 1
6456 class _ClusterQuery(_QueryBase):
6457 FIELDS = query.CLUSTER_FIELDS
6459 #: Do not sort (there is only one item)
6462 def ExpandNames(self, lu):
6463 lu.needed_locks = {}
6465 # The following variables interact with _QueryBase._GetNames
6466 self.wanted = locking.ALL_SET
6467 self.do_locking = self.use_locking
6470 raise errors.OpPrereqError("Can not use locking for cluster queries",
6473 def DeclareLocks(self, lu, level):
6476 def _GetQueryData(self, lu):
6477 """Computes the list of nodes and their attributes.
6480 # Locking is not used
6481 assert not (compat.any(lu.glm.is_owned(level)
6482 for level in locking.LEVELS
6483 if level != locking.LEVEL_CLUSTER) or
6484 self.do_locking or self.use_locking)
6486 if query.CQ_CONFIG in self.requested_data:
6487 cluster = lu.cfg.GetClusterInfo()
6489 cluster = NotImplemented
6491 if query.CQ_QUEUE_DRAINED in self.requested_data:
6492 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6494 drain_flag = NotImplemented
6496 if query.CQ_WATCHER_PAUSE in self.requested_data:
6497 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6499 watcher_pause = NotImplemented
6501 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6504 class LUInstanceActivateDisks(NoHooksLU):
6505 """Bring up an instance's disks.
6510 def ExpandNames(self):
6511 self._ExpandAndLockInstance()
6512 self.needed_locks[locking.LEVEL_NODE] = []
6513 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6515 def DeclareLocks(self, level):
6516 if level == locking.LEVEL_NODE:
6517 self._LockInstancesNodes()
6519 def CheckPrereq(self):
6520 """Check prerequisites.
6522 This checks that the instance is in the cluster.
6525 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6526 assert self.instance is not None, \
6527 "Cannot retrieve locked instance %s" % self.op.instance_name
6528 _CheckNodeOnline(self, self.instance.primary_node)
6530 def Exec(self, feedback_fn):
6531 """Activate the disks.
6534 disks_ok, disks_info = \
6535 _AssembleInstanceDisks(self, self.instance,
6536 ignore_size=self.op.ignore_size)
6538 raise errors.OpExecError("Cannot activate block devices")
6540 if self.op.wait_for_sync:
6541 if not _WaitForSync(self, self.instance):
6542 raise errors.OpExecError("Some disks of the instance are degraded!")
6547 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6549 """Prepare the block devices for an instance.
6551 This sets up the block devices on all nodes.
6553 @type lu: L{LogicalUnit}
6554 @param lu: the logical unit on whose behalf we execute
6555 @type instance: L{objects.Instance}
6556 @param instance: the instance for whose disks we assemble
6557 @type disks: list of L{objects.Disk} or None
6558 @param disks: which disks to assemble (or all, if None)
6559 @type ignore_secondaries: boolean
6560 @param ignore_secondaries: if true, errors on secondary nodes
6561 won't result in an error return from the function
6562 @type ignore_size: boolean
6563 @param ignore_size: if true, the current known size of the disk
6564 will not be used during the disk activation, useful for cases
6565 when the size is wrong
6566 @return: False if the operation failed, otherwise a list of
6567 (host, instance_visible_name, node_visible_name)
6568 with the mapping from node devices to instance devices
6573 iname = instance.name
6574 disks = _ExpandCheckDisks(instance, disks)
6576 # With the two-pass mechanism we try to reduce the window of
6577 # opportunity for the race condition of switching DRBD to primary
6578 # before handshaking has occurred, but we do not eliminate it
6580 # The proper fix would be to wait (with some limits) until the
6581 # connection has been made and drbd transitions from WFConnection
6582 # into any other network-connected state (Connected, SyncTarget,
6585 # 1st pass, assemble on all nodes in secondary mode
6586 for idx, inst_disk in enumerate(disks):
6587 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6589 node_disk = node_disk.Copy()
6590 node_disk.UnsetSize()
6591 lu.cfg.SetDiskID(node_disk, node)
6592 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6594 msg = result.fail_msg
6596 is_offline_secondary = (node in instance.secondary_nodes and
6598 lu.LogWarning("Could not prepare block device %s on node %s"
6599 " (is_primary=False, pass=1): %s",
6600 inst_disk.iv_name, node, msg)
6601 if not (ignore_secondaries or is_offline_secondary):
6604 # FIXME: race condition on drbd migration to primary
6606 # 2nd pass, do only the primary node
6607 for idx, inst_disk in enumerate(disks):
6610 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6611 if node != instance.primary_node:
6614 node_disk = node_disk.Copy()
6615 node_disk.UnsetSize()
6616 lu.cfg.SetDiskID(node_disk, node)
6617 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6619 msg = result.fail_msg
6621 lu.LogWarning("Could not prepare block device %s on node %s"
6622 " (is_primary=True, pass=2): %s",
6623 inst_disk.iv_name, node, msg)
6626 dev_path = result.payload
6628 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6630 # leave the disks configured for the primary node
6631 # this is a workaround that would be fixed better by
6632 # improving the logical/physical id handling
6634 lu.cfg.SetDiskID(disk, instance.primary_node)
6636 return disks_ok, device_info
6639 def _StartInstanceDisks(lu, instance, force):
6640 """Start the disks of an instance.
6643 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6644 ignore_secondaries=force)
6646 _ShutdownInstanceDisks(lu, instance)
6647 if force is not None and not force:
6649 hint=("If the message above refers to a secondary node,"
6650 " you can retry the operation using '--force'"))
6651 raise errors.OpExecError("Disk consistency error")
6654 class LUInstanceDeactivateDisks(NoHooksLU):
6655 """Shutdown an instance's disks.
6660 def ExpandNames(self):
6661 self._ExpandAndLockInstance()
6662 self.needed_locks[locking.LEVEL_NODE] = []
6663 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6665 def DeclareLocks(self, level):
6666 if level == locking.LEVEL_NODE:
6667 self._LockInstancesNodes()
6669 def CheckPrereq(self):
6670 """Check prerequisites.
6672 This checks that the instance is in the cluster.
6675 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6676 assert self.instance is not None, \
6677 "Cannot retrieve locked instance %s" % self.op.instance_name
6679 def Exec(self, feedback_fn):
6680 """Deactivate the disks
6683 instance = self.instance
6685 _ShutdownInstanceDisks(self, instance)
6687 _SafeShutdownInstanceDisks(self, instance)
6690 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6691 """Shutdown block devices of an instance.
6693 This function checks whether an instance is running before calling
6694 _ShutdownInstanceDisks.
6697 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6698 _ShutdownInstanceDisks(lu, instance, disks=disks)
6701 def _ExpandCheckDisks(instance, disks):
6702 """Return the instance disks selected by the disks list
6704 @type disks: list of L{objects.Disk} or None
6705 @param disks: selected disks
6706 @rtype: list of L{objects.Disk}
6707 @return: selected instance disks to act on
6711 return instance.disks
6713 if not set(disks).issubset(instance.disks):
6714 raise errors.ProgrammerError("Can only act on disks belonging to the"
6719 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6720 """Shutdown block devices of an instance.
6722 This does the shutdown on all nodes of the instance.
6724 If ignore_primary is false, errors on the primary node are
6729 disks = _ExpandCheckDisks(instance, disks)
6732 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6733 lu.cfg.SetDiskID(top_disk, node)
6734 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6735 msg = result.fail_msg
6737 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6738 disk.iv_name, node, msg)
6739 if ((node == instance.primary_node and not ignore_primary) or
6740 (node != instance.primary_node and not result.offline)):
6745 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6746 """Checks if a node has enough free memory.
6748 This function checks whether a given node has the needed amount of free
6749 memory. In case the node has less memory, or we cannot get the
6750 information from the node, this function raises an OpPrereqError
6753 @type lu: C{LogicalUnit}
6754 @param lu: a logical unit from which we get configuration data
6756 @param node: the node to check
6757 @type reason: C{str}
6758 @param reason: string to use in the error message
6759 @type requested: C{int}
6760 @param requested: the amount of memory in MiB to check for
6761 @type hypervisor_name: C{str}
6762 @param hypervisor_name: the hypervisor to ask for memory stats
6764 @return: node current free memory
6765 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6766 we cannot check the node
6769 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6770 nodeinfo[node].Raise("Can't get data from node %s" % node,
6771 prereq=True, ecode=errors.ECODE_ENVIRON)
6772 (_, _, (hv_info, )) = nodeinfo[node].payload
6774 free_mem = hv_info.get("memory_free", None)
6775 if not isinstance(free_mem, int):
6776 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6777 " was '%s'" % (node, free_mem),
6778 errors.ECODE_ENVIRON)
6779 if requested > free_mem:
6780 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6781 " needed %s MiB, available %s MiB" %
6782 (node, reason, requested, free_mem),
6787 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6788 """Checks if nodes have enough free disk space in the all VGs.
6790 This function check if all given nodes have the needed amount of
6791 free disk. In case any node has less disk or we cannot get the
6792 information from the node, this function raise an OpPrereqError
6795 @type lu: C{LogicalUnit}
6796 @param lu: a logical unit from which we get configuration data
6797 @type nodenames: C{list}
6798 @param nodenames: the list of node names to check
6799 @type req_sizes: C{dict}
6800 @param req_sizes: the hash of vg and corresponding amount of disk in
6802 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6803 or we cannot check the node
6806 for vg, req_size in req_sizes.items():
6807 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
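# Example call with hypothetical volume group names: passing
# req_sizes == {"xenvg": 10240, "ssdvg": 2048} verifies that every node in
# nodenames has at least 10240 MiB free in "xenvg" and 2048 MiB free in
# "ssdvg", issuing one _CheckNodesFreeDiskOnVG check per volume group.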
6810 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6811 """Checks if nodes have enough free disk space in the specified VG.
6813 This function checks whether all given nodes have the needed amount of
6814 free disk. In case any node has less disk, or we cannot get the
6815 information from the node, this function raises an OpPrereqError
6818 @type lu: C{LogicalUnit}
6819 @param lu: a logical unit from which we get configuration data
6820 @type nodenames: C{list}
6821 @param nodenames: the list of node names to check
6823 @param vg: the volume group to check
6824 @type requested: C{int}
6825 @param requested: the amount of disk in MiB to check for
6826 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6827 or we cannot check the node
6830 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6831 for node in nodenames:
6832 info = nodeinfo[node]
6833 info.Raise("Cannot get current information from node %s" % node,
6834 prereq=True, ecode=errors.ECODE_ENVIRON)
6835 (_, (vg_info, ), _) = info.payload
6836 vg_free = vg_info.get("vg_free", None)
6837 if not isinstance(vg_free, int):
6838 raise errors.OpPrereqError("Can't compute free disk space on node"
6839 " %s for vg %s, result was '%s'" %
6840 (node, vg, vg_free), errors.ECODE_ENVIRON)
6841 if requested > vg_free:
6842 raise errors.OpPrereqError("Not enough disk space on target node %s"
6843 " vg %s: required %d MiB, available %d MiB" %
6844 (node, vg, requested, vg_free),
6848 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6849 """Checks if nodes have enough physical CPUs
6851 This function checks if all given nodes have the needed number of
6852 physical CPUs. In case any node has fewer CPUs, or we cannot get the
6853 information from the node, this function raises an OpPrereqError
6856 @type lu: C{LogicalUnit}
6857 @param lu: a logical unit from which we get configuration data
6858 @type nodenames: C{list}
6859 @param nodenames: the list of node names to check
6860 @type requested: C{int}
6861 @param requested: the minimum acceptable number of physical CPUs
6862 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6863 or we cannot check the node
6866 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6867 for node in nodenames:
6868 info = nodeinfo[node]
6869 info.Raise("Cannot get current information from node %s" % node,
6870 prereq=True, ecode=errors.ECODE_ENVIRON)
6871 (_, _, (hv_info, )) = info.payload
6872 num_cpus = hv_info.get("cpu_total", None)
6873 if not isinstance(num_cpus, int):
6874 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6875 " on node %s, result was '%s'" %
6876 (node, num_cpus), errors.ECODE_ENVIRON)
6877 if requested > num_cpus:
6878 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6879 "required" % (node, num_cpus, requested),
6883 class LUInstanceStartup(LogicalUnit):
6884 """Starts an instance.
6887 HPATH = "instance-start"
6888 HTYPE = constants.HTYPE_INSTANCE
6891 def CheckArguments(self):
6893 if self.op.beparams:
6894 # fill the beparams dict
6895 objects.UpgradeBeParams(self.op.beparams)
6896 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6898 def ExpandNames(self):
6899 self._ExpandAndLockInstance()
6900 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6902 def DeclareLocks(self, level):
6903 if level == locking.LEVEL_NODE_RES:
6904 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6906 def BuildHooksEnv(self):
6909 This runs on master, primary and secondary nodes of the instance.
6913 "FORCE": self.op.force,
6916 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6920 def BuildHooksNodes(self):
6921 """Build hooks nodes.
6924 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6927 def CheckPrereq(self):
6928 """Check prerequisites.
6930 This checks that the instance is in the cluster.
6933 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6934 assert self.instance is not None, \
6935 "Cannot retrieve locked instance %s" % self.op.instance_name
6938 if self.op.hvparams:
6939 # check hypervisor parameter syntax (locally)
6940 cluster = self.cfg.GetClusterInfo()
6941 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6942 filled_hvp = cluster.FillHV(instance)
6943 filled_hvp.update(self.op.hvparams)
6944 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6945 hv_type.CheckParameterSyntax(filled_hvp)
6946 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6948 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6950 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6952 if self.primary_offline and self.op.ignore_offline_nodes:
6953 self.LogWarning("Ignoring offline primary node")
6955 if self.op.hvparams or self.op.beparams:
6956 self.LogWarning("Overridden parameters are ignored")
6958 _CheckNodeOnline(self, instance.primary_node)
6960 bep = self.cfg.GetClusterInfo().FillBE(instance)
6961 bep.update(self.op.beparams)
6963 # check bridges existence
6964 _CheckInstanceBridgesExist(self, instance)
6966 remote_info = self.rpc.call_instance_info(instance.primary_node,
6968 instance.hypervisor)
6969 remote_info.Raise("Error checking node %s" % instance.primary_node,
6970 prereq=True, ecode=errors.ECODE_ENVIRON)
6971 if not remote_info.payload: # not running already
6972 _CheckNodeFreeMemory(self, instance.primary_node,
6973 "starting instance %s" % instance.name,
6974 bep[constants.BE_MINMEM], instance.hypervisor)
6976 def Exec(self, feedback_fn):
6977 """Start the instance.
6980 instance = self.instance
6981 force = self.op.force
6983 if not self.op.no_remember:
6984 self.cfg.MarkInstanceUp(instance.name)
6986 if self.primary_offline:
6987 assert self.op.ignore_offline_nodes
6988 self.LogInfo("Primary node offline, marked instance as started")
6990 node_current = instance.primary_node
6992 _StartInstanceDisks(self, instance, force)
6995 self.rpc.call_instance_start(node_current,
6996 (instance, self.op.hvparams,
6998 self.op.startup_paused)
6999 msg = result.fail_msg
7001 _ShutdownInstanceDisks(self, instance)
7002 raise errors.OpExecError("Could not start instance: %s" % msg)
7005 class LUInstanceReboot(LogicalUnit):
7006 """Reboot an instance.
7009 HPATH = "instance-reboot"
7010 HTYPE = constants.HTYPE_INSTANCE
7013 def ExpandNames(self):
7014 self._ExpandAndLockInstance()
7016 def BuildHooksEnv(self):
7019 This runs on master, primary and secondary nodes of the instance.
7023 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7024 "REBOOT_TYPE": self.op.reboot_type,
7025 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7028 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7032 def BuildHooksNodes(self):
7033 """Build hooks nodes.
7036 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7039 def CheckPrereq(self):
7040 """Check prerequisites.
7042 This checks that the instance is in the cluster.
7045 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7046 assert self.instance is not None, \
7047 "Cannot retrieve locked instance %s" % self.op.instance_name
7048 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7049 _CheckNodeOnline(self, instance.primary_node)
7051 # check bridges existence
7052 _CheckInstanceBridgesExist(self, instance)
7054 def Exec(self, feedback_fn):
7055 """Reboot the instance.
7058 instance = self.instance
7059 ignore_secondaries = self.op.ignore_secondaries
7060 reboot_type = self.op.reboot_type
7062 remote_info = self.rpc.call_instance_info(instance.primary_node,
7064 instance.hypervisor)
7065 remote_info.Raise("Error checking node %s" % instance.primary_node)
7066 instance_running = bool(remote_info.payload)
7068 node_current = instance.primary_node
7070 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7071 constants.INSTANCE_REBOOT_HARD]:
7072 for disk in instance.disks:
7073 self.cfg.SetDiskID(disk, node_current)
7074 result = self.rpc.call_instance_reboot(node_current, instance,
7076 self.op.shutdown_timeout)
7077 result.Raise("Could not reboot instance")
7079 if instance_running:
7080 result = self.rpc.call_instance_shutdown(node_current, instance,
7081 self.op.shutdown_timeout)
7082 result.Raise("Could not shutdown instance for full reboot")
7083 _ShutdownInstanceDisks(self, instance)
7085 self.LogInfo("Instance %s was already stopped, starting now",
7087 _StartInstanceDisks(self, instance, ignore_secondaries)
7088 result = self.rpc.call_instance_start(node_current,
7089 (instance, None, None), False)
7090 msg = result.fail_msg
7092 _ShutdownInstanceDisks(self, instance)
7093 raise errors.OpExecError("Could not start instance for"
7094 " full reboot: %s" % msg)
7096 self.cfg.MarkInstanceUp(instance.name)
7099 class LUInstanceShutdown(LogicalUnit):
7100 """Shutdown an instance.
7103 HPATH = "instance-stop"
7104 HTYPE = constants.HTYPE_INSTANCE
7107 def ExpandNames(self):
7108 self._ExpandAndLockInstance()
7110 def BuildHooksEnv(self):
7113 This runs on master, primary and secondary nodes of the instance.
7116 env = _BuildInstanceHookEnvByObject(self, self.instance)
7117 env["TIMEOUT"] = self.op.timeout
7120 def BuildHooksNodes(self):
7121 """Build hooks nodes.
7124 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7127 def CheckPrereq(self):
7128 """Check prerequisites.
7130 This checks that the instance is in the cluster.
7133 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7134 assert self.instance is not None, \
7135 "Cannot retrieve locked instance %s" % self.op.instance_name
7137 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7139 self.primary_offline = \
7140 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7142 if self.primary_offline and self.op.ignore_offline_nodes:
7143 self.LogWarning("Ignoring offline primary node")
7145 _CheckNodeOnline(self, self.instance.primary_node)
7147 def Exec(self, feedback_fn):
7148 """Shutdown the instance.
7151 instance = self.instance
7152 node_current = instance.primary_node
7153 timeout = self.op.timeout
7155 if not self.op.no_remember:
7156 self.cfg.MarkInstanceDown(instance.name)
7158 if self.primary_offline:
7159 assert self.op.ignore_offline_nodes
7160 self.LogInfo("Primary node offline, marked instance as stopped")
7162 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7163 msg = result.fail_msg
7165 self.LogWarning("Could not shutdown instance: %s", msg)
7167 _ShutdownInstanceDisks(self, instance)
7170 class LUInstanceReinstall(LogicalUnit):
7171 """Reinstall an instance.
7174 HPATH = "instance-reinstall"
7175 HTYPE = constants.HTYPE_INSTANCE
7178 def ExpandNames(self):
7179 self._ExpandAndLockInstance()
7181 def BuildHooksEnv(self):
7184 This runs on master, primary and secondary nodes of the instance.
7187 return _BuildInstanceHookEnvByObject(self, self.instance)
7189 def BuildHooksNodes(self):
7190 """Build hooks nodes.
7193 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7196 def CheckPrereq(self):
7197 """Check prerequisites.
7199 This checks that the instance is in the cluster and is not running.
7202 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7203 assert instance is not None, \
7204 "Cannot retrieve locked instance %s" % self.op.instance_name
7205 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7206 " offline, cannot reinstall")
7208 if instance.disk_template == constants.DT_DISKLESS:
7209 raise errors.OpPrereqError("Instance '%s' has no disks" %
7210 self.op.instance_name,
7212 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7214 if self.op.os_type is not None:
7216 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7217 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7218 instance_os = self.op.os_type
7220 instance_os = instance.os
7222 nodelist = list(instance.all_nodes)
7224 if self.op.osparams:
7225 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7226 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7227 self.os_inst = i_osdict # the new dict (without defaults)
7231 self.instance = instance
7233 def Exec(self, feedback_fn):
7234 """Reinstall the instance.
7237 inst = self.instance
7239 if self.op.os_type is not None:
7240 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7241 inst.os = self.op.os_type
7242 # Write to configuration
7243 self.cfg.Update(inst, feedback_fn)
7245 _StartInstanceDisks(self, inst, None)
7247 feedback_fn("Running the instance OS create scripts...")
7248 # FIXME: pass debug option from opcode to backend
7249 result = self.rpc.call_instance_os_add(inst.primary_node,
7250 (inst, self.os_inst), True,
7251 self.op.debug_level)
7252 result.Raise("Could not install OS for instance %s on node %s" %
7253 (inst.name, inst.primary_node))
7255 _ShutdownInstanceDisks(self, inst)
7258 class LUInstanceRecreateDisks(LogicalUnit):
7259 """Recreate an instance's missing disks.
7262 HPATH = "instance-recreate-disks"
7263 HTYPE = constants.HTYPE_INSTANCE
7266 _MODIFYABLE = frozenset([
7267 constants.IDISK_SIZE,
7268 constants.IDISK_MODE,
7271 # New or changed disk parameters may have different semantics
7272 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7273 constants.IDISK_ADOPT,
7275 # TODO: Implement support for changing the VG while recreating
7277 constants.IDISK_METAVG,
7280 def _RunAllocator(self):
7281 """Run the allocator based on input opcode.
7284 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7287 # The allocator should actually run in "relocate" mode, but current
7288 # allocators don't support relocating all the nodes of an instance at
7289 # the same time. As a workaround we use "allocate" mode, but this is
7290 # suboptimal for two reasons:
7291 # - The instance name passed to the allocator is present in the list of
7292 # existing instances, so there could be a conflict within the
7293 # internal structures of the allocator. This doesn't happen with the
7294 # current allocators, but it's a liability.
7295 # - The allocator counts the resources used by the instance twice: once
7296 # because the instance exists already, and once because it tries to
7297 # allocate a new instance.
7298 # The allocator could choose some of the nodes on which the instance is
7299 # running, but that's not a problem. If the instance nodes are broken,
7300 # they should already be marked as drained or offline, and hence
7301 # skipped by the allocator. If instance disks have been lost for other
7302 # reasons, then recreating the disks on the same nodes should be fine.
7303 disk_template = self.instance.disk_template
7304 spindle_use = be_full[constants.BE_SPINDLE_USE]
7305 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7306 disk_template=disk_template,
7307 tags=list(self.instance.GetTags()),
7308 os=self.instance.os,
7310 vcpus=be_full[constants.BE_VCPUS],
7311 memory=be_full[constants.BE_MAXMEM],
7312 spindle_use=spindle_use,
7313 disks=[{constants.IDISK_SIZE: d.size,
7314 constants.IDISK_MODE: d.mode}
7315 for d in self.instance.disks],
7316 hypervisor=self.instance.hypervisor)
7317 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7319 ial.Run(self.op.iallocator)
7321 assert req.RequiredNodes() == len(self.instance.all_nodes)
7324 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7325 " %s" % (self.op.iallocator, ial.info),
7328 self.op.nodes = ial.result
7329 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7330 self.op.instance_name, self.op.iallocator,
7331 utils.CommaJoin(ial.result))
7333 def CheckArguments(self):
7334 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7335 # Normalize and convert deprecated list of disk indices
7336 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
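# Example of the normalization above: the deprecated form
# self.op.disks == [2, 0] becomes [(0, {}), (2, {})], i.e. de-duplicated,
# sorted disk indices, each paired with an (empty) parameter override dict.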
7338 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7340 raise errors.OpPrereqError("Some disks have been specified more than"
7341 " once: %s" % utils.CommaJoin(duplicates),
7344 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7345 # when neither iallocator nor nodes are specified
7346 if self.op.iallocator or self.op.nodes:
7347 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7349 for (idx, params) in self.op.disks:
7350 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7351 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7353 raise errors.OpPrereqError("Parameters for disk %s try to change"
7354 " unmodifyable parameter(s): %s" %
7355 (idx, utils.CommaJoin(unsupported)),
7358 def ExpandNames(self):
7359 self._ExpandAndLockInstance()
7360 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7362 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7363 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7365 self.needed_locks[locking.LEVEL_NODE] = []
7366 if self.op.iallocator:
7367 # iallocator will select a new node in the same group
7368 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7369 self.needed_locks[locking.LEVEL_NODE_RES] = []
7371 def DeclareLocks(self, level):
7372 if level == locking.LEVEL_NODEGROUP:
7373 assert self.op.iallocator is not None
7374 assert not self.op.nodes
7375 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7376 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7377 # Lock the primary group used by the instance optimistically; this
7378 # requires going via the node before it's locked, requiring
7379 # verification later on
7380 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7381 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7383 elif level == locking.LEVEL_NODE:
7384 # If an allocator is used, then we lock all the nodes in the current
7385 # instance group, as we don't know yet which ones will be selected;
7386 # if we replace the nodes without using an allocator, locks are
7387 # already declared in ExpandNames; otherwise, we need to lock all the
7388 # instance nodes for disk re-creation
7389 if self.op.iallocator:
7390 assert not self.op.nodes
7391 assert not self.needed_locks[locking.LEVEL_NODE]
7392 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7394 # Lock member nodes of the group of the primary node
7395 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7396 self.needed_locks[locking.LEVEL_NODE].extend(
7397 self.cfg.GetNodeGroup(group_uuid).members)
7398 elif not self.op.nodes:
7399 self._LockInstancesNodes(primary_only=False)
7400 elif level == locking.LEVEL_NODE_RES:
7402 self.needed_locks[locking.LEVEL_NODE_RES] = \
7403 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7405 def BuildHooksEnv(self):
7408 This runs on master, primary and secondary nodes of the instance.
7411 return _BuildInstanceHookEnvByObject(self, self.instance)
7413 def BuildHooksNodes(self):
7414 """Build hooks nodes.
7417 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7420 def CheckPrereq(self):
7421 """Check prerequisites.
7423 This checks that the instance is in the cluster and is not running.
7426 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7427 assert instance is not None, \
7428 "Cannot retrieve locked instance %s" % self.op.instance_name
7430 if len(self.op.nodes) != len(instance.all_nodes):
7431 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7432 " %d replacement nodes were specified" %
7433 (instance.name, len(instance.all_nodes),
7434 len(self.op.nodes)),
7436 assert instance.disk_template != constants.DT_DRBD8 or \
7437 len(self.op.nodes) == 2
7438 assert instance.disk_template != constants.DT_PLAIN or \
7439 len(self.op.nodes) == 1
7440 primary_node = self.op.nodes[0]
7442 primary_node = instance.primary_node
7443 if not self.op.iallocator:
7444 _CheckNodeOnline(self, primary_node)
7446 if instance.disk_template == constants.DT_DISKLESS:
7447 raise errors.OpPrereqError("Instance '%s' has no disks" %
7448 self.op.instance_name, errors.ECODE_INVAL)
7450 # Verify if node group locks are still correct
7451 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7453 # Node group locks are acquired only for the primary node (and only
7454 # when the allocator is used)
7455 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7458 # if we replace nodes *and* the old primary is offline, we don't
7459 # check the instance state
7460 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7461 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7462 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7463 msg="cannot recreate disks")
7466 self.disks = dict(self.op.disks)
7468 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7470 maxidx = max(self.disks.keys())
7471 if maxidx >= len(instance.disks):
7472 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7475 if ((self.op.nodes or self.op.iallocator) and
7476 sorted(self.disks.keys()) != range(len(instance.disks))):
7477 raise errors.OpPrereqError("Can't recreate disks partially and"
7478 " change the nodes at the same time",
7481 self.instance = instance
7483 if self.op.iallocator:
7484 self._RunAllocator()
7485 # Release unneeded node and node resource locks
7486 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7487 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7489 def Exec(self, feedback_fn):
7490 """Recreate the disks.
7493 instance = self.instance
7495 assert (self.owned_locks(locking.LEVEL_NODE) ==
7496 self.owned_locks(locking.LEVEL_NODE_RES))
7499 mods = [] # keeps track of needed changes
7501 for idx, disk in enumerate(instance.disks):
7503 changes = self.disks[idx]
7505 # Disk should not be recreated
7509 # update secondaries for disks, if needed
7510 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7511 # need to update the nodes and minors
7512 assert len(self.op.nodes) == 2
7513 assert len(disk.logical_id) == 6 # otherwise disk internals
7515 (_, _, old_port, _, _, old_secret) = disk.logical_id
7516 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7517 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7518 new_minors[0], new_minors[1], old_secret)
7519 assert len(disk.logical_id) == len(new_id)
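# Illustrative sketch (hypothetical values): a DRBD8 logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, secret), so the rebuilt id looks like
#   ("node1.example.com", "node2.example.com", 11000, 0, 1, "a1b2c3d4")
# where only the node names and minors change, while the old port and shared
# secret are carried over.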
7523 mods.append((idx, new_id, changes))
7525 # now that we have passed all asserts above, we can apply the mods
7526 # in a single run (to avoid partial changes)
7527 for idx, new_id, changes in mods:
7528 disk = instance.disks[idx]
7529 if new_id is not None:
7530 assert disk.dev_type == constants.LD_DRBD8
7531 disk.logical_id = new_id
7533 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7534 mode=changes.get(constants.IDISK_MODE, None))
7536 # change primary node, if needed
7538 instance.primary_node = self.op.nodes[0]
7539 self.LogWarning("Changing the instance's nodes, you will have to"
7540 " remove any disks left on the older nodes manually")
7543 self.cfg.Update(instance, feedback_fn)
7545 # All touched nodes must be locked
7546 mylocks = self.owned_locks(locking.LEVEL_NODE)
7547 assert mylocks.issuperset(frozenset(instance.all_nodes))
7548 _CreateDisks(self, instance, to_skip=to_skip)
7551 class LUInstanceRename(LogicalUnit):
7552 """Rename an instance.
7555 HPATH = "instance-rename"
7556 HTYPE = constants.HTYPE_INSTANCE
7558 def CheckArguments(self):
7562 if self.op.ip_check and not self.op.name_check:
7563 # TODO: make the ip check more flexible and not depend on the name check
7564 raise errors.OpPrereqError("IP address check requires a name check",
7567 def BuildHooksEnv(self):
7570 This runs on master, primary and secondary nodes of the instance.
7573 env = _BuildInstanceHookEnvByObject(self, self.instance)
7574 env["INSTANCE_NEW_NAME"] = self.op.new_name
7577 def BuildHooksNodes(self):
7578 """Build hooks nodes.
7581 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7584 def CheckPrereq(self):
7585 """Check prerequisites.
7587 This checks that the instance is in the cluster and is not running.
7590 self.op.instance_name = _ExpandInstanceName(self.cfg,
7591 self.op.instance_name)
7592 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7593 assert instance is not None
7594 _CheckNodeOnline(self, instance.primary_node)
7595 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7596 msg="cannot rename")
7597 self.instance = instance
7599 new_name = self.op.new_name
7600 if self.op.name_check:
7601 hostname = _CheckHostnameSane(self, new_name)
7602 new_name = self.op.new_name = hostname.name
7603 if (self.op.ip_check and
7604 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7605 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7606 (hostname.ip, new_name),
7607 errors.ECODE_NOTUNIQUE)
7609 instance_list = self.cfg.GetInstanceList()
7610 if new_name in instance_list and new_name != instance.name:
7611 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7612 new_name, errors.ECODE_EXISTS)
7614 def Exec(self, feedback_fn):
7615 """Rename the instance.
7618 inst = self.instance
7619 old_name = inst.name
7621 rename_file_storage = False
7622 if (inst.disk_template in constants.DTS_FILEBASED and
7623 self.op.new_name != inst.name):
7624 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7625 rename_file_storage = True
7627 self.cfg.RenameInstance(inst.name, self.op.new_name)
7628 # Change the instance lock. This is definitely safe while we hold the BGL.
7629 # Otherwise the new lock would have to be added in acquired mode.
7631 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7632 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7633 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7635 # re-read the instance from the configuration after rename
7636 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7638 if rename_file_storage:
7639 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7640 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7641 old_file_storage_dir,
7642 new_file_storage_dir)
7643 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7644 " (but the instance has been renamed in Ganeti)" %
7645 (inst.primary_node, old_file_storage_dir,
7646 new_file_storage_dir))
7648 _StartInstanceDisks(self, inst, None)
7649 # update info on disks
7650 info = _GetInstanceInfoText(inst)
7651 for (idx, disk) in enumerate(inst.disks):
7652 for node in inst.all_nodes:
7653 self.cfg.SetDiskID(disk, node)
7654 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7656 self.LogWarning("Error setting info on node %s for disk %s: %s",
7657 node, idx, result.fail_msg)
7659 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7660 old_name, self.op.debug_level)
7661 msg = result.fail_msg
7663 msg = ("Could not run OS rename script for instance %s on node %s"
7664 " (but the instance has been renamed in Ganeti): %s" %
7665 (inst.name, inst.primary_node, msg))
7666 self.LogWarning(msg)
7668 _ShutdownInstanceDisks(self, inst)
7673 class LUInstanceRemove(LogicalUnit):
7674 """Remove an instance.
7677 HPATH = "instance-remove"
7678 HTYPE = constants.HTYPE_INSTANCE
7681 def ExpandNames(self):
7682 self._ExpandAndLockInstance()
7683 self.needed_locks[locking.LEVEL_NODE] = []
7684 self.needed_locks[locking.LEVEL_NODE_RES] = []
7685 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7687 def DeclareLocks(self, level):
7688 if level == locking.LEVEL_NODE:
7689 self._LockInstancesNodes()
7690 elif level == locking.LEVEL_NODE_RES:
7692 self.needed_locks[locking.LEVEL_NODE_RES] = \
7693 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7695 def BuildHooksEnv(self):
7698 This runs on master, primary and secondary nodes of the instance.
7701 env = _BuildInstanceHookEnvByObject(self, self.instance)
7702 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7705 def BuildHooksNodes(self):
7706 """Build hooks nodes.
7709 nl = [self.cfg.GetMasterNode()]
7710 nl_post = list(self.instance.all_nodes) + nl
7711 return (nl, nl_post)
7713 def CheckPrereq(self):
7714 """Check prerequisites.
7716 This checks that the instance is in the cluster.
7719 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7720 assert self.instance is not None, \
7721 "Cannot retrieve locked instance %s" % self.op.instance_name
7723 def Exec(self, feedback_fn):
7724 """Remove the instance.
7727 instance = self.instance
7728 logging.info("Shutting down instance %s on node %s",
7729 instance.name, instance.primary_node)
7731 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7732 self.op.shutdown_timeout)
7733 msg = result.fail_msg
7735 if self.op.ignore_failures:
7736 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7738 raise errors.OpExecError("Could not shutdown instance %s on"
7740 (instance.name, instance.primary_node, msg))
7742 assert (self.owned_locks(locking.LEVEL_NODE) ==
7743 self.owned_locks(locking.LEVEL_NODE_RES))
7744 assert not (set(instance.all_nodes) -
7745 self.owned_locks(locking.LEVEL_NODE)), \
7746 "Not owning correct locks"
7748 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7751 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7752 """Utility function to remove an instance.
7755 logging.info("Removing block devices for instance %s", instance.name)
7757 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7758 if not ignore_failures:
7759 raise errors.OpExecError("Can't remove instance's disks")
7760 feedback_fn("Warning: can't remove instance's disks")
7762 logging.info("Removing instance %s out of cluster config", instance.name)
7764 lu.cfg.RemoveInstance(instance.name)
7766 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7767 "Instance lock removal conflict"
7769 # Remove lock for the instance
7770 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7773 class LUInstanceQuery(NoHooksLU):
7774 """Logical unit for querying instances.
7777 # pylint: disable=W0142
7780 def CheckArguments(self):
7781 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7782 self.op.output_fields, self.op.use_locking)
7784 def ExpandNames(self):
7785 self.iq.ExpandNames(self)
7787 def DeclareLocks(self, level):
7788 self.iq.DeclareLocks(self, level)
7790 def Exec(self, feedback_fn):
7791 return self.iq.OldStyleQuery(self)
7794 def _ExpandNamesForMigration(lu):
7795 """Expands names for use with L{TLMigrateInstance}.
7797 @type lu: L{LogicalUnit}
7800 if lu.op.target_node is not None:
7801 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
7803 lu.needed_locks[locking.LEVEL_NODE] = []
7804 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7806 lu.needed_locks[locking.LEVEL_NODE_RES] = []
7807 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7810 def _DeclareLocksForMigration(lu, level):
7811 """Declares locks for L{TLMigrateInstance}.
7813 @type lu: L{LogicalUnit}
7814 @param level: Lock level
7817 if level == locking.LEVEL_NODE:
7818 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
7819 if instance.disk_template in constants.DTS_EXT_MIRROR:
7820 if lu.op.target_node is None:
7821 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7823 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7825 del lu.recalculate_locks[locking.LEVEL_NODE]
7827 lu._LockInstancesNodes() # pylint: disable=W0212
7828 elif level == locking.LEVEL_NODE_RES:
7830 lu.needed_locks[locking.LEVEL_NODE_RES] = \
7831 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
7834 class LUInstanceFailover(LogicalUnit):
7835 """Failover an instance.
7838 HPATH = "instance-failover"
7839 HTYPE = constants.HTYPE_INSTANCE
7842 def CheckArguments(self):
7843 """Check the arguments.
7846 self.iallocator = getattr(self.op, "iallocator", None)
7847 self.target_node = getattr(self.op, "target_node", None)
7849 def ExpandNames(self):
7850 self._ExpandAndLockInstance()
7851 _ExpandNamesForMigration(self)
7854 TLMigrateInstance(self, self.op.instance_name, False, True, False,
7855 self.op.ignore_consistency, True,
7856 self.op.shutdown_timeout, self.op.ignore_ipolicy)
7858 self.tasklets = [self._migrater]
7860 def DeclareLocks(self, level):
7861 _DeclareLocksForMigration(self, level)
7863 def BuildHooksEnv(self):
7866 This runs on master, primary and secondary nodes of the instance.
7869 instance = self._migrater.instance
7870 source_node = instance.primary_node
7871 target_node = self.op.target_node
7873 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7874 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7875 "OLD_PRIMARY": source_node,
7876 "NEW_PRIMARY": target_node,
7879 if instance.disk_template in constants.DTS_INT_MIRROR:
7880 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7881 env["NEW_SECONDARY"] = source_node
7883 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7885 env.update(_BuildInstanceHookEnvByObject(self, instance))
7889 def BuildHooksNodes(self):
7890 """Build hooks nodes.
7893 instance = self._migrater.instance
7894 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7895 return (nl, nl + [instance.primary_node])
7898 class LUInstanceMigrate(LogicalUnit):
7899 """Migrate an instance.
7901 This is migration without shutting the instance down, as opposed to
7902 failover, which is done with a shutdown.
7905 HPATH = "instance-migrate"
7906 HTYPE = constants.HTYPE_INSTANCE
7909 def ExpandNames(self):
7910 self._ExpandAndLockInstance()
7911 _ExpandNamesForMigration(self)
7914 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
7915 False, self.op.allow_failover, False,
7916 self.op.allow_runtime_changes,
7917 constants.DEFAULT_SHUTDOWN_TIMEOUT,
7918 self.op.ignore_ipolicy)
7920 self.tasklets = [self._migrater]
7922 def DeclareLocks(self, level):
7923 _DeclareLocksForMigration(self, level)
7925 def BuildHooksEnv(self):
7928 This runs on master, primary and secondary nodes of the instance.
7931 instance = self._migrater.instance
7932 source_node = instance.primary_node
7933 target_node = self.op.target_node
7934 env = _BuildInstanceHookEnvByObject(self, instance)
7936 "MIGRATE_LIVE": self._migrater.live,
7937 "MIGRATE_CLEANUP": self.op.cleanup,
7938 "OLD_PRIMARY": source_node,
7939 "NEW_PRIMARY": target_node,
7940 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7943 if instance.disk_template in constants.DTS_INT_MIRROR:
7944 env["OLD_SECONDARY"] = target_node
7945 env["NEW_SECONDARY"] = source_node
7947 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7951 def BuildHooksNodes(self):
7952 """Build hooks nodes.
7955 instance = self._migrater.instance
7956 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7957 return (nl, nl + [instance.primary_node])
7960 class LUInstanceMove(LogicalUnit):
7961 """Move an instance by data-copying.
7964 HPATH = "instance-move"
7965 HTYPE = constants.HTYPE_INSTANCE
7968 def ExpandNames(self):
7969 self._ExpandAndLockInstance()
7970 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7971 self.op.target_node = target_node
7972 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7973 self.needed_locks[locking.LEVEL_NODE_RES] = []
7974 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7976 def DeclareLocks(self, level):
7977 if level == locking.LEVEL_NODE:
7978 self._LockInstancesNodes(primary_only=True)
7979 elif level == locking.LEVEL_NODE_RES:
7981 self.needed_locks[locking.LEVEL_NODE_RES] = \
7982 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7984 def BuildHooksEnv(self):
7987 This runs on master, primary and secondary nodes of the instance.
7991 "TARGET_NODE": self.op.target_node,
7992 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7994 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7997 def BuildHooksNodes(self):
7998 """Build hooks nodes.
8002 self.cfg.GetMasterNode(),
8003 self.instance.primary_node,
8004 self.op.target_node,
8008 def CheckPrereq(self):
8009 """Check prerequisites.
8011 This checks that the instance is in the cluster.
8014 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8015 assert self.instance is not None, \
8016 "Cannot retrieve locked instance %s" % self.op.instance_name
8018 node = self.cfg.GetNodeInfo(self.op.target_node)
8019 assert node is not None, \
8020 "Cannot retrieve locked node %s" % self.op.target_node
8022 self.target_node = target_node = node.name
8024 if target_node == instance.primary_node:
8025 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8026 (instance.name, target_node),
8029 bep = self.cfg.GetClusterInfo().FillBE(instance)
8031 for idx, dsk in enumerate(instance.disks):
8032 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8033 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8034 " cannot copy" % idx, errors.ECODE_STATE)
8036 _CheckNodeOnline(self, target_node)
8037 _CheckNodeNotDrained(self, target_node)
8038 _CheckNodeVmCapable(self, target_node)
8039 cluster = self.cfg.GetClusterInfo()
8040 group_info = self.cfg.GetNodeGroup(node.group)
8041 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8042 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8043 ignore=self.op.ignore_ipolicy)
8045 if instance.admin_state == constants.ADMINST_UP:
8046 # check memory requirements on the secondary node
8047 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8048 instance.name, bep[constants.BE_MAXMEM],
8049 instance.hypervisor)
8051 self.LogInfo("Not checking memory on the secondary node as"
8052 " instance will not be started")
8054 # check bridge existence
8055 _CheckInstanceBridgesExist(self, instance, node=target_node)
8057 def Exec(self, feedback_fn):
8058 """Move an instance.
8060 The move is done by shutting it down on its present node, copying
8061 the data over (slow) and starting it on the new node.
8064 instance = self.instance
8066 source_node = instance.primary_node
8067 target_node = self.target_node
8069 self.LogInfo("Shutting down instance %s on source node %s",
8070 instance.name, source_node)
8072 assert (self.owned_locks(locking.LEVEL_NODE) ==
8073 self.owned_locks(locking.LEVEL_NODE_RES))
8075 result = self.rpc.call_instance_shutdown(source_node, instance,
8076 self.op.shutdown_timeout)
8077 msg = result.fail_msg
8079 if self.op.ignore_consistency:
8080 self.LogWarning("Could not shutdown instance %s on node %s."
8081 " Proceeding anyway. Please make sure node"
8082 " %s is down. Error details: %s",
8083 instance.name, source_node, source_node, msg)
8085 raise errors.OpExecError("Could not shutdown instance %s on"
8087 (instance.name, source_node, msg))
8089 # create the target disks
8091 _CreateDisks(self, instance, target_node=target_node)
8092 except errors.OpExecError:
8093 self.LogWarning("Device creation failed, reverting...")
8095 _RemoveDisks(self, instance, target_node=target_node)
8097 self.cfg.ReleaseDRBDMinors(instance.name)
8100 cluster_name = self.cfg.GetClusterInfo().cluster_name
8103 # activate, get path, copy the data over
8104 for idx, disk in enumerate(instance.disks):
8105 self.LogInfo("Copying data for disk %d", idx)
8106 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8107 instance.name, True, idx)
8109 self.LogWarning("Can't assemble newly created disk %d: %s",
8110 idx, result.fail_msg)
8111 errs.append(result.fail_msg)
8113 dev_path = result.payload
8114 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8115 target_node, dev_path,
8118 self.LogWarning("Can't copy data over for disk %d: %s",
8119 idx, result.fail_msg)
8120 errs.append(result.fail_msg)
8124 self.LogWarning("Some disks failed to copy, aborting")
8126 _RemoveDisks(self, instance, target_node=target_node)
8128 self.cfg.ReleaseDRBDMinors(instance.name)
8129 raise errors.OpExecError("Errors during disk copy: %s" %
8132 instance.primary_node = target_node
8133 self.cfg.Update(instance, feedback_fn)
8135 self.LogInfo("Removing the disks on the original node")
8136 _RemoveDisks(self, instance, target_node=source_node)
8138 # Only start the instance if it's marked as up
8139 if instance.admin_state == constants.ADMINST_UP:
8140 self.LogInfo("Starting instance %s on node %s",
8141 instance.name, target_node)
8143 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8144 ignore_secondaries=True)
8146 _ShutdownInstanceDisks(self, instance)
8147 raise errors.OpExecError("Can't activate the instance's disks")
8149 result = self.rpc.call_instance_start(target_node,
8150 (instance, None, None), False)
8151 msg = result.fail_msg
8153 _ShutdownInstanceDisks(self, instance)
8154 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8155 (instance.name, target_node, msg))
8158 class LUNodeMigrate(LogicalUnit):
8159 """Migrate all instances from a node.
8162 HPATH = "node-migrate"
8163 HTYPE = constants.HTYPE_NODE
8166 def CheckArguments(self):
8169 def ExpandNames(self):
8170 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8172 self.share_locks = _ShareAll()
8173 self.needed_locks = {
8174 locking.LEVEL_NODE: [self.op.node_name],
8177 def BuildHooksEnv(self):
8180 This runs on the master, the primary and all the secondaries.
8184 "NODE_NAME": self.op.node_name,
8185 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8188 def BuildHooksNodes(self):
8189 """Build hooks nodes.
8192 nl = [self.cfg.GetMasterNode()]
8195 def CheckPrereq(self):
8198 def Exec(self, feedback_fn):
8199 # Prepare jobs for migration instances
8200 allow_runtime_changes = self.op.allow_runtime_changes
8202 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8205 iallocator=self.op.iallocator,
8206 target_node=self.op.target_node,
8207 allow_runtime_changes=allow_runtime_changes,
8208 ignore_ipolicy=self.op.ignore_ipolicy)]
8209 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8211 # TODO: Run iallocator in this opcode and pass correct placement options to
8212 # OpInstanceMigrate. Since other jobs can modify the cluster between
8213 # running the iallocator and the actual migration, a good consistency model
8214 # will have to be found.
8216 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8217 frozenset([self.op.node_name]))
8219 return ResultWithJobs(jobs)
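# Illustrative sketch (hypothetical instance names): "jobs" is a list of
# single-opcode jobs, one per primary instance on the node being evacuated,
# e.g.
#   [[opcodes.OpInstanceMigrate(instance_name="web1", ...)],
#    [opcodes.OpInstanceMigrate(instance_name="db1", ...)]]
# so each migration is submitted and tracked as its own job.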
8222 class TLMigrateInstance(Tasklet):
8223 """Tasklet class for instance migration.
8226 @ivar live: whether the migration will be done live or non-live;
8227 this variable is initialized only after CheckPrereq has run
8228 @type cleanup: boolean
8229 @ivar cleanup: Whether we clean up after a failed migration
8230 @type iallocator: string
8231 @ivar iallocator: The iallocator used to determine target_node
8232 @type target_node: string
8233 @ivar target_node: If given, the target_node to reallocate the instance to
8234 @type failover: boolean
8235 @ivar failover: Whether operation results in failover or migration
8236 @type fallback: boolean
8237 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
8239 @type ignore_consistency: boolean
8240 @ivar ignore_consistency: Whether we should ignore consistency between the source and target nodes
8242 @type shutdown_timeout: int
8243 @ivar shutdown_timeout: timeout of the instance shutdown in case of failover
8244 @type ignore_ipolicy: bool
8245 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8250 _MIGRATION_POLL_INTERVAL = 1 # seconds
8251 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8253 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8254 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8256 """Initializes this class.
8259 Tasklet.__init__(self, lu)
8262 self.instance_name = instance_name
8263 self.cleanup = cleanup
8264 self.live = False # will be overridden later
8265 self.failover = failover
8266 self.fallback = fallback
8267 self.ignore_consistency = ignore_consistency
8268 self.shutdown_timeout = shutdown_timeout
8269 self.ignore_ipolicy = ignore_ipolicy
8270 self.allow_runtime_changes = allow_runtime_changes
8272 def CheckPrereq(self):
8273 """Check prerequisites.
8275 This checks that the instance is in the cluster.
8278 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8279 instance = self.cfg.GetInstanceInfo(instance_name)
8280 assert instance is not None
8281 self.instance = instance
8282 cluster = self.cfg.GetClusterInfo()
8284 if (not self.cleanup and
8285 not instance.admin_state == constants.ADMINST_UP and
8286 not self.failover and self.fallback):
8287 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8288 " switching to failover")
8289 self.failover = True
8291 if instance.disk_template not in constants.DTS_MIRRORED:
8296 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8297 " %s" % (instance.disk_template, text),
8300 if instance.disk_template in constants.DTS_EXT_MIRROR:
8301 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8303 if self.lu.op.iallocator:
8304 self._RunAllocator()
8306 # We set self.target_node here, as it is required by
8308 self.target_node = self.lu.op.target_node
8310 # Check that the target node is correct in terms of instance policy
8311 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8312 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8313 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8315 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8316 ignore=self.ignore_ipolicy)
8318 # self.target_node is already populated, either directly or by the iallocator
8320 target_node = self.target_node
8321 if self.target_node == instance.primary_node:
8322 raise errors.OpPrereqError("Cannot migrate instance %s"
8323 " to its primary (%s)" %
8324 (instance.name, instance.primary_node),
8327 if len(self.lu.tasklets) == 1:
8328 # It is safe to release locks only when we're the only tasklet
8330 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8331 keep=[instance.primary_node, self.target_node])
8334 secondary_nodes = instance.secondary_nodes
8335 if not secondary_nodes:
8336 raise errors.ConfigurationError("No secondary node but using"
8337 " %s disk template" %
8338 instance.disk_template)
8339 target_node = secondary_nodes[0]
8340 if self.lu.op.iallocator or (self.lu.op.target_node and
8341 self.lu.op.target_node != target_node):
8343 text = "failed over"
8346 raise errors.OpPrereqError("Instances with disk template %s cannot"
8347 " be %s to arbitrary nodes"
8348 " (neither an iallocator nor a target"
8349 " node can be passed)" %
8350 (instance.disk_template, text),
8352 nodeinfo = self.cfg.GetNodeInfo(target_node)
8353 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8354 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8356 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8357 ignore=self.ignore_ipolicy)
8359 i_be = cluster.FillBE(instance)
8361 # check memory requirements on the secondary node
8362 if (not self.cleanup and
8363 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8364 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8365 "migrating instance %s" %
8367 i_be[constants.BE_MINMEM],
8368 instance.hypervisor)
8370 self.lu.LogInfo("Not checking memory on the secondary node as"
8371 " instance will not be started")
8373 # check if failover must be forced instead of migration
8374 if (not self.cleanup and not self.failover and
8375 i_be[constants.BE_ALWAYS_FAILOVER]):
8376 self.lu.LogInfo("Instance configured to always failover; fallback"
8378 self.failover = True
8380 # check bridge existence
8381 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8383 if not self.cleanup:
8384 _CheckNodeNotDrained(self.lu, target_node)
8385 if not self.failover:
8386 result = self.rpc.call_instance_migratable(instance.primary_node,
8388 if result.fail_msg and self.fallback:
8389 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8391 self.failover = True
8393 result.Raise("Can't migrate, please use failover",
8394 prereq=True, ecode=errors.ECODE_STATE)
8396 assert not (self.failover and self.cleanup)
8398 if not self.failover:
8399 if self.lu.op.live is not None and self.lu.op.mode is not None:
8400 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8401 " parameters are accepted",
8403 if self.lu.op.live is not None:
8405 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8407 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8408 # reset the 'live' parameter to None so that repeated
8409 # invocations of CheckPrereq do not raise an exception
8410 self.lu.op.live = None
8411 elif self.lu.op.mode is None:
8412 # read the default value from the hypervisor
8413 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8414 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8416 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
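# Summary of the mode resolution above: live=True selects HT_MIGRATION_LIVE,
# live=False selects HT_MIGRATION_NONLIVE, and if neither 'live' nor 'mode'
# was given, the hypervisor's HV_MIGRATION_MODE default decides.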
8418 # Failover is never live
8421 if not (self.failover or self.cleanup):
8422 remote_info = self.rpc.call_instance_info(instance.primary_node,
8424 instance.hypervisor)
8425 remote_info.Raise("Error checking instance on node %s" %
8426 instance.primary_node)
8427 instance_running = bool(remote_info.payload)
8428 if instance_running:
8429 self.current_mem = int(remote_info.payload["memory"])
8431 def _RunAllocator(self):
8432 """Run the allocator based on input opcode.
8435 # FIXME: add a self.ignore_ipolicy option
8436 req = iallocator.IAReqRelocate(name=self.instance_name,
8437 relocate_from=[self.instance.primary_node])
8438 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8440 ial.Run(self.lu.op.iallocator)
8443 raise errors.OpPrereqError("Can't compute nodes using"
8444 " iallocator '%s': %s" %
8445 (self.lu.op.iallocator, ial.info),
8447 self.target_node = ial.result[0]
8448 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8449 self.instance_name, self.lu.op.iallocator,
8450 utils.CommaJoin(ial.result))
8452 def _WaitUntilSync(self):
8453 """Poll with custom rpc for disk sync.
8455 This uses our own step-based rpc call.
8458 self.feedback_fn("* wait until resync is done")
8462 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8464 (self.instance.disks,
8467 for node, nres in result.items():
8468 nres.Raise("Cannot resync disks on node %s" % node)
8469 node_done, node_percent = nres.payload
8470 all_done = all_done and node_done
8471 if node_percent is not None:
8472 min_percent = min(min_percent, node_percent)
8474 if min_percent < 100:
8475 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8478 def _EnsureSecondary(self, node):
8479 """Demote a node to secondary.
8482 self.feedback_fn("* switching node %s to secondary mode" % node)
8484 for dev in self.instance.disks:
8485 self.cfg.SetDiskID(dev, node)
8487 result = self.rpc.call_blockdev_close(node, self.instance.name,
8488 self.instance.disks)
8489 result.Raise("Cannot change disk to secondary on node %s" % node)
8491 def _GoStandalone(self):
8492 """Disconnect from the network.
8495 self.feedback_fn("* changing into standalone mode")
8496 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8497 self.instance.disks)
8498 for node, nres in result.items():
8499 nres.Raise("Cannot disconnect disks node %s" % node)
8501 def _GoReconnect(self, multimaster):
8502 """Reconnect to the network.
8508 msg = "single-master"
8509 self.feedback_fn("* changing disks into %s mode" % msg)
8510 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8511 (self.instance.disks, self.instance),
8512 self.instance.name, multimaster)
8513 for node, nres in result.items():
8514 nres.Raise("Cannot change disks config on node %s" % node)
8516 def _ExecCleanup(self):
8517 """Try to cleanup after a failed migration.
8519 The cleanup is done by:
8520 - check that the instance is running only on one node
8521 (and update the config if needed)
8522 - change disks on its secondary node to secondary
8523 - wait until disks are fully synchronized
8524 - disconnect from the network
8525 - change disks into single-master mode
8526 - wait again until disks are fully synchronized
8529 instance = self.instance
8530 target_node = self.target_node
8531 source_node = self.source_node
8533 # check running on only one node
8534 self.feedback_fn("* checking where the instance actually runs"
8535 " (if this hangs, the hypervisor might be in"
8537 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8538 for node, result in ins_l.items():
8539 result.Raise("Can't contact node %s" % node)
8541 runningon_source = instance.name in ins_l[source_node].payload
8542 runningon_target = instance.name in ins_l[target_node].payload
8544 if runningon_source and runningon_target:
8545 raise errors.OpExecError("Instance seems to be running on two nodes,"
8546 " or the hypervisor is confused; you will have"
8547 " to ensure manually that it runs only on one"
8548 " and restart this operation")
8550 if not (runningon_source or runningon_target):
8551 raise errors.OpExecError("Instance does not seem to be running at all;"
8552 " in this case it's safer to repair by"
8553 " running 'gnt-instance stop' to ensure disk"
8554 " shutdown, and then restarting it")
8556 if runningon_target:
8557 # the migration has actually succeeded, we need to update the config
8558 self.feedback_fn("* instance running on secondary node (%s),"
8559 " updating config" % target_node)
8560 instance.primary_node = target_node
8561 self.cfg.Update(instance, self.feedback_fn)
8562 demoted_node = source_node
8564 self.feedback_fn("* instance confirmed to be running on its"
8565 " primary node (%s)" % source_node)
8566 demoted_node = target_node
8568 if instance.disk_template in constants.DTS_INT_MIRROR:
8569 self._EnsureSecondary(demoted_node)
8571 self._WaitUntilSync()
8572 except errors.OpExecError:
8573 # we ignore errors here, since if the device is standalone, it
8574 # won't be able to sync
8576 self._GoStandalone()
8577 self._GoReconnect(False)
8578 self._WaitUntilSync()
8580 self.feedback_fn("* done")
8582 def _RevertDiskStatus(self):
8583 """Try to revert the disk status after a failed migration.
8586 target_node = self.target_node
8587 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8591 self._EnsureSecondary(target_node)
8592 self._GoStandalone()
8593 self._GoReconnect(False)
8594 self._WaitUntilSync()
8595 except errors.OpExecError, err:
8596 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8597 " please try to recover the instance manually;"
8598 " error '%s'" % str(err))
8600 def _AbortMigration(self):
8601 """Call the hypervisor code to abort a started migration.
8604 instance = self.instance
8605 target_node = self.target_node
8606 source_node = self.source_node
8607 migration_info = self.migration_info
8609 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8613 abort_msg = abort_result.fail_msg
8615 logging.error("Aborting migration failed on target node %s: %s",
8616 target_node, abort_msg)
8617 # Don't raise an exception here, as we still have to try to revert the
8618 # disk status, even if this step failed.
8620 abort_result = self.rpc.call_instance_finalize_migration_src(
8621 source_node, instance, False, self.live)
8622 abort_msg = abort_result.fail_msg
8624 logging.error("Aborting migration failed on source node %s: %s",
8625 source_node, abort_msg)
8627 def _ExecMigration(self):
8628 """Migrate an instance.
8630 The migration is done by:
8631 - change the disks into dual-master mode
8632 - wait until disks are fully synchronized again
8633 - migrate the instance
8634 - change disks on the new secondary node (the old primary) to secondary
8635 - wait until disks are fully synchronized
8636 - change disks into single-master mode
8639 instance = self.instance
8640 target_node = self.target_node
8641 source_node = self.source_node
8643 # Check for hypervisor version mismatch and warn the user.
8644 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8645 None, [self.instance.hypervisor])
8646 for ninfo in nodeinfo.values():
8647 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8649 (_, _, (src_info, )) = nodeinfo[source_node].payload
8650 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8652 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8653 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8654 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8655 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8656 if src_version != dst_version:
8657 self.feedback_fn("* warning: hypervisor version mismatch between"
8658 " source (%s) and target (%s) node" %
8659 (src_version, dst_version))
8661 self.feedback_fn("* checking disk consistency between source and target")
8662 for (idx, dev) in enumerate(instance.disks):
8663 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8664 raise errors.OpExecError("Disk %s is degraded or not fully"
8665 " synchronized on target node,"
8666 " aborting migration" % idx)
8668 if self.current_mem > self.tgt_free_mem:
8669 if not self.allow_runtime_changes:
8670 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8671 " free memory to fit instance %s on target"
8672 " node %s (have %dMB, need %dMB)" %
8673 (instance.name, target_node,
8674 self.tgt_free_mem, self.current_mem))
8675 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8676 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8679 rpcres.Raise("Cannot modify instance runtime memory")
8681 # First get the migration information from the remote node
8682 result = self.rpc.call_migration_info(source_node, instance)
8683 msg = result.fail_msg
8685 log_err = ("Failed fetching source migration information from %s: %s" %
8687 logging.error(log_err)
8688 raise errors.OpExecError(log_err)
8690 self.migration_info = migration_info = result.payload
8692 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8693 # Then switch the disks to master/master mode
8694 self._EnsureSecondary(target_node)
8695 self._GoStandalone()
8696 self._GoReconnect(True)
8697 self._WaitUntilSync()
8699 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8700 result = self.rpc.call_accept_instance(target_node,
8703 self.nodes_ip[target_node])
8705 msg = result.fail_msg
8707 logging.error("Instance pre-migration failed, trying to revert"
8708 " disk status: %s", msg)
8709 self.feedback_fn("Pre-migration failed, aborting")
8710 self._AbortMigration()
8711 self._RevertDiskStatus()
8712 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8713 (instance.name, msg))
8715 self.feedback_fn("* migrating instance to %s" % target_node)
8716 result = self.rpc.call_instance_migrate(source_node, instance,
8717 self.nodes_ip[target_node],
8719 msg = result.fail_msg
8721 logging.error("Instance migration failed, trying to revert"
8722 " disk status: %s", msg)
8723 self.feedback_fn("Migration failed, aborting")
8724 self._AbortMigration()
8725 self._RevertDiskStatus()
8726 raise errors.OpExecError("Could not migrate instance %s: %s" %
8727 (instance.name, msg))
8729 self.feedback_fn("* starting memory transfer")
8730 last_feedback = time.time()
8732 result = self.rpc.call_instance_get_migration_status(source_node,
8734 msg = result.fail_msg
8735 ms = result.payload # MigrationStatus instance
8736 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8737 logging.error("Instance migration failed, trying to revert"
8738 " disk status: %s", msg)
8739 self.feedback_fn("Migration failed, aborting")
8740 self._AbortMigration()
8741 self._RevertDiskStatus()
8743 msg = "hypervisor returned failure"
8744 raise errors.OpExecError("Could not migrate instance %s: %s" %
8745 (instance.name, msg))
8747 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8748 self.feedback_fn("* memory transfer complete")
8751 if (utils.TimeoutExpired(last_feedback,
8752 self._MIGRATION_FEEDBACK_INTERVAL) and
8753 ms.transferred_ram is not None):
8754 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8755 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8756 last_feedback = time.time()
8758 time.sleep(self._MIGRATION_POLL_INTERVAL)
8760 result = self.rpc.call_instance_finalize_migration_src(source_node,
8764 msg = result.fail_msg
8766 logging.error("Instance migration succeeded, but finalization failed"
8767 " on the source node: %s", msg)
8768 raise errors.OpExecError("Could not finalize instance migration: %s" %
8771 instance.primary_node = target_node
8773 # distribute new instance config to the other nodes
8774 self.cfg.Update(instance, self.feedback_fn)
8776 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8780 msg = result.fail_msg
8782 logging.error("Instance migration succeeded, but finalization failed"
8783 " on the target node: %s", msg)
8784 raise errors.OpExecError("Could not finalize instance migration: %s" %
8787 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8788 self._EnsureSecondary(source_node)
8789 self._WaitUntilSync()
8790 self._GoStandalone()
8791 self._GoReconnect(False)
8792 self._WaitUntilSync()
8794 # If the instance's disk template is `rbd' and there was a successful
8795 # migration, unmap the device from the source node.
8796 if self.instance.disk_template == constants.DT_RBD:
8797 disks = _ExpandCheckDisks(instance, instance.disks)
8798 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8800 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8801 msg = result.fail_msg
8803 logging.error("Migration was successful, but couldn't unmap the"
8804 " block device %s on source node %s: %s",
8805 disk.iv_name, source_node, msg)
8806 logging.error("You need to unmap the device %s manually on %s",
8807 disk.iv_name, source_node)
8809 self.feedback_fn("* done")
8811 def _ExecFailover(self):
8812 """Failover an instance.
8814 The failover is done by shutting it down on its present node and
8815 starting it on the secondary.
8818 instance = self.instance
8819 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8821 source_node = instance.primary_node
8822 target_node = self.target_node
8824 if instance.admin_state == constants.ADMINST_UP:
8825 self.feedback_fn("* checking disk consistency between source and target")
8826 for (idx, dev) in enumerate(instance.disks):
8827 # for drbd, these are drbd over lvm
8828 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8830 if primary_node.offline:
8831 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8833 (primary_node.name, idx, target_node))
8834 elif not self.ignore_consistency:
8835 raise errors.OpExecError("Disk %s is degraded on target node,"
8836 " aborting failover" % idx)
8838 self.feedback_fn("* not checking disk consistency as instance is not"
8841 self.feedback_fn("* shutting down instance on source node")
8842 logging.info("Shutting down instance %s on node %s",
8843 instance.name, source_node)
8845 result = self.rpc.call_instance_shutdown(source_node, instance,
8846 self.shutdown_timeout)
8847 msg = result.fail_msg
8849 if self.ignore_consistency or primary_node.offline:
8850 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8851 " proceeding anyway; please make sure node"
8852 " %s is down; error details: %s",
8853 instance.name, source_node, source_node, msg)
8855 raise errors.OpExecError("Could not shutdown instance %s on"
8857 (instance.name, source_node, msg))
8859 self.feedback_fn("* deactivating the instance's disks on source node")
8860 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8861 raise errors.OpExecError("Can't shut down the instance's disks")
8863 instance.primary_node = target_node
8864 # distribute new instance config to the other nodes
8865 self.cfg.Update(instance, self.feedback_fn)
8867 # Only start the instance if it's marked as up
8868 if instance.admin_state == constants.ADMINST_UP:
8869 self.feedback_fn("* activating the instance's disks on target node %s" %
8871 logging.info("Starting instance %s on node %s",
8872 instance.name, target_node)
8874 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8875 ignore_secondaries=True)
8877 _ShutdownInstanceDisks(self.lu, instance)
8878 raise errors.OpExecError("Can't activate the instance's disks")
8880 self.feedback_fn("* starting the instance on the target node %s" %
8882 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8884 msg = result.fail_msg
8886 _ShutdownInstanceDisks(self.lu, instance)
8887 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8888 (instance.name, target_node, msg))
8890 def Exec(self, feedback_fn):
8891 """Perform the migration.
8894 self.feedback_fn = feedback_fn
8895 self.source_node = self.instance.primary_node
8897 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8898 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8899 self.target_node = self.instance.secondary_nodes[0]
8900 # Otherwise self.target_node has been populated either
8901 # directly, or through an iallocator.
8903 self.all_nodes = [self.source_node, self.target_node]
8904 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8905 in self.cfg.GetMultiNodeInfo(self.all_nodes))
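# Illustrative sketch (hypothetical addresses): nodes_ip maps node names to
# their secondary (replication network) IPs, e.g.
#   {"node1.example.com": "192.0.2.10", "node2.example.com": "192.0.2.11"}
# and is what the DRBD network RPCs (_GoStandalone/_GoReconnect) use to
# (re)wire the disks.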
8908 feedback_fn("Failover instance %s" % self.instance.name)
8909 self._ExecFailover()
8911 feedback_fn("Migrating instance %s" % self.instance.name)
8914 return self._ExecCleanup()
8916 return self._ExecMigration()
8919 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8921 """Wrapper around L{_CreateBlockDevInner}.
8923 This method annotates the root device first.
8926 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8927 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8931 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8933 """Create a tree of block devices on a given node.
8935 If this device type has to be created on secondaries, create it and
8938 If not, just recurse to children keeping the same 'force' value.
8940 @attention: The device has to be annotated already.
8942 @param lu: the lu on whose behalf we execute
8943 @param node: the node on which to create the device
8944 @type instance: L{objects.Instance}
8945 @param instance: the instance which owns the device
8946 @type device: L{objects.Disk}
8947 @param device: the device to create
8948 @type force_create: boolean
8949 @param force_create: whether to force creation of this device; this
8950 will be changed to True whenever we find a device which has the
8951 CreateOnSecondary() attribute
8952 @param info: the extra 'metadata' we should attach to the device
8953 (this will be represented as an LVM tag)
8954 @type force_open: boolean
8955 @param force_open: this parameter will be passed to the
8956 L{backend.BlockdevCreate} function where it specifies
8957 whether we run on primary or not, and it affects both
8958 the child assembly and the device's own Open() execution
8961 if device.CreateOnSecondary():
8965 for child in device.children:
8966 _CreateBlockDevInner(lu, node, instance, child, force_create,
8969 if not force_create:
8972 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8975 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8976 """Create a single block device on a given node.
8978 This will not recurse over children of the device, so they must be created in advance.
8981 @param lu: the lu on whose behalf we execute
8982 @param node: the node on which to create the device
8983 @type instance: L{objects.Instance}
8984 @param instance: the instance which owns the device
8985 @type device: L{objects.Disk}
8986 @param device: the device to create
8987 @param info: the extra 'metadata' we should attach to the device
8988 (this will be represented as an LVM tag)
8989 @type force_open: boolean
8990 @param force_open: this parameter will be passed to the
8991 L{backend.BlockdevCreate} function where it specifies
8992 whether we run on primary or not, and it affects both
8993 the child assembly and the device's own Open() execution
8996 lu.cfg.SetDiskID(device, node)
8997 result = lu.rpc.call_blockdev_create(node, device, device.size,
8998 instance.name, force_open, info)
8999 result.Raise("Can't create block device %s on"
9000 " node %s for instance %s" % (device, node, instance.name))
9001 if device.physical_id is None:
9002 device.physical_id = result.payload
9005 def _GenerateUniqueNames(lu, exts):
9006 """Generate a suitable LV name.
9008 This will generate a logical volume name for the given instance.
9013 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9014 results.append("%s%s" % (new_id, val))
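# Illustrative example (hypothetical UUIDs): for exts=[".disk0", ".disk1"] this
# produces names such as
#   ["0b6e...91.disk0", "f3a2...7c.disk1"]
# i.e. one cluster-wide unique ID per requested extension.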
9018 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9019 iv_name, p_minor, s_minor):
9020 """Generate a drbd8 device complete with its children.
9023 assert len(vgnames) == len(names) == 2
9024 port = lu.cfg.AllocatePort()
9025 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9027 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9028 logical_id=(vgnames[0], names[0]),
9030 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9031 size=constants.DRBD_META_SIZE,
9032 logical_id=(vgnames[1], names[1]),
9034 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9035 logical_id=(primary, secondary, port,
9038 children=[dev_data, dev_meta],
9039 iv_name=iv_name, params={})
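# Illustrative sketch of the resulting device tree (hypothetical names/sizes):
#   DRBD8(size=10240,
#         logical_id=(primary, secondary, port, p_minor, s_minor, secret),
#         children=[LV(10240, (data_vg, "<uuid>.disk0_data")),
#                   LV(DRBD_META_SIZE, (meta_vg, "<uuid>.disk0_meta"))])
# i.e. a DRBD device backed by a data LV and a small metadata LV.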
9043 _DISK_TEMPLATE_NAME_PREFIX = {
9044 constants.DT_PLAIN: "",
9045 constants.DT_RBD: ".rbd",
9049 _DISK_TEMPLATE_DEVICE_TYPE = {
9050 constants.DT_PLAIN: constants.LD_LV,
9051 constants.DT_FILE: constants.LD_FILE,
9052 constants.DT_SHARED_FILE: constants.LD_FILE,
9053 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9054 constants.DT_RBD: constants.LD_RBD,
9058 def _GenerateDiskTemplate(
9059 lu, template_name, instance_name, primary_node, secondary_nodes,
9060 disk_info, file_storage_dir, file_driver, base_index,
9061 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9062 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9063 """Generate the entire disk layout for a given template type.
9066 #TODO: compute space requirements
9068 vgname = lu.cfg.GetVGName()
9069 disk_count = len(disk_info)
9072 if template_name == constants.DT_DISKLESS:
9074 elif template_name == constants.DT_DRBD8:
9075 if len(secondary_nodes) != 1:
9076 raise errors.ProgrammerError("Wrong template configuration")
9077 remote_node = secondary_nodes[0]
9078 minors = lu.cfg.AllocateDRBDMinor(
9079 [primary_node, remote_node] * len(disk_info), instance_name)
9081 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9083 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9086 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9087 for i in range(disk_count)]):
9088 names.append(lv_prefix + "_data")
9089 names.append(lv_prefix + "_meta")
9090 for idx, disk in enumerate(disk_info):
9091 disk_index = idx + base_index
9092 data_vg = disk.get(constants.IDISK_VG, vgname)
9093 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9094 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9095 disk[constants.IDISK_SIZE],
9097 names[idx * 2:idx * 2 + 2],
9098 "disk/%d" % disk_index,
9099 minors[idx * 2], minors[idx * 2 + 1])
9100 disk_dev.mode = disk[constants.IDISK_MODE]
9101 disks.append(disk_dev)
9104 raise errors.ProgrammerError("Wrong template configuration")
9106 if template_name == constants.DT_FILE:
9108 elif template_name == constants.DT_SHARED_FILE:
9109 _req_shr_file_storage()
9111 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9112 if name_prefix is None:
9115 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9116 (name_prefix, base_index + i)
9117 for i in range(disk_count)])
9119 if template_name == constants.DT_PLAIN:
9121 def logical_id_fn(idx, _, disk):
9122 vg = disk.get(constants.IDISK_VG, vgname)
9123 return (vg, names[idx])
9125 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9127 lambda _, disk_index, disk: (file_driver,
9128 "%s/disk%d" % (file_storage_dir,
9130 elif template_name == constants.DT_BLOCK:
9132 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9133 disk[constants.IDISK_ADOPT])
9134 elif template_name == constants.DT_RBD:
9135 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9137 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9139 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9141 for idx, disk in enumerate(disk_info):
9142 disk_index = idx + base_index
9143 size = disk[constants.IDISK_SIZE]
9144 feedback_fn("* disk %s, size %s" %
9145 (disk_index, utils.FormatUnit(size, "h")))
9146 disks.append(objects.Disk(dev_type=dev_type, size=size,
9147 logical_id=logical_id_fn(idx, disk_index, disk),
9148 iv_name="disk/%d" % disk_index,
9149 mode=disk[constants.IDISK_MODE],
9155 def _GetInstanceInfoText(instance):
9156 Compute the text that should be added to the disk's metadata.
9159 return "originstname+%s" % instance.name
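# Illustrative example (hypothetical instance name): for "web1.example.com"
# the text attached to its disks is "originstname+web1.example.com".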
9162 def _CalcEta(time_taken, written, total_size):
9163 """Calculates the ETA based on size written and total size.
9165 @param time_taken: The time taken so far
9166 @param written: amount written so far
9167 @param total_size: The total size of data to be written
9168 @return: The remaining time in seconds
9171 avg_time = time_taken / float(written)
9172 return (total_size - written) * avg_time
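# Worked example (hypothetical numbers): if 1024 MiB were written in 30
# seconds out of 4096 MiB total, avg_time = 30 / 1024 ~= 0.0293 s/MiB and the
# ETA is (4096 - 1024) * 0.0293 ~= 90 seconds.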
9175 def _WipeDisks(lu, instance, disks=None):
9176 """Wipes instance disks.
9178 @type lu: L{LogicalUnit}
9179 @param lu: the logical unit on whose behalf we execute
9180 @type instance: L{objects.Instance}
9181 @param instance: the instance whose disks we should wipe
9182 @return: the success of the wipe
9185 node = instance.primary_node
9188 disks = [(idx, disk, 0)
9189 for (idx, disk) in enumerate(instance.disks)]
9191 for (_, device, _) in disks:
9192 lu.cfg.SetDiskID(device, node)
9194 logging.info("Pausing synchronization of disks of instance '%s'",
9196 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9197 (map(compat.snd, disks),
9200 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9202 for idx, success in enumerate(result.payload):
9204 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9205 " failed", idx, instance.name)
9208 for (idx, device, offset) in disks:
9209 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9210 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9212 int(min(constants.MAX_WIPE_CHUNK,
9213 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
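# Worked example (constant values assumed, see constants.py): with
# MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024 MiB, a 5000 MiB disk
# is wiped in chunks of min(1024, 5000 * 0.10) = 500 MiB, while a 100 GiB
# disk is capped at 1024 MiB per chunk.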
9217 start_time = time.time()
9222 info_text = (" (from %s to %s)" %
9223 (utils.FormatUnit(offset, "h"),
9224 utils.FormatUnit(size, "h")))
9226 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9228 logging.info("Wiping disk %d for instance %s on node %s using"
9229 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9231 while offset < size:
9232 wipe_size = min(wipe_chunk_size, size - offset)
9234 logging.debug("Wiping disk %d, offset %s, chunk %s",
9235 idx, offset, wipe_size)
9237 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9239 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9240 (idx, offset, wipe_size))
9244 if now - last_output >= 60:
9245 eta = _CalcEta(now - start_time, offset, size)
9246 lu.LogInfo(" - done: %.1f%% ETA: %s",
9247 offset / float(size) * 100, utils.FormatSeconds(eta))
9250 logging.info("Resuming synchronization of disks for instance '%s'",
9253 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9254 (map(compat.snd, disks),
9259 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9260 node, result.fail_msg)
9262 for idx, success in enumerate(result.payload):
9264 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9265 " failed", idx, instance.name)
9268 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9269 """Create all disks for an instance.
9271 This abstracts away some work from AddInstance.
9273 @type lu: L{LogicalUnit}
9274 @param lu: the logical unit on whose behalf we execute
9275 @type instance: L{objects.Instance}
9276 @param instance: the instance whose disks we should create
9278 @param to_skip: list of indices to skip
9279 @type target_node: string
9280 @param target_node: if passed, overrides the target node for creation
9282 @return: the success of the creation
9285 info = _GetInstanceInfoText(instance)
9286 if target_node is None:
9287 pnode = instance.primary_node
9288 all_nodes = instance.all_nodes
9293 if instance.disk_template in constants.DTS_FILEBASED:
9294 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9295 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9297 result.Raise("Failed to create directory '%s' on"
9298 " node %s" % (file_storage_dir, pnode))
9300 # Note: this needs to be kept in sync with adding of disks in
9301 # LUInstanceSetParams
9302 for idx, device in enumerate(instance.disks):
9303 if to_skip and idx in to_skip:
9305 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9307 for node in all_nodes:
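# Only on the primary node do we force both creation and opening of the
# device (f_create doubles as force_create and force_open); on secondaries
# the device is created only if its type requires it.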
9308 f_create = node == pnode
9309 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9312 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9313 """Remove all disks for an instance.
9315 This abstracts away some work from `AddInstance()` and
9316 `RemoveInstance()`. Note that in case some of the devices couldn't
9317 be removed, the removal will continue with the other ones (compare
9318 with `_CreateDisks()`).
9320 @type lu: L{LogicalUnit}
9321 @param lu: the logical unit on whose behalf we execute
9322 @type instance: L{objects.Instance}
9323 @param instance: the instance whose disks we should remove
9324 @type target_node: string
9325 @param target_node: used to override the node on which to remove the disks
9327 @return: the success of the removal
9330 logging.info("Removing block devices for instance %s", instance.name)
9333 ports_to_release = set()
9334 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9335 for (idx, device) in enumerate(anno_disks):
9337 edata = [(target_node, device)]
9339 edata = device.ComputeNodeTree(instance.primary_node)
9340 for node, disk in edata:
9341 lu.cfg.SetDiskID(disk, node)
9342 result = lu.rpc.call_blockdev_remove(node, disk)
9344 lu.LogWarning("Could not remove disk %s on node %s,"
9345 " continuing anyway: %s", idx, node, result.fail_msg)
9346 if not (result.offline and node != instance.primary_node):
9349 # if this is a DRBD disk, return its port to the pool
9350 if device.dev_type in constants.LDS_DRBD:
9351 ports_to_release.add(device.logical_id[2])
9353 if all_result or ignore_failures:
9354 for port in ports_to_release:
9355 lu.cfg.AddTcpUdpPort(port)
9357 if instance.disk_template in constants.DTS_FILEBASED:
9358 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9362 tgt = instance.primary_node
9363 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9365 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9366 file_storage_dir, instance.primary_node, result.fail_msg)
9372 def _ComputeDiskSizePerVG(disk_template, disks):
9373 """Compute disk size requirements in the volume group
9376 def _compute(disks, payload):
9377 """Universal algorithm: per volume group, sum the disk sizes plus a per-disk payload.
9382 vgs[disk[constants.IDISK_VG]] = \
9383 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9387 # Required free disk space as a function of disk and swap space
9389 constants.DT_DISKLESS: {},
9390 constants.DT_PLAIN: _compute(disks, 0),
9391 # 128 MB are added for drbd metadata for each disk
9392 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9393 constants.DT_FILE: {},
9394 constants.DT_SHARED_FILE: {},
9397 if disk_template not in req_size_dict:
9398 raise errors.ProgrammerError("Disk template '%s' size requirement"
9399 " is unknown" % disk_template)
9401 return req_size_dict[disk_template]
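# A minimal sketch of the expected result, assuming two 10240 MiB disks in a
# volume group named "xenvg" (the VG name is illustrative only):
#   _ComputeDiskSizePerVG(constants.DT_PLAIN, disks)    -> {"xenvg": 20480}
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)    -> {"xenvg": 20480 +
#                                                           2 * DRBD_META_SIZE}
#   _ComputeDiskSizePerVG(constants.DT_DISKLESS, disks) -> {}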
9404 def _FilterVmNodes(lu, nodenames):
9405 """Filters out non-vm_capable nodes from a list.
9407 @type lu: L{LogicalUnit}
9408 @param lu: the logical unit for which we check
9409 @type nodenames: list
9410 @param nodenames: the list of nodes on which we should check
9412 @return: the list of vm-capable nodes
9415 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9416 return [name for name in nodenames if name not in non_vm_nodes]
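# Hypothetical example: if "node2.example.com" is marked non-vm_capable in the
# configuration, _FilterVmNodes(lu, ["node1.example.com", "node2.example.com"])
# returns ["node1.example.com"].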
9419 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9420 """Hypervisor parameter validation.
9422 This function abstracts the hypervisor parameter validation to be
9423 used in both instance create and instance modify.
9425 @type lu: L{LogicalUnit}
9426 @param lu: the logical unit for which we check
9427 @type nodenames: list
9428 @param nodenames: the list of nodes on which we should check
9429 @type hvname: string
9430 @param hvname: the name of the hypervisor we should use
9431 @type hvparams: dict
9432 @param hvparams: the parameters which we need to check
9433 @raise errors.OpPrereqError: if the parameters are not valid
9436 nodenames = _FilterVmNodes(lu, nodenames)
9438 cluster = lu.cfg.GetClusterInfo()
9439 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9441 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9442 for node in nodenames:
9446 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9449 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9450 """OS parameters validation.
9452 @type lu: L{LogicalUnit}
9453 @param lu: the logical unit for which we check
9454 @type required: boolean
9455 @param required: whether the validation should fail if the OS is not found
9457 @type nodenames: list
9458 @param nodenames: the list of nodes on which we should check
9459 @type osname: string
9460 @param osname: the name of the OS we should use
9461 @type osparams: dict
9462 @param osparams: the parameters which we need to check
9463 @raise errors.OpPrereqError: if the parameters are not valid
9466 nodenames = _FilterVmNodes(lu, nodenames)
9467 result = lu.rpc.call_os_validate(nodenames, required, osname,
9468 [constants.OS_VALIDATE_PARAMETERS],
9470 for node, nres in result.items():
9471 # we don't check for offline cases since this should be run only
9472 # against the master node and/or an instance's nodes
9473 nres.Raise("OS Parameters validation failed on node %s" % node)
9474 if not nres.payload:
9475 lu.LogInfo("OS %s not found on node %s, validation skipped",
9479 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9480 """Wrapper around IAReqInstanceAlloc.
9482 @param op: The instance opcode
9483 @param disks: The computed disks
9484 @param nics: The computed nics
9485 @param beparams: The fully filled beparams
9487 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9490 spindle_use = beparams[constants.BE_SPINDLE_USE]
9491 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9492 disk_template=op.disk_template,
9495 vcpus=beparams[constants.BE_VCPUS],
9496 memory=beparams[constants.BE_MAXMEM],
9497 spindle_use=spindle_use,
9499 nics=[n.ToDict() for n in nics],
9500 hypervisor=op.hypervisor)
9503 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9504 """Computes the nics.
9506 @param op: The instance opcode
9507 @param cluster: Cluster configuration object
9508 @param default_ip: The default ip to assign
9509 @param cfg: An instance of the configuration object
9510 @param ec_id: Execution context ID
9512 @returns: The built-up NICs
9517 nic_mode_req = nic.get(constants.INIC_MODE, None)
9518 nic_mode = nic_mode_req
9519 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9520 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9522 net = nic.get(constants.INIC_NETWORK, None)
9523 link = nic.get(constants.NIC_LINK, None)
9524 ip = nic.get(constants.INIC_IP, None)
9526 if net is None or net.lower() == constants.VALUE_NONE:
9529 if nic_mode_req is not None or link is not None:
9530 raise errors.OpPrereqError("If network is given, no mode or link"
9531 " is allowed to be passed",
9534 # ip validity checks
9535 if ip is None or ip.lower() == constants.VALUE_NONE:
9537 elif ip.lower() == constants.VALUE_AUTO:
9538 if not op.name_check:
9539 raise errors.OpPrereqError("IP address set to auto but name checks"
9540 " have been skipped",
9544 # We defer pool operations until later, so that the iallocator has
9545 # filled in the instance's node(s)
9546 if ip.lower() == constants.NIC_IP_POOL:
9548 raise errors.OpPrereqError("if ip=pool, parameter network"
9549 " must be passed too",
9552 elif not netutils.IPAddress.IsValid(ip):
9553 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9558 # TODO: check the ip address for uniqueness
9559 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9560 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9563 # MAC address verification
9564 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9565 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9566 mac = utils.NormalizeAndValidateMac(mac)
9569 # TODO: We need to factor this out
9570 cfg.ReserveMAC(mac, ec_id)
9571 except errors.ReservationError:
9572 raise errors.OpPrereqError("MAC address %s already in use"
9573 " in cluster" % mac,
9574 errors.ECODE_NOTUNIQUE)
9576 # Build nic parameters
9579 nicparams[constants.NIC_MODE] = nic_mode
9581 nicparams[constants.NIC_LINK] = link
9583 check_params = cluster.SimpleFillNIC(nicparams)
9584 objects.NIC.CheckParameterSyntax(check_params)
9585 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9586 network=net, nicparams=nicparams))
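# Hedged sketch of the resulting object (values illustrative): an opcode NIC of
# {constants.INIC_MODE: "bridged", constants.INIC_LINK: "br0"} with no IP and
# no MAC yields objects.NIC(mac="auto", ip=None, network=None,
# nicparams={mode: "bridged", link: "br0"}); the real MAC address is generated
# later, once the overrides have been syntax-checked against the cluster
# default nicparams.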
9591 def _ComputeDisks(op, default_vg):
9592 """Computes the instance disks.
9594 @param op: The instance opcode
9595 @param default_vg: The default_vg to assume
9597 @return: The computed disks
9601 for disk in op.disks:
9602 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9603 if mode not in constants.DISK_ACCESS_SET:
9604 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9605 mode, errors.ECODE_INVAL)
9606 size = disk.get(constants.IDISK_SIZE, None)
9608 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9611 except (TypeError, ValueError):
9612 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9615 data_vg = disk.get(constants.IDISK_VG, default_vg)
9617 constants.IDISK_SIZE: size,
9618 constants.IDISK_MODE: mode,
9619 constants.IDISK_VG: data_vg,
9621 if constants.IDISK_METAVG in disk:
9622 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9623 if constants.IDISK_ADOPT in disk:
9624 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9625 disks.append(new_disk)
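# Small illustration of the transformation (the default_vg "xenvg" is
# hypothetical): an opcode disk {constants.IDISK_SIZE: 10240} becomes
#   {constants.IDISK_SIZE: 10240,
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}
# while explicit metavg/adopt keys, when present, are carried over unchanged.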
9630 def _ComputeFullBeParams(op, cluster):
9631 """Computes the full beparams.
9633 @param op: The instance opcode
9634 @param cluster: The cluster config object
9636 @return: The fully filled beparams
9639 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9640 for param, value in op.beparams.iteritems():
9641 if value == constants.VALUE_AUTO:
9642 op.beparams[param] = default_beparams[param]
9643 objects.UpgradeBeParams(op.beparams)
9644 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9645 return cluster.SimpleFillBE(op.beparams)
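# Illustrative behaviour, assuming nothing about the concrete cluster defaults:
# an op.beparams of {constants.BE_VCPUS: constants.VALUE_AUTO} first has the
# "auto" value replaced by the cluster default, any legacy "memory" setting is
# upgraded to minmem/maxmem via UpgradeBeParams, and the returned dict is the
# cluster-filled set containing every backend parameter.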
9648 class LUInstanceCreate(LogicalUnit):
9649 """Create an instance.
9652 HPATH = "instance-add"
9653 HTYPE = constants.HTYPE_INSTANCE
9656 def CheckArguments(self):
9660 # do not require name_check to ease forward/backward compatibility
9662 if self.op.no_install and self.op.start:
9663 self.LogInfo("No-installation mode selected, disabling startup")
9664 self.op.start = False
9665 # validate/normalize the instance name
9666 self.op.instance_name = \
9667 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9669 if self.op.ip_check and not self.op.name_check:
9670 # TODO: make the ip check more flexible and not depend on the name check
9671 raise errors.OpPrereqError("Cannot do IP address check without a name"
9672 " check", errors.ECODE_INVAL)
9674 # check nics' parameter names
9675 for nic in self.op.nics:
9676 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9678 # check disks: parameter names and consistent adopt/no-adopt strategy
9679 has_adopt = has_no_adopt = False
9680 for disk in self.op.disks:
9681 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9682 if constants.IDISK_ADOPT in disk:
9686 if has_adopt and has_no_adopt:
9687 raise errors.OpPrereqError("Either all disks are adopted or none is",
9690 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9691 raise errors.OpPrereqError("Disk adoption is not supported for the"
9692 " '%s' disk template" %
9693 self.op.disk_template,
9695 if self.op.iallocator is not None:
9696 raise errors.OpPrereqError("Disk adoption not allowed with an"
9697 " iallocator script", errors.ECODE_INVAL)
9698 if self.op.mode == constants.INSTANCE_IMPORT:
9699 raise errors.OpPrereqError("Disk adoption not allowed for"
9700 " instance import", errors.ECODE_INVAL)
9702 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9703 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9704 " but no 'adopt' parameter given" %
9705 self.op.disk_template,
9708 self.adopt_disks = has_adopt
9710 # instance name verification
9711 if self.op.name_check:
9712 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9713 self.op.instance_name = self.hostname1.name
9714 # used in CheckPrereq for ip ping check
9715 self.check_ip = self.hostname1.ip
9717 self.check_ip = None
9719 # file storage checks
9720 if (self.op.file_driver and
9721 not self.op.file_driver in constants.FILE_DRIVER):
9722 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9723 self.op.file_driver, errors.ECODE_INVAL)
9725 if self.op.disk_template == constants.DT_FILE:
9726 opcodes.RequireFileStorage()
9727 elif self.op.disk_template == constants.DT_SHARED_FILE:
9728 opcodes.RequireSharedFileStorage()
9730 ### Node/iallocator related checks
9731 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9733 if self.op.pnode is not None:
9734 if self.op.disk_template in constants.DTS_INT_MIRROR:
9735 if self.op.snode is None:
9736 raise errors.OpPrereqError("The networked disk templates need"
9737 " a mirror node", errors.ECODE_INVAL)
9739 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9741 self.op.snode = None
9743 self._cds = _GetClusterDomainSecret()
9745 if self.op.mode == constants.INSTANCE_IMPORT:
9746 # On import force_variant must be True, because if we forced it at
9747 # initial install, our only chance when importing it back is that it
9749 self.op.force_variant = True
9751 if self.op.no_install:
9752 self.LogInfo("No-installation mode has no effect during import")
9754 elif self.op.mode == constants.INSTANCE_CREATE:
9755 if self.op.os_type is None:
9756 raise errors.OpPrereqError("No guest OS specified",
9758 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9759 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9760 " installation" % self.op.os_type,
9762 if self.op.disk_template is None:
9763 raise errors.OpPrereqError("No disk template specified",
9766 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9767 # Check handshake to ensure both clusters have the same domain secret
9768 src_handshake = self.op.source_handshake
9769 if not src_handshake:
9770 raise errors.OpPrereqError("Missing source handshake",
9773 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9776 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9779 # Load and check source CA
9780 self.source_x509_ca_pem = self.op.source_x509_ca
9781 if not self.source_x509_ca_pem:
9782 raise errors.OpPrereqError("Missing source X509 CA",
9786 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9788 except OpenSSL.crypto.Error, err:
9789 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9790 (err, ), errors.ECODE_INVAL)
9792 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9793 if errcode is not None:
9794 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9797 self.source_x509_ca = cert
9799 src_instance_name = self.op.source_instance_name
9800 if not src_instance_name:
9801 raise errors.OpPrereqError("Missing source instance name",
9804 self.source_instance_name = \
9805 netutils.GetHostname(name=src_instance_name).name
9808 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9809 self.op.mode, errors.ECODE_INVAL)
9811 def ExpandNames(self):
9812 """ExpandNames for CreateInstance.
9814 Figure out the right locks for instance creation.
9817 self.needed_locks = {}
9819 instance_name = self.op.instance_name
9820 # this is just a preventive check, but someone might still add this
9821 # instance in the meantime, and creation will fail at lock-add time
9822 if instance_name in self.cfg.GetInstanceList():
9823 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9824 instance_name, errors.ECODE_EXISTS)
9826 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9828 if self.op.iallocator:
9829 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9830 # specifying a group on instance creation and then selecting nodes from
9832 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9833 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9835 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9836 nodelist = [self.op.pnode]
9837 if self.op.snode is not None:
9838 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9839 nodelist.append(self.op.snode)
9840 self.needed_locks[locking.LEVEL_NODE] = nodelist
9841 # Lock resources of instance's primary and secondary nodes (copy to
9842 # prevent accidental modification)
9843 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9845 # in case of import lock the source node too
9846 if self.op.mode == constants.INSTANCE_IMPORT:
9847 src_node = self.op.src_node
9848 src_path = self.op.src_path
9850 if src_path is None:
9851 self.op.src_path = src_path = self.op.instance_name
9853 if src_node is None:
9854 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9855 self.op.src_node = None
9856 if os.path.isabs(src_path):
9857 raise errors.OpPrereqError("Importing an instance from a path"
9858 " requires a source node option",
9861 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9862 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9863 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9864 if not os.path.isabs(src_path):
9865 self.op.src_path = src_path = \
9866 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9868 def _RunAllocator(self):
9869 """Run the allocator based on input opcode.
9872 #TODO Export network to iallocator so that it chooses a pnode
9873 # in a nodegroup that has the desired network connected to
9874 req = _CreateInstanceAllocRequest(self.op, self.disks,
9875 self.nics, self.be_full)
9876 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9878 ial.Run(self.op.iallocator)
9881 raise errors.OpPrereqError("Can't compute nodes using"
9882 " iallocator '%s': %s" %
9883 (self.op.iallocator, ial.info),
9885 self.op.pnode = ial.result[0]
9886 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9887 self.op.instance_name, self.op.iallocator,
9888 utils.CommaJoin(ial.result))
9890 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9892 if req.RequiredNodes() == 2:
9893 self.op.snode = ial.result[1]
9895 def BuildHooksEnv(self):
9898 This runs on master, primary and secondary nodes of the instance.
9902 "ADD_MODE": self.op.mode,
9904 if self.op.mode == constants.INSTANCE_IMPORT:
9905 env["SRC_NODE"] = self.op.src_node
9906 env["SRC_PATH"] = self.op.src_path
9907 env["SRC_IMAGES"] = self.src_images
9909 env.update(_BuildInstanceHookEnv(
9910 name=self.op.instance_name,
9911 primary_node=self.op.pnode,
9912 secondary_nodes=self.secondaries,
9913 status=self.op.start,
9914 os_type=self.op.os_type,
9915 minmem=self.be_full[constants.BE_MINMEM],
9916 maxmem=self.be_full[constants.BE_MAXMEM],
9917 vcpus=self.be_full[constants.BE_VCPUS],
9918 nics=_NICListToTuple(self, self.nics),
9919 disk_template=self.op.disk_template,
9920 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9921 for d in self.disks],
9924 hypervisor_name=self.op.hypervisor,
9930 def BuildHooksNodes(self):
9931 """Build hooks nodes.
9934 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9937 def _ReadExportInfo(self):
9938 """Reads the export information from disk.
9940 It will override the opcode source node and path with the actual
9941 information, if these two were not specified before.
9943 @return: the export information
9946 assert self.op.mode == constants.INSTANCE_IMPORT
9948 src_node = self.op.src_node
9949 src_path = self.op.src_path
9951 if src_node is None:
9952 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9953 exp_list = self.rpc.call_export_list(locked_nodes)
9955 for node in exp_list:
9956 if exp_list[node].fail_msg:
9958 if src_path in exp_list[node].payload:
9960 self.op.src_node = src_node = node
9961 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9965 raise errors.OpPrereqError("No export found for relative path %s" %
9966 src_path, errors.ECODE_INVAL)
9968 _CheckNodeOnline(self, src_node)
9969 result = self.rpc.call_export_info(src_node, src_path)
9970 result.Raise("No export or invalid export found in dir %s" % src_path)
9972 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9973 if not export_info.has_section(constants.INISECT_EXP):
9974 raise errors.ProgrammerError("Corrupted export config",
9975 errors.ECODE_ENVIRON)
9977 ei_version = export_info.get(constants.INISECT_EXP, "version")
9978 if (int(ei_version) != constants.EXPORT_VERSION):
9979 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9980 (ei_version, constants.EXPORT_VERSION),
9981 errors.ECODE_ENVIRON)
9984 def _ReadExportParams(self, einfo):
9985 """Use export parameters as defaults.
9987 In case the opcode doesn't specify (i.e. override) some instance
9988 parameters, try to use them from the export information, if it declares them.
9992 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9994 if self.op.disk_template is None:
9995 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9996 self.op.disk_template = einfo.get(constants.INISECT_INS,
9998 if self.op.disk_template not in constants.DISK_TEMPLATES:
9999 raise errors.OpPrereqError("Disk template specified in configuration"
10000 " file is not one of the allowed values:"
10002 " ".join(constants.DISK_TEMPLATES),
10003 errors.ECODE_INVAL)
10005 raise errors.OpPrereqError("No disk template specified and the export"
10006 " is missing the disk_template information",
10007 errors.ECODE_INVAL)
10009 if not self.op.disks:
10011 # TODO: import the disk iv_name too
10012 for idx in range(constants.MAX_DISKS):
10013 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10014 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10015 disks.append({constants.IDISK_SIZE: disk_sz})
10016 self.op.disks = disks
10017 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10018 raise errors.OpPrereqError("No disk info specified and the export"
10019 " is missing the disk information",
10020 errors.ECODE_INVAL)
10022 if not self.op.nics:
10024 for idx in range(constants.MAX_NICS):
10025 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10027 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10028 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10033 self.op.nics = nics
10035 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10036 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10038 if (self.op.hypervisor is None and
10039 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10040 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10042 if einfo.has_section(constants.INISECT_HYP):
10043 # use the export parameters but do not override the ones
10044 # specified by the user
10045 for name, value in einfo.items(constants.INISECT_HYP):
10046 if name not in self.op.hvparams:
10047 self.op.hvparams[name] = value
10049 if einfo.has_section(constants.INISECT_BEP):
10050 # use the parameters, without overriding
10051 for name, value in einfo.items(constants.INISECT_BEP):
10052 if name not in self.op.beparams:
10053 self.op.beparams[name] = value
10054 # Compatibility for the old "memory" be param
10055 if name == constants.BE_MEMORY:
10056 if constants.BE_MAXMEM not in self.op.beparams:
10057 self.op.beparams[constants.BE_MAXMEM] = value
10058 if constants.BE_MINMEM not in self.op.beparams:
10059 self.op.beparams[constants.BE_MINMEM] = value
10061 # try to read the parameters old style, from the main section
10062 for name in constants.BES_PARAMETERS:
10063 if (name not in self.op.beparams and
10064 einfo.has_option(constants.INISECT_INS, name)):
10065 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10067 if einfo.has_section(constants.INISECT_OSP):
10068 # use the parameters, without overriding
10069 for name, value in einfo.items(constants.INISECT_OSP):
10070 if name not in self.op.osparams:
10071 self.op.osparams[name] = value
10073 def _RevertToDefaults(self, cluster):
10074 """Revert the instance parameters to the default values.
10078 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10079 for name in self.op.hvparams.keys():
10080 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10081 del self.op.hvparams[name]
10083 be_defs = cluster.SimpleFillBE({})
10084 for name in self.op.beparams.keys():
10085 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10086 del self.op.beparams[name]
10088 nic_defs = cluster.SimpleFillNIC({})
10089 for nic in self.op.nics:
10090 for name in constants.NICS_PARAMETERS:
10091 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10094 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10095 for name in self.op.osparams.keys():
10096 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10097 del self.op.osparams[name]
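# In other words (illustrative): a hypervisor parameter passed with a value
# identical to the cluster default (say, an explicitly repeated kernel path) is
# dropped from the opcode, so the new instance keeps tracking the cluster-wide
# default instead of pinning the current value.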
10099 def _CalculateFileStorageDir(self):
10100 """Calculate final instance file storage dir.
10103 # file storage dir calculation/check
10104 self.instance_file_storage_dir = None
10105 if self.op.disk_template in constants.DTS_FILEBASED:
10106 # build the full file storage dir path
10109 if self.op.disk_template == constants.DT_SHARED_FILE:
10110 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10112 get_fsd_fn = self.cfg.GetFileStorageDir
10114 cfg_storagedir = get_fsd_fn()
10115 if not cfg_storagedir:
10116 raise errors.OpPrereqError("Cluster file storage dir not defined",
10117 errors.ECODE_STATE)
10118 joinargs.append(cfg_storagedir)
10120 if self.op.file_storage_dir is not None:
10121 joinargs.append(self.op.file_storage_dir)
10123 joinargs.append(self.op.instance_name)
10125 # pylint: disable=W0142
10126 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
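# Hypothetical example: with a cluster file storage dir of
# "/srv/ganeti/file-storage", op.file_storage_dir = "web" and instance name
# "inst1.example.com", the computed directory would be
# "/srv/ganeti/file-storage/web/inst1.example.com".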
10128 def CheckPrereq(self): # pylint: disable=R0914
10129 """Check prerequisites.
10132 self._CalculateFileStorageDir()
10134 if self.op.mode == constants.INSTANCE_IMPORT:
10135 export_info = self._ReadExportInfo()
10136 self._ReadExportParams(export_info)
10137 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10139 self._old_instance_name = None
10141 if (not self.cfg.GetVGName() and
10142 self.op.disk_template not in constants.DTS_NOT_LVM):
10143 raise errors.OpPrereqError("Cluster does not support lvm-based"
10144 " instances", errors.ECODE_STATE)
10146 if (self.op.hypervisor is None or
10147 self.op.hypervisor == constants.VALUE_AUTO):
10148 self.op.hypervisor = self.cfg.GetHypervisorType()
10150 cluster = self.cfg.GetClusterInfo()
10151 enabled_hvs = cluster.enabled_hypervisors
10152 if self.op.hypervisor not in enabled_hvs:
10153 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10155 (self.op.hypervisor, ",".join(enabled_hvs)),
10156 errors.ECODE_STATE)
10158 # Check tag validity
10159 for tag in self.op.tags:
10160 objects.TaggableObject.ValidateTag(tag)
10162 # check hypervisor parameter syntax (locally)
10163 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10164 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10166 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10167 hv_type.CheckParameterSyntax(filled_hvp)
10168 self.hv_full = filled_hvp
10169 # check that we don't specify global parameters on an instance
10170 _CheckGlobalHvParams(self.op.hvparams)
10172 # fill and remember the beparams dict
10173 self.be_full = _ComputeFullBeParams(self.op, cluster)
10175 # build os parameters
10176 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10178 # now that hvp/bep are in final format, let's reset to defaults,
10180 if self.op.identify_defaults:
10181 self._RevertToDefaults(cluster)
10184 self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10185 self.proc.GetECId())
10187 # disk checks/pre-build
10188 default_vg = self.cfg.GetVGName()
10189 self.disks = _ComputeDisks(self.op, default_vg)
10191 if self.op.mode == constants.INSTANCE_IMPORT:
10193 for idx in range(len(self.disks)):
10194 option = "disk%d_dump" % idx
10195 if export_info.has_option(constants.INISECT_INS, option):
10196 # FIXME: are the old os-es, disk sizes, etc. useful?
10197 export_name = export_info.get(constants.INISECT_INS, option)
10198 image = utils.PathJoin(self.op.src_path, export_name)
10199 disk_images.append(image)
10201 disk_images.append(False)
10203 self.src_images = disk_images
10205 if self.op.instance_name == self._old_instance_name:
10206 for idx, nic in enumerate(self.nics):
10207 if nic.mac == constants.VALUE_AUTO:
10208 nic_mac_ini = "nic%d_mac" % idx
10209 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10211 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10213 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10214 if self.op.ip_check:
10215 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10216 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10217 (self.check_ip, self.op.instance_name),
10218 errors.ECODE_NOTUNIQUE)
10220 #### mac address generation
10221 # By generating here the mac address both the allocator and the hooks get
10222 # the real final mac address rather than the 'auto' or 'generate' value.
10223 # There is a race condition between the generation and the instance object
10224 # creation, which means that we know the mac is valid now, but we're not
10225 # sure it will be when we actually add the instance. If things go bad
10226 # adding the instance will abort because of a duplicate mac, and the
10227 # creation job will fail.
10228 for nic in self.nics:
10229 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10230 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10234 if self.op.iallocator is not None:
10235 self._RunAllocator()
10237 # Release all unneeded node locks
10238 _ReleaseLocks(self, locking.LEVEL_NODE,
10239 keep=filter(None, [self.op.pnode, self.op.snode,
10240 self.op.src_node]))
10241 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10242 keep=filter(None, [self.op.pnode, self.op.snode,
10243 self.op.src_node]))
10245 #### node related checks
10247 # check primary node
10248 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10249 assert self.pnode is not None, \
10250 "Cannot retrieve locked node %s" % self.op.pnode
10252 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10253 pnode.name, errors.ECODE_STATE)
10255 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10256 pnode.name, errors.ECODE_STATE)
10257 if not pnode.vm_capable:
10258 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10259 " '%s'" % pnode.name, errors.ECODE_STATE)
10261 self.secondaries = []
10263 # Fill in any IPs from IP pools. This must happen here, because we need to
10264 # know the nic's primary node, as specified by the iallocator
10265 for idx, nic in enumerate(self.nics):
10267 if net is not None:
10268 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10269 if netparams is None:
10270 raise errors.OpPrereqError("No netparams found for network"
10271 " %s. Probably not connected to"
10272 " node %s's nodegroup" %
10273 (net, self.pnode.name),
10274 errors.ECODE_INVAL)
10275 self.LogInfo("NIC/%d inherits netparams %s" %
10276 (idx, netparams.values()))
10277 nic.nicparams = dict(netparams)
10278 if nic.ip is not None:
10279 if nic.ip.lower() == constants.NIC_IP_POOL:
10281 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10282 except errors.ReservationError:
10283 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10284 " from the address pool" % idx,
10285 errors.ECODE_STATE)
10286 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10289 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10290 except errors.ReservationError:
10291 raise errors.OpPrereqError("IP address %s already in use"
10292 " or does not belong to network %s" %
10294 errors.ECODE_NOTUNIQUE)
10296 # net is None, ip None or given
10297 if self.op.conflicts_check:
10298 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10300 # mirror node verification
10301 if self.op.disk_template in constants.DTS_INT_MIRROR:
10302 if self.op.snode == pnode.name:
10303 raise errors.OpPrereqError("The secondary node cannot be the"
10304 " primary node", errors.ECODE_INVAL)
10305 _CheckNodeOnline(self, self.op.snode)
10306 _CheckNodeNotDrained(self, self.op.snode)
10307 _CheckNodeVmCapable(self, self.op.snode)
10308 self.secondaries.append(self.op.snode)
10310 snode = self.cfg.GetNodeInfo(self.op.snode)
10311 if pnode.group != snode.group:
10312 self.LogWarning("The primary and secondary nodes are in two"
10313 " different node groups; the disk parameters"
10314 " from the first disk's node group will be used")
10317 nodenames = [pnode.name] + self.secondaries
10319 # Verify instance specs
10320 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10322 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10323 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10324 constants.ISPEC_DISK_COUNT: len(self.disks),
10325 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10326 constants.ISPEC_NIC_COUNT: len(self.nics),
10327 constants.ISPEC_SPINDLE_USE: spindle_use,
10330 group_info = self.cfg.GetNodeGroup(pnode.group)
10331 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10332 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10333 if not self.op.ignore_ipolicy and res:
10334 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10335 (pnode.group, group_info.name, utils.CommaJoin(res)))
10336 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10338 if not self.adopt_disks:
10339 if self.op.disk_template == constants.DT_RBD:
10340 # _CheckRADOSFreeSpace() is just a placeholder.
10341 # Any function that checks prerequisites can be placed here.
10342 # Check if there is enough space on the RADOS cluster.
10343 _CheckRADOSFreeSpace()
10345 # Check lv size requirements, if not adopting
10346 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10347 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10349 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10350 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10351 disk[constants.IDISK_ADOPT])
10352 for disk in self.disks])
10353 if len(all_lvs) != len(self.disks):
10354 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10355 errors.ECODE_INVAL)
10356 for lv_name in all_lvs:
10358 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10359 # to ReserveLV use the same syntax
10360 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10361 except errors.ReservationError:
10362 raise errors.OpPrereqError("LV named %s used by another instance" %
10363 lv_name, errors.ECODE_NOTUNIQUE)
10365 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10366 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10368 node_lvs = self.rpc.call_lv_list([pnode.name],
10369 vg_names.payload.keys())[pnode.name]
10370 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10371 node_lvs = node_lvs.payload
10373 delta = all_lvs.difference(node_lvs.keys())
10375 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10376 utils.CommaJoin(delta),
10377 errors.ECODE_INVAL)
10378 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10380 raise errors.OpPrereqError("Online logical volumes found, cannot"
10381 " adopt: %s" % utils.CommaJoin(online_lvs),
10382 errors.ECODE_STATE)
10383 # update the size of disk based on what is found
10384 for dsk in self.disks:
10385 dsk[constants.IDISK_SIZE] = \
10386 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10387 dsk[constants.IDISK_ADOPT])][0]))
10389 elif self.op.disk_template == constants.DT_BLOCK:
10390 # Normalize and de-duplicate device paths
10391 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10392 for disk in self.disks])
10393 if len(all_disks) != len(self.disks):
10394 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10395 errors.ECODE_INVAL)
10396 baddisks = [d for d in all_disks
10397 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10399 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10400 " cannot be adopted" %
10401 (utils.CommaJoin(baddisks),
10402 constants.ADOPTABLE_BLOCKDEV_ROOT),
10403 errors.ECODE_INVAL)
10405 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10406 list(all_disks))[pnode.name]
10407 node_disks.Raise("Cannot get block device information from node %s" %
10409 node_disks = node_disks.payload
10410 delta = all_disks.difference(node_disks.keys())
10412 raise errors.OpPrereqError("Missing block device(s): %s" %
10413 utils.CommaJoin(delta),
10414 errors.ECODE_INVAL)
10415 for dsk in self.disks:
10416 dsk[constants.IDISK_SIZE] = \
10417 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10419 # Verify instance specs
10420 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10422 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10423 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10424 constants.ISPEC_DISK_COUNT: len(self.disks),
10425 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10426 for disk in self.disks],
10427 constants.ISPEC_NIC_COUNT: len(self.nics),
10428 constants.ISPEC_SPINDLE_USE: spindle_use,
10431 group_info = self.cfg.GetNodeGroup(pnode.group)
10432 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10433 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10434 if not self.op.ignore_ipolicy and res:
10435 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10436 " policy: %s") % (pnode.group,
10437 utils.CommaJoin(res)),
10438 errors.ECODE_INVAL)
10440 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10442 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10443 # check OS parameters (remotely)
10444 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10446 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10448 # memory check on primary node
10449 #TODO(dynmem): use MINMEM for checking
10451 _CheckNodeFreeMemory(self, self.pnode.name,
10452 "creating instance %s" % self.op.instance_name,
10453 self.be_full[constants.BE_MAXMEM],
10454 self.op.hypervisor)
10456 self.dry_run_result = list(nodenames)
10458 def Exec(self, feedback_fn):
10459 """Create and add the instance to the cluster.
10462 instance = self.op.instance_name
10463 pnode_name = self.pnode.name
10465 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10466 self.owned_locks(locking.LEVEL_NODE)), \
10467 "Node locks differ from node resource locks"
10469 ht_kind = self.op.hypervisor
10470 if ht_kind in constants.HTS_REQ_PORT:
10471 network_port = self.cfg.AllocatePort()
10473 network_port = None
10475 # This is ugly, but we have a chicken-and-egg problem here
10476 # We can only take the group disk parameters, as the instance
10477 # has no disks yet (we are generating them right here).
10478 node = self.cfg.GetNodeInfo(pnode_name)
10479 nodegroup = self.cfg.GetNodeGroup(node.group)
10480 disks = _GenerateDiskTemplate(self,
10481 self.op.disk_template,
10482 instance, pnode_name,
10485 self.instance_file_storage_dir,
10486 self.op.file_driver,
10489 self.cfg.GetGroupDiskParams(nodegroup))
10491 iobj = objects.Instance(name=instance, os=self.op.os_type,
10492 primary_node=pnode_name,
10493 nics=self.nics, disks=disks,
10494 disk_template=self.op.disk_template,
10495 admin_state=constants.ADMINST_DOWN,
10496 network_port=network_port,
10497 beparams=self.op.beparams,
10498 hvparams=self.op.hvparams,
10499 hypervisor=self.op.hypervisor,
10500 osparams=self.op.osparams,
10504 for tag in self.op.tags:
10507 if self.adopt_disks:
10508 if self.op.disk_template == constants.DT_PLAIN:
10509 # rename LVs to the newly-generated names; we need to construct
10510 # 'fake' LV disks with the old data, plus the new unique_id
10511 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10513 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10514 rename_to.append(t_dsk.logical_id)
10515 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10516 self.cfg.SetDiskID(t_dsk, pnode_name)
10517 result = self.rpc.call_blockdev_rename(pnode_name,
10518 zip(tmp_disks, rename_to))
10519 result.Raise("Failed to rename adopted LVs")
10521 feedback_fn("* creating instance disks...")
10523 _CreateDisks(self, iobj)
10524 except errors.OpExecError:
10525 self.LogWarning("Device creation failed, reverting...")
10527 _RemoveDisks(self, iobj)
10529 self.cfg.ReleaseDRBDMinors(instance)
10532 feedback_fn("adding instance %s to cluster config" % instance)
10534 self.cfg.AddInstance(iobj, self.proc.GetECId())
10536 # Declare that we don't want to remove the instance lock anymore, as we've
10537 # added the instance to the config
10538 del self.remove_locks[locking.LEVEL_INSTANCE]
10540 if self.op.mode == constants.INSTANCE_IMPORT:
10541 # Release unused nodes
10542 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10544 # Release all nodes
10545 _ReleaseLocks(self, locking.LEVEL_NODE)
10548 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10549 feedback_fn("* wiping instance disks...")
10551 _WipeDisks(self, iobj)
10552 except errors.OpExecError, err:
10553 logging.exception("Wiping disks failed")
10554 self.LogWarning("Wiping instance disks failed (%s)", err)
10558 # Something is already wrong with the disks, don't do anything else
10560 elif self.op.wait_for_sync:
10561 disk_abort = not _WaitForSync(self, iobj)
10562 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10563 # make sure the disks are not degraded (still sync-ing is ok)
10564 feedback_fn("* checking mirrors status")
10565 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10570 _RemoveDisks(self, iobj)
10571 self.cfg.RemoveInstance(iobj.name)
10572 # Make sure the instance lock gets removed
10573 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10574 raise errors.OpExecError("There are some degraded disks for"
10577 # Release all node resource locks
10578 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10580 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10581 # we need to set the disks ID to the primary node, since the
10582 # preceding code might or might not have done it, depending on
10583 # disk template and other options
10584 for disk in iobj.disks:
10585 self.cfg.SetDiskID(disk, pnode_name)
10586 if self.op.mode == constants.INSTANCE_CREATE:
10587 if not self.op.no_install:
10588 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10589 not self.op.wait_for_sync)
10591 feedback_fn("* pausing disk sync to install instance OS")
10592 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10595 for idx, success in enumerate(result.payload):
10597 logging.warn("pause-sync of instance %s for disk %d failed",
10600 feedback_fn("* running the instance OS create scripts...")
10601 # FIXME: pass debug option from opcode to backend
10603 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10604 self.op.debug_level)
10606 feedback_fn("* resuming disk sync")
10607 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10610 for idx, success in enumerate(result.payload):
10612 logging.warn("resume-sync of instance %s for disk %d failed",
10615 os_add_result.Raise("Could not add os for instance %s"
10616 " on node %s" % (instance, pnode_name))
10619 if self.op.mode == constants.INSTANCE_IMPORT:
10620 feedback_fn("* running the instance OS import scripts...")
10624 for idx, image in enumerate(self.src_images):
10628 # FIXME: pass debug option from opcode to backend
10629 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10630 constants.IEIO_FILE, (image, ),
10631 constants.IEIO_SCRIPT,
10632 (iobj.disks[idx], idx),
10634 transfers.append(dt)
10637 masterd.instance.TransferInstanceData(self, feedback_fn,
10638 self.op.src_node, pnode_name,
10639 self.pnode.secondary_ip,
10641 if not compat.all(import_result):
10642 self.LogWarning("Some disks for instance %s on node %s were not"
10643 " imported successfully" % (instance, pnode_name))
10645 rename_from = self._old_instance_name
10647 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10648 feedback_fn("* preparing remote import...")
10649 # The source cluster will stop the instance before attempting to make
10650 # a connection. In some cases stopping an instance can take a long
10651 # time, hence the shutdown timeout is added to the connection
10653 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10654 self.op.source_shutdown_timeout)
10655 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10657 assert iobj.primary_node == self.pnode.name
10659 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10660 self.source_x509_ca,
10661 self._cds, timeouts)
10662 if not compat.all(disk_results):
10663 # TODO: Should the instance still be started, even if some disks
10664 # failed to import (valid for local imports, too)?
10665 self.LogWarning("Some disks for instance %s on node %s were not"
10666 " imported successfully" % (instance, pnode_name))
10668 rename_from = self.source_instance_name
10671 # also checked in the prereq part
10672 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10675 # Run rename script on newly imported instance
10676 assert iobj.name == instance
10677 feedback_fn("Running rename script for %s" % instance)
10678 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10680 self.op.debug_level)
10681 if result.fail_msg:
10682 self.LogWarning("Failed to run rename script for %s on node"
10683 " %s: %s" % (instance, pnode_name, result.fail_msg))
10685 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10688 iobj.admin_state = constants.ADMINST_UP
10689 self.cfg.Update(iobj, feedback_fn)
10690 logging.info("Starting instance %s on node %s", instance, pnode_name)
10691 feedback_fn("* starting instance...")
10692 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10694 result.Raise("Could not start instance")
10696 return list(iobj.all_nodes)
10699 class LUInstanceMultiAlloc(NoHooksLU):
10700 """Allocates multiple instances at the same time.
10705 def CheckArguments(self):
10706 """Check arguments.
10710 for inst in self.op.instances:
10711 if inst.iallocator is not None:
10712 raise errors.OpPrereqError("iallocator is not allowed to be set on"
10713 " instance objects", errors.ECODE_INVAL)
10714 nodes.append(bool(inst.pnode))
10715 if inst.disk_template in constants.DTS_INT_MIRROR:
10716 nodes.append(bool(inst.snode))
10718 has_nodes = compat.any(nodes)
10719 if compat.all(nodes) ^ has_nodes:
10720 raise errors.OpPrereqError("There are instance objects providing"
10721 " pnode/snode while others do not",
10722 errors.ECODE_INVAL)
10724 if self.op.iallocator is None:
10725 default_iallocator = self.cfg.GetDefaultIAllocator()
10726 if default_iallocator and has_nodes:
10727 self.op.iallocator = default_iallocator
10729 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10730 " given and no cluster-wide default"
10731 " iallocator found; please specify either"
10732 " an iallocator or nodes on the instances"
10733 " or set a cluster-wide default iallocator",
10734 errors.ECODE_INVAL)
10736 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10738 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10739 utils.CommaJoin(dups), errors.ECODE_INVAL)
10741 def ExpandNames(self):
10742 """Calculate the locks.
10745 self.share_locks = _ShareAll()
10746 self.needed_locks = {}
10748 if self.op.iallocator:
10749 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10750 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10753 for inst in self.op.instances:
10754 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10755 nodeslist.append(inst.pnode)
10756 if inst.snode is not None:
10757 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10758 nodeslist.append(inst.snode)
10760 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10761 # Lock resources of instance's primary and secondary nodes (copy to
10762 # prevent accidental modification)
10763 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10765 def CheckPrereq(self):
10766 """Check prerequisite.
10769 cluster = self.cfg.GetClusterInfo()
10770 default_vg = self.cfg.GetVGName()
10771 ec_id = self.proc.GetECId()
10773 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10774 _ComputeNics(op, cluster, None,
10776 _ComputeFullBeParams(op, cluster))
10777 for op in self.op.instances]
10779 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10780 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10782 ial.Run(self.op.iallocator)
10784 if not ial.success:
10785 raise errors.OpPrereqError("Can't compute nodes using"
10786 " iallocator '%s': %s" %
10787 (self.op.iallocator, ial.info),
10788 errors.ECODE_NORES)
10790 self.ia_result = ial.result
10792 if self.op.dry_run:
10793 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10794 constants.JOB_IDS_KEY: [],
10797 def _ConstructPartialResult(self):
10798 """Constructs the partial result.
10801 (allocatable, failed) = self.ia_result
10803 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10804 map(compat.fst, allocatable),
10805 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
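# Sketch of the returned structure (shape only, key constants abbreviated):
# with ia_result = ([("inst1", ["node1"])], ["inst2"]) this would yield roughly
# {ALLOCATABLE_KEY: ["inst1"], FAILED_KEY: ["inst2"]}, i.e. the names of the
# instances the iallocator could place and of those it could not.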
10808 def Exec(self, feedback_fn):
10809 """Executes the opcode.
10812 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10813 (allocatable, failed) = self.ia_result
10816 for (name, nodes) in allocatable:
10817 op = op2inst.pop(name)
10820 (op.pnode, op.snode) = nodes
10822 (op.pnode,) = nodes
10826 missing = set(op2inst.keys()) - set(failed)
10827 assert not missing, \
10828 "Iallocator returned an incomplete result: %s" % utils.CommaJoin(missing)
10830 return ResultWithJobs(jobs, **self._ConstructPartialResult())
10833 def _CheckRADOSFreeSpace():
10834 """Compute disk size requirements inside the RADOS cluster.
10837 # For the RADOS cluster we assume there is always enough space.
10841 class LUInstanceConsole(NoHooksLU):
10842 """Connect to an instance's console.
10844 This is somewhat special in that it returns the command line that
10845 you need to run on the master node in order to connect to the console.
10851 def ExpandNames(self):
10852 self.share_locks = _ShareAll()
10853 self._ExpandAndLockInstance()
10855 def CheckPrereq(self):
10856 """Check prerequisites.
10858 This checks that the instance is in the cluster.
10861 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10862 assert self.instance is not None, \
10863 "Cannot retrieve locked instance %s" % self.op.instance_name
10864 _CheckNodeOnline(self, self.instance.primary_node)
10866 def Exec(self, feedback_fn):
10867 """Connect to the console of an instance
10870 instance = self.instance
10871 node = instance.primary_node
10873 node_insts = self.rpc.call_instance_list([node],
10874 [instance.hypervisor])[node]
10875 node_insts.Raise("Can't get node information from %s" % node)
10877 if instance.name not in node_insts.payload:
10878 if instance.admin_state == constants.ADMINST_UP:
10879 state = constants.INSTST_ERRORDOWN
10880 elif instance.admin_state == constants.ADMINST_DOWN:
10881 state = constants.INSTST_ADMINDOWN
10883 state = constants.INSTST_ADMINOFFLINE
10884 raise errors.OpExecError("Instance %s is not running (state %s)" %
10885 (instance.name, state))
10887 logging.debug("Connecting to console of %s on %s", instance.name, node)
10889 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10892 def _GetInstanceConsole(cluster, instance):
10893 """Returns console information for an instance.
10895 @type cluster: L{objects.Cluster}
10896 @type instance: L{objects.Instance}
10900 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10901 # beparams and hvparams are passed separately, to avoid editing the
10902 # instance and then saving the defaults in the instance itself.
10903 hvparams = cluster.FillHV(instance)
10904 beparams = cluster.FillBE(instance)
10905 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10907 assert console.instance == instance.name
10908 assert console.Validate()
10910 return console.ToDict()
10913 class LUInstanceReplaceDisks(LogicalUnit):
10914 """Replace the disks of an instance.
10917 HPATH = "mirrors-replace"
10918 HTYPE = constants.HTYPE_INSTANCE
10921 def CheckArguments(self):
10922 """Check arguments.
10925 remote_node = self.op.remote_node
10926 ialloc = self.op.iallocator
10927 if self.op.mode == constants.REPLACE_DISK_CHG:
10928 if remote_node is None and ialloc is None:
10929 raise errors.OpPrereqError("When changing the secondary either an"
10930 " iallocator script must be used or the"
10931 " new node given", errors.ECODE_INVAL)
10933 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10935 elif remote_node is not None or ialloc is not None:
10936 # Not replacing the secondary
10937 raise errors.OpPrereqError("The iallocator and new node options can"
10938 " only be used when changing the"
10939 " secondary node", errors.ECODE_INVAL)
10941 def ExpandNames(self):
10942 self._ExpandAndLockInstance()
10944 assert locking.LEVEL_NODE not in self.needed_locks
10945 assert locking.LEVEL_NODE_RES not in self.needed_locks
10946 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10948 assert self.op.iallocator is None or self.op.remote_node is None, \
10949 "Conflicting options"
10951 if self.op.remote_node is not None:
10952 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10954 # Warning: do not remove the locking of the new secondary here
10955 # unless DRBD8.AddChildren is changed to work in parallel;
10956 # currently it doesn't since parallel invocations of
10957 # FindUnusedMinor will conflict
10958 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10959 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10961 self.needed_locks[locking.LEVEL_NODE] = []
10962 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10964 if self.op.iallocator is not None:
10965 # iallocator will select a new node in the same group
10966 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10968 self.needed_locks[locking.LEVEL_NODE_RES] = []
10970 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10971 self.op.iallocator, self.op.remote_node,
10972 self.op.disks, self.op.early_release,
10973 self.op.ignore_ipolicy)
10975 self.tasklets = [self.replacer]
10977 def DeclareLocks(self, level):
10978 if level == locking.LEVEL_NODEGROUP:
10979 assert self.op.remote_node is None
10980 assert self.op.iallocator is not None
10981 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10983 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10984 # Lock all groups used by instance optimistically; this requires going
10985 # via the node before it's locked, requiring verification later on
10986 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10987 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10989 elif level == locking.LEVEL_NODE:
10990 if self.op.iallocator is not None:
10991 assert self.op.remote_node is None
10992 assert not self.needed_locks[locking.LEVEL_NODE]
10994 # Lock member nodes of all locked groups
10995 self.needed_locks[locking.LEVEL_NODE] = \
10997 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10998 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11000 self._LockInstancesNodes()
11001 elif level == locking.LEVEL_NODE_RES:
11003 self.needed_locks[locking.LEVEL_NODE_RES] = \
11004 self.needed_locks[locking.LEVEL_NODE]
11006 def BuildHooksEnv(self):
11007 """Build hooks env.
11009 This runs on the master, the primary and all the secondaries.
11012 instance = self.replacer.instance
11014 "MODE": self.op.mode,
11015 "NEW_SECONDARY": self.op.remote_node,
11016 "OLD_SECONDARY": instance.secondary_nodes[0],
11018 env.update(_BuildInstanceHookEnvByObject(self, instance))
11021 def BuildHooksNodes(self):
11022 """Build hooks nodes.
11025 instance = self.replacer.instance
11027 self.cfg.GetMasterNode(),
11028 instance.primary_node,
11030 if self.op.remote_node is not None:
11031 nl.append(self.op.remote_node)
11034 def CheckPrereq(self):
11035 """Check prerequisites.
11038 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11039 self.op.iallocator is None)
11041 # Verify if node group locks are still correct
11042 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11044 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11046 return LogicalUnit.CheckPrereq(self)
11049 class TLReplaceDisks(Tasklet):
11050 """Replaces disks for an instance.
11052 Note: Locking is not within the scope of this class.
11055 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11056 disks, early_release, ignore_ipolicy):
11057 """Initializes this class.
11060 Tasklet.__init__(self, lu)
11063 self.instance_name = instance_name
11065 self.iallocator_name = iallocator_name
11066 self.remote_node = remote_node
11068 self.early_release = early_release
11069 self.ignore_ipolicy = ignore_ipolicy
11072 self.instance = None
11073 self.new_node = None
11074 self.target_node = None
11075 self.other_node = None
11076 self.remote_node_info = None
11077 self.node_secondary_ip = None
@staticmethod
11080 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11081 """Compute a new secondary node using an IAllocator.
11084 req = iallocator.IAReqRelocate(name=instance_name,
11085 relocate_from=list(relocate_from))
11086 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11088 ial.Run(iallocator_name)
11090 if not ial.success:
11091 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11092 " %s" % (iallocator_name, ial.info),
11093 errors.ECODE_NORES)
11095 remote_node_name = ial.result[0]
11097 lu.LogInfo("Selected new secondary for instance '%s': %s",
11098 instance_name, remote_node_name)
11100 return remote_node_name
11102 def _FindFaultyDisks(self, node_name):
11103 """Wrapper for L{_FindFaultyInstanceDisks}.
"""
11106 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
node_name, True)
11109 def _CheckDisksActivated(self, instance):
11110 """Checks if the instance disks are activated.
11112 @param instance: The instance to check disks
11113 @return: True if they are activated, False otherwise
11116 nodes = instance.all_nodes
11118 for idx, dev in enumerate(instance.disks):
for node in nodes:
11120 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11121 self.cfg.SetDiskID(dev, node)

11123 result = _BlockdevFind(self, node, dev, instance)

if result.offline:
continue
11127 elif result.fail_msg or not result.payload:
return False

return True
11132 def CheckPrereq(self):
11133 """Check prerequisites.
11135 This checks that the instance is in the cluster.
11138 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11139 assert instance is not None, \
11140 "Cannot retrieve locked instance %s" % self.instance_name
11142 if instance.disk_template != constants.DT_DRBD8:
11143 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11144 " instances", errors.ECODE_INVAL)
11146 if len(instance.secondary_nodes) != 1:
11147 raise errors.OpPrereqError("The instance has a strange layout,"
11148 " expected one secondary but found %d" %
11149 len(instance.secondary_nodes),
11150 errors.ECODE_FAULT)
11152 instance = self.instance
11153 secondary_node = instance.secondary_nodes[0]
11155 if self.iallocator_name is None:
11156 remote_node = self.remote_node
else:
11158 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11159 instance.name, instance.secondary_nodes)
11161 if remote_node is None:
11162 self.remote_node_info = None
else:
11164 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11165 "Remote node '%s' is not locked" % remote_node
11167 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11168 assert self.remote_node_info is not None, \
11169 "Cannot retrieve locked node %s" % remote_node
11171 if remote_node == self.instance.primary_node:
11172 raise errors.OpPrereqError("The specified node is the primary node of"
11173 " the instance", errors.ECODE_INVAL)
11175 if remote_node == secondary_node:
11176 raise errors.OpPrereqError("The specified node is already the"
11177 " secondary node of the instance",
11178 errors.ECODE_INVAL)
11180 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11181 constants.REPLACE_DISK_CHG):
11182 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11183 errors.ECODE_INVAL)
11185 if self.mode == constants.REPLACE_DISK_AUTO:
11186 if not self._CheckDisksActivated(instance):
11187 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11188 " first" % self.instance_name,
11189 errors.ECODE_STATE)
11190 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11191 faulty_secondary = self._FindFaultyDisks(secondary_node)
11193 if faulty_primary and faulty_secondary:
11194 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11195 " one node and can not be repaired"
11196 " automatically" % self.instance_name,
11197 errors.ECODE_STATE)
if faulty_primary:
11200 self.disks = faulty_primary
11201 self.target_node = instance.primary_node
11202 self.other_node = secondary_node
11203 check_nodes = [self.target_node, self.other_node]
11204 elif faulty_secondary:
11205 self.disks = faulty_secondary
11206 self.target_node = secondary_node
11207 self.other_node = instance.primary_node
11208 check_nodes = [self.target_node, self.other_node]
else:
self.disks = []
check_nodes = []

else:
11214 # Non-automatic modes
11215 if self.mode == constants.REPLACE_DISK_PRI:
11216 self.target_node = instance.primary_node
11217 self.other_node = secondary_node
11218 check_nodes = [self.target_node, self.other_node]
11220 elif self.mode == constants.REPLACE_DISK_SEC:
11221 self.target_node = secondary_node
11222 self.other_node = instance.primary_node
11223 check_nodes = [self.target_node, self.other_node]
11225 elif self.mode == constants.REPLACE_DISK_CHG:
11226 self.new_node = remote_node
11227 self.other_node = instance.primary_node
11228 self.target_node = secondary_node
11229 check_nodes = [self.new_node, self.other_node]
11231 _CheckNodeNotDrained(self.lu, remote_node)
11232 _CheckNodeVmCapable(self.lu, remote_node)
11234 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11235 assert old_node_info is not None
11236 if old_node_info.offline and not self.early_release:
11237 # doesn't make sense to delay the release
11238 self.early_release = True
11239 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11240 " early-release mode", secondary_node)
else:
11243 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
self.mode)
11246 # If not specified all disks should be replaced
if not self.disks:
11248 self.disks = range(len(self.instance.disks))
11250 # TODO: This is ugly, but right now we can't distinguish between internal
11251 # submitted opcode and external one. We should fix that.
11252 if self.remote_node_info:
11253 # We change the node, so let's verify it still meets the instance policy
11254 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11255 cluster = self.cfg.GetClusterInfo()
11256 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
new_group_info)
11258 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11259 ignore=self.ignore_ipolicy)
11261 for node in check_nodes:
11262 _CheckNodeOnline(self.lu, node)
11264 touched_nodes = frozenset(node_name for node_name in [self.new_node,
self.other_node,
self.target_node]
11267 if node_name is not None)
11269 # Release unneeded node and node resource locks
11270 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11271 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11273 # Release any owned node group
11274 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11275 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11277 # Check whether disks are valid
11278 for disk_idx in self.disks:
11279 instance.FindDisk(disk_idx)
11281 # Get secondary node IP addresses
11282 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11283 in self.cfg.GetMultiNodeInfo(touched_nodes))
11285 def Exec(self, feedback_fn):
11286 """Execute disk replacement.
11288 This dispatches the disk replacement to the appropriate handler.
11292 # Verify owned locks before starting operation
11293 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11294 assert set(owned_nodes) == set(self.node_secondary_ip), \
11295 ("Incorrect node locks, owning %s, expected %s" %
11296 (owned_nodes, self.node_secondary_ip.keys()))
11297 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11298 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11300 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11301 assert list(owned_instances) == [self.instance_name], \
11302 "Instance '%s' not locked" % self.instance_name
11304 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11305 "Should not own any node group lock at this point"
if not self.disks:
11308 feedback_fn("No disks need replacement for instance '%s'" %
11309 self.instance.name)
return

11312 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11313 (utils.CommaJoin(self.disks), self.instance.name))
11314 feedback_fn("Current primary node: %s", self.instance.primary_node)
11315 feedback_fn("Current secondary node: %s",
11316 utils.CommaJoin(self.instance.secondary_nodes))
11318 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11320 # Activate the instance disks if we're replacing them on a down instance
if activate_disks:
11322 _StartInstanceDisks(self.lu, self.instance, True)

try:
11325 # Should we replace the secondary node?
11326 if self.new_node is not None:
11327 fn = self._ExecDrbd8Secondary
else:
11329 fn = self._ExecDrbd8DiskOnly

11331 result = fn(feedback_fn)
finally:
11333 # Deactivate the instance disks if we're replacing them on a
# down instance
if activate_disks:
11336 _SafeShutdownInstanceDisks(self.lu, self.instance)
11338 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11341 # Verify owned locks
11342 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11343 nodes = frozenset(self.node_secondary_ip)
11344 assert ((self.early_release and not owned_nodes) or
11345 (not self.early_release and not (set(owned_nodes) - nodes))), \
11346 ("Not owning the correct locks, early_release=%s, owned=%r,"
11347 " nodes=%r" % (self.early_release, owned_nodes, nodes))

return result
11351 def _CheckVolumeGroup(self, nodes):
11352 self.lu.LogInfo("Checking volume groups")
11354 vgname = self.cfg.GetVGName()
11356 # Make sure volume group exists on all involved nodes
11357 results = self.rpc.call_vg_list(nodes)
if not results:
11359 raise errors.OpExecError("Can't list volume groups on the nodes")

for node in nodes:
11362 res = results[node]
11363 res.Raise("Error checking node %s" % node)
11364 if vgname not in res.payload:
11365 raise errors.OpExecError("Volume group '%s' not found on node %s" %
(vgname, node))
11368 def _CheckDisksExistence(self, nodes):
11369 # Check disk existence
11370 for idx, dev in enumerate(self.instance.disks):
11371 if idx not in self.disks:
continue

for node in nodes:
11375 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11376 self.cfg.SetDiskID(dev, node)
11378 result = _BlockdevFind(self, node, dev, self.instance)
11380 msg = result.fail_msg
11381 if msg or not result.payload:
if not msg:
11383 msg = "disk not found"
11384 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
(idx, node, msg))
11387 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11388 for idx, dev in enumerate(self.instance.disks):
11389 if idx not in self.disks:
continue

11392 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
(idx, node_name))
11395 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11396 on_primary, ldisk=ldisk):
11397 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11398 " replace disks for instance %s" %
11399 (node_name, self.instance.name))
11401 def _CreateNewStorage(self, node_name):
11402 """Create new storage on the primary or secondary node.
11404 This is only used for same-node replaces, not for changing the
11405 secondary node, hence we don't want to modify the existing disk.
"""
iv_names = {}

11410 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11411 for idx, dev in enumerate(disks):
11412 if idx not in self.disks:
continue

11415 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11417 self.cfg.SetDiskID(dev, node_name)
11419 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
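# For disk index 0, for example, this yields [".disk0_data", ".disk0_meta"];
# _GenerateUniqueNames below turns these suffixes into unique LV names.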
11420 names = _GenerateUniqueNames(self.lu, lv_names)
11422 (data_disk, meta_disk) = dev.children
11423 vg_data = data_disk.logical_id[0]
11424 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11425 logical_id=(vg_data, names[0]),
11426 params=data_disk.params)
11427 vg_meta = meta_disk.logical_id[0]
11428 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11429 size=constants.DRBD_META_SIZE,
11430 logical_id=(vg_meta, names[1]),
11431 params=meta_disk.params)
11433 new_lvs = [lv_data, lv_meta]
11434 old_lvs = [child.Copy() for child in dev.children]
11435 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11437 # we pass force_create=True to force the LVM creation
11438 for new_lv in new_lvs:
11439 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11440 _GetInstanceInfoText(self.instance), False)

return iv_names
11444 def _CheckDevices(self, node_name, iv_names):
11445 for name, (dev, _, _) in iv_names.iteritems():
11446 self.cfg.SetDiskID(dev, node_name)
11448 result = _BlockdevFind(self, node_name, dev, self.instance)
11450 msg = result.fail_msg
11451 if msg or not result.payload:
if not msg:
11453 msg = "disk not found"
11454 raise errors.OpExecError("Can't find DRBD device %s: %s" %
(name, msg))
11457 if result.payload.is_degraded:
11458 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11460 def _RemoveOldStorage(self, node_name, iv_names):
11461 for name, (_, old_lvs, _) in iv_names.iteritems():
11462 self.lu.LogInfo("Remove logical volumes for %s", name)
for lv in old_lvs:
11465 self.cfg.SetDiskID(lv, node_name)

11467 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
if msg:
11469 self.lu.LogWarning("Can't remove old LV: %s", msg,
11470 hint="remove unused LVs manually")
11472 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11473 """Replace a disk on the primary or secondary for DRBD 8.
11475 The algorithm for replace is quite complicated:
11477 1. for each disk to be replaced:
11479 1. create new LVs on the target node with unique names
11480 1. detach old LVs from the drbd device
11481 1. rename old LVs to name_replaced.<time_t>
11482 1. rename new LVs to old LVs
11483 1. attach the new LVs (with the old names now) to the drbd device
11485 1. wait for sync across all devices
11487 1. for each modified disk:
11489 1. remove old LVs (which have the name name_replaced.<time_t>)
11491 Failures are not very well handled.
"""
steps_total = 6

11496 # Step: check device activation
11497 self.lu.LogStep(1, steps_total, "Check device existence")
11498 self._CheckDisksExistence([self.other_node, self.target_node])
11499 self._CheckVolumeGroup([self.target_node, self.other_node])
11501 # Step: check other node consistency
11502 self.lu.LogStep(2, steps_total, "Check peer consistency")
11503 self._CheckDisksConsistency(self.other_node,
11504 self.other_node == self.instance.primary_node,
11507 # Step: create new storage
11508 self.lu.LogStep(3, steps_total, "Allocate new storage")
11509 iv_names = self._CreateNewStorage(self.target_node)
11511 # Step: for each lv, detach+rename*2+attach
11512 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11513 for dev, old_lvs, new_lvs in iv_names.itervalues():
11514 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11516 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
old_lvs)
11518 result.Raise("Can't detach drbd from local storage on node"
11519 " %s for device %s" % (self.target_node, dev.iv_name))
11521 #cfg.Update(instance)
11523 # ok, we created the new LVs, so now we know we have the needed
11524 # storage; as such, we proceed on the target node to rename
11525 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11526 # using the assumption that logical_id == physical_id (which in
11527 # turn is the unique_id on that node)
11529 # FIXME(iustin): use a better name for the replaced LVs
11530 temp_suffix = int(time.time())
11531 ren_fn = lambda d, suff: (d.physical_id[0],
11532 d.physical_id[1] + "_replaced-%s" % suff)
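# ren_fn maps an LV's (vg, lv_name) physical_id to (vg, lv_name + "_replaced-<time>"),
# e.g. ("xenvg", ".disk0_data") -> ("xenvg", ".disk0_data_replaced-1409836800");
# the volume group and timestamp shown here are purely illustrative.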
11534 # Build the rename list based on what LVs exist on the node
11535 rename_old_to_new = []
11536 for to_ren in old_lvs:
11537 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11538 if not result.fail_msg and result.payload:
11540 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11542 self.lu.LogInfo("Renaming the old LVs on the target node")
11543 result = self.rpc.call_blockdev_rename(self.target_node,
rename_old_to_new)
11545 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11547 # Now we rename the new LVs to the old LVs
11548 self.lu.LogInfo("Renaming the new LVs on the target node")
11549 rename_new_to_old = [(new, old.physical_id)
11550 for old, new in zip(old_lvs, new_lvs)]
11551 result = self.rpc.call_blockdev_rename(self.target_node,
rename_new_to_old)
11553 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11555 # Intermediate steps of in memory modifications
11556 for old, new in zip(old_lvs, new_lvs):
11557 new.logical_id = old.logical_id
11558 self.cfg.SetDiskID(new, self.target_node)
11560 # We need to modify old_lvs so that removal later removes the
11561 # right LVs, not the newly added ones; note that old_lvs is a
11563 for disk in old_lvs:
11564 disk.logical_id = ren_fn(disk, temp_suffix)
11565 self.cfg.SetDiskID(disk, self.target_node)
11567 # Now that the new lvs have the old name, we can add them to the device
11568 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11569 result = self.rpc.call_blockdev_addchildren(self.target_node,
11570 (dev, self.instance), new_lvs)
11571 msg = result.fail_msg
if msg:
11573 for new_lv in new_lvs:
11574 msg2 = self.rpc.call_blockdev_remove(self.target_node,
new_lv).fail_msg
if msg2:
11577 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11578 hint=("cleanup manually the unused logical"
" volumes"))
11580 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11582 cstep = itertools.count(5)
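# Steps 1-4 are logged above; cstep hands out the remaining step numbers
# (5 and 6) in whichever order the early-release setting makes them run.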
11584 if self.early_release:
11585 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11586 self._RemoveOldStorage(self.target_node, iv_names)
11587 # TODO: Check if releasing locks early still makes sense
11588 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
else:
11590 # Release all resource locks except those used by the instance
11591 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11592 keep=self.node_secondary_ip.keys())
11594 # Release all node locks while waiting for sync
11595 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11597 # TODO: Can the instance lock be downgraded here? Take the optional disk
11598 # shutdown in the caller into consideration.
11601 # This can fail as the old devices are degraded and _WaitForSync
11602 # does a combined result over all disks, so we don't check its return value
11603 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11604 _WaitForSync(self.lu, self.instance)
11606 # Check all devices manually
11607 self._CheckDevices(self.instance.primary_node, iv_names)
11609 # Step: remove old storage
11610 if not self.early_release:
11611 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11612 self._RemoveOldStorage(self.target_node, iv_names)
11614 def _ExecDrbd8Secondary(self, feedback_fn):
11615 """Replace the secondary node for DRBD 8.
11617 The algorithm for replace is quite complicated:
11618 - for all disks of the instance:
11619 - create new LVs on the new node with same names
11620 - shutdown the drbd device on the old secondary
11621 - disconnect the drbd network on the primary
11622 - create the drbd device on the new secondary
11623 - network attach the drbd on the primary, using an artifice:
11624 the drbd code for Attach() will connect to the network if it
11625 finds a device which is connected to the good local disks but
11626 not network enabled
11627 - wait for sync across all devices
11628 - remove all disks from the old secondary
11630 Failures are not very well handled.
"""
steps_total = 6

11635 pnode = self.instance.primary_node
11637 # Step: check device activation
11638 self.lu.LogStep(1, steps_total, "Check device existence")
11639 self._CheckDisksExistence([self.instance.primary_node])
11640 self._CheckVolumeGroup([self.instance.primary_node])
11642 # Step: check other node consistency
11643 self.lu.LogStep(2, steps_total, "Check peer consistency")
11644 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11646 # Step: create new storage
11647 self.lu.LogStep(3, steps_total, "Allocate new storage")
11648 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11649 for idx, dev in enumerate(disks):
11650 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11651 (self.new_node, idx))
11652 # we pass force_create=True to force LVM creation
11653 for new_lv in dev.children:
11654 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11655 True, _GetInstanceInfoText(self.instance), False)
11657 # Step 4: drbd minors and drbd setup changes
11658 # after this, we must manually remove the drbd minors on both the
11659 # error and the success paths
11660 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11661 minors = self.cfg.AllocateDRBDMinor([self.new_node
11662 for dev in self.instance.disks],
11663 self.instance.name)
11664 logging.debug("Allocated minors %r", minors)
iv_names = {}
11667 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11668 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11669 (self.new_node, idx))
11670 # create new devices on new_node; note that we create two IDs:
11671 # one without port, so the drbd will be activated without
11672 # networking information on the new node at this stage, and one
11673 # with network, for the latter activation in step 4
11674 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11675 if self.instance.primary_node == o_node1:
p_minor = o_minor1
else:
11678 assert self.instance.primary_node == o_node2, "Three-node instance?"
p_minor = o_minor2
11681 new_alone_id = (self.instance.primary_node, self.new_node, None,
11682 p_minor, new_minor, o_secret)
11683 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11684 p_minor, new_minor, o_secret)
11686 iv_names[idx] = (dev, dev.children, new_net_id)
11687 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
new_net_id)
11689 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11690 logical_id=new_alone_id,
11691 children=dev.children,
size=dev.size,
params={})
11694 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
self.cfg)
try:
11697 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
anno_new_drbd,
11699 _GetInstanceInfoText(self.instance), False)
11700 except errors.GenericError:
11701 self.cfg.ReleaseDRBDMinors(self.instance.name)
raise
11704 # We have new devices, shutdown the drbd on the old secondary
11705 for idx, dev in enumerate(self.instance.disks):
11706 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
11707 self.cfg.SetDiskID(dev, self.target_node)
11708 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11709 (dev, self.instance)).fail_msg
if msg:
11711 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11712 " node: %s" % (idx, msg),
11713 hint=("Please cleanup this device manually as"
11714 " soon as possible"))
11716 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11717 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11718 self.instance.disks)[pnode]
11720 msg = result.fail_msg
if msg:
11722 # detaches didn't succeed (unlikely)
11723 self.cfg.ReleaseDRBDMinors(self.instance.name)
11724 raise errors.OpExecError("Can't detach the disks from the network on"
11725 " old node: %s" % (msg,))
11727 # if we managed to detach at least one, we update all the disks of
11728 # the instance to point to the new secondary
11729 self.lu.LogInfo("Updating instance configuration")
11730 for dev, _, new_logical_id in iv_names.itervalues():
11731 dev.logical_id = new_logical_id
11732 self.cfg.SetDiskID(dev, self.instance.primary_node)
11734 self.cfg.Update(self.instance, feedback_fn)
11736 # Release all node locks (the configuration has been updated)
11737 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11739 # and now perform the drbd attach
11740 self.lu.LogInfo("Attaching primary drbds to new secondary"
11741 " (standalone => connected)")
11742 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
self.new_node],
11744 self.node_secondary_ip,
11745 (self.instance.disks, self.instance),
11746 self.instance.name,
False)
11748 for to_node, to_result in result.items():
11749 msg = to_result.fail_msg
if msg:
11751 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
to_node, msg,
11753 hint=("please do a gnt-instance info to see the"
11754 " status of disks"))
11756 cstep = itertools.count(5)
11758 if self.early_release:
11759 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11760 self._RemoveOldStorage(self.target_node, iv_names)
11761 # TODO: Check if releasing locks early still makes sense
11762 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
else:
11764 # Release all resource locks except those used by the instance
11765 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11766 keep=self.node_secondary_ip.keys())
11768 # TODO: Can the instance lock be downgraded here? Take the optional disk
11769 # shutdown in the caller into consideration.
11772 # This can fail as the old devices are degraded and _WaitForSync
11773 # does a combined result over all disks, so we don't check its return value
11774 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11775 _WaitForSync(self.lu, self.instance)
11777 # Check all devices manually
11778 self._CheckDevices(self.instance.primary_node, iv_names)
11780 # Step: remove old storage
11781 if not self.early_release:
11782 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11783 self._RemoveOldStorage(self.target_node, iv_names)
11786 class LURepairNodeStorage(NoHooksLU):
11787 """Repairs the volume group on a node.
11792 def CheckArguments(self):
11793 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11795 storage_type = self.op.storage_type
11797 if (constants.SO_FIX_CONSISTENCY not in
11798 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11799 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11800 " repaired" % storage_type,
11801 errors.ECODE_INVAL)
11803 def ExpandNames(self):
11804 self.needed_locks = {
11805 locking.LEVEL_NODE: [self.op.node_name],
}
11808 def _CheckFaultyDisks(self, instance, node_name):
11809 """Ensure faulty disks abort the opcode or at least warn."""
try:
11811 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
node_name, True):
11813 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11814 " node '%s'" % (instance.name, node_name),
11815 errors.ECODE_STATE)
11816 except errors.OpPrereqError, err:
11817 if self.op.ignore_consistency:
11818 self.LogWarning(str(err.args[0]))
else:
raise
11822 def CheckPrereq(self):
11823 """Check prerequisites.
11826 # Check whether any instance on this node has faulty disks
11827 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11828 if inst.admin_state != constants.ADMINST_UP:
continue

11830 check_nodes = set(inst.all_nodes)
11831 check_nodes.discard(self.op.node_name)
11832 for inst_node_name in check_nodes:
11833 self._CheckFaultyDisks(inst, inst_node_name)
11835 def Exec(self, feedback_fn):
11836 feedback_fn("Repairing storage unit '%s' on %s ..." %
11837 (self.op.name, self.op.node_name))
11839 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11840 result = self.rpc.call_storage_execute(self.op.node_name,
11841 self.op.storage_type, st_args,
11843 constants.SO_FIX_CONSISTENCY)
11844 result.Raise("Failed to repair storage unit '%s' on %s" %
11845 (self.op.name, self.op.node_name))
11848 class LUNodeEvacuate(NoHooksLU):
11849 """Evacuates instances off a list of nodes.
11854 _MODE2IALLOCATOR = {
11855 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11856 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11857 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11859 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11860 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11861 constants.IALLOCATOR_NEVAC_MODES)
11863 def CheckArguments(self):
11864 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11866 def ExpandNames(self):
11867 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11869 if self.op.remote_node is not None:
11870 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11871 assert self.op.remote_node
11873 if self.op.remote_node == self.op.node_name:
11874 raise errors.OpPrereqError("Can not use evacuated node as a new"
11875 " secondary node", errors.ECODE_INVAL)
11877 if self.op.mode != constants.NODE_EVAC_SEC:
11878 raise errors.OpPrereqError("Without the use of an iallocator only"
11879 " secondary instances can be evacuated",
11880 errors.ECODE_INVAL)
11883 self.share_locks = _ShareAll()
11884 self.needed_locks = {
11885 locking.LEVEL_INSTANCE: [],
11886 locking.LEVEL_NODEGROUP: [],
11887 locking.LEVEL_NODE: [],
}
11890 # Determine nodes (via group) optimistically, needs verification once locks
11891 # have been acquired
11892 self.lock_nodes = self._DetermineNodes()
11894 def _DetermineNodes(self):
11895 """Gets the list of nodes to operate on.
11898 if self.op.remote_node is None:
11899 # Iallocator will choose any node(s) in the same group
11900 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
else:
11902 group_nodes = frozenset([self.op.remote_node])
11904 # Determine nodes to be locked
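# e.g. evacuating "node3" via an iallocator locks node3 plus every other
# member of node3's node group(s); with an explicit remote node only the
# two nodes involved are locked (node names here are illustrative).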
11905 return set([self.op.node_name]) | group_nodes
11907 def _DetermineInstances(self):
11908 """Builds list of instances to operate on.
11911 assert self.op.mode in constants.NODE_EVAC_MODES
11913 if self.op.mode == constants.NODE_EVAC_PRI:
11914 # Primary instances only
11915 inst_fn = _GetNodePrimaryInstances
11916 assert self.op.remote_node is None, \
11917 "Evacuating primary instances requires iallocator"
11918 elif self.op.mode == constants.NODE_EVAC_SEC:
11919 # Secondary instances only
11920 inst_fn = _GetNodeSecondaryInstances
else:
# All instances
11923 assert self.op.mode == constants.NODE_EVAC_ALL
11924 inst_fn = _GetNodeInstances
11925 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11927 raise errors.OpPrereqError("Due to an issue with the iallocator"
11928 " interface it is not possible to evacuate"
11929 " all instances at once; specify explicitly"
11930 " whether to evacuate primary or secondary"
11932 errors.ECODE_INVAL)
11934 return inst_fn(self.cfg, self.op.node_name)
11936 def DeclareLocks(self, level):
11937 if level == locking.LEVEL_INSTANCE:
11938 # Lock instances optimistically, needs verification once node and group
11939 # locks have been acquired
11940 self.needed_locks[locking.LEVEL_INSTANCE] = \
11941 set(i.name for i in self._DetermineInstances())
11943 elif level == locking.LEVEL_NODEGROUP:
11944 # Lock node groups for all potential target nodes optimistically, needs
11945 # verification once nodes have been acquired
11946 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11947 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11949 elif level == locking.LEVEL_NODE:
11950 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11952 def CheckPrereq(self):
11954 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11955 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11956 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11958 need_nodes = self._DetermineNodes()
11960 if not owned_nodes.issuperset(need_nodes):
11961 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11962 " locks were acquired, current nodes"
11963 " are '%s', used to be '%s'; retry the"
" operation" %
11965 (self.op.node_name,
11966 utils.CommaJoin(need_nodes),
11967 utils.CommaJoin(owned_nodes)),
11968 errors.ECODE_STATE)
11970 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11971 if owned_groups != wanted_groups:
11972 raise errors.OpExecError("Node groups changed since locks were acquired,"
11973 " current groups are '%s', used to be '%s';"
11974 " retry the operation" %
11975 (utils.CommaJoin(wanted_groups),
11976 utils.CommaJoin(owned_groups)))
11978 # Determine affected instances
11979 self.instances = self._DetermineInstances()
11980 self.instance_names = [i.name for i in self.instances]
11982 if set(self.instance_names) != owned_instances:
11983 raise errors.OpExecError("Instances on node '%s' changed since locks"
11984 " were acquired, current instances are '%s',"
11985 " used to be '%s'; retry the operation" %
11986 (self.op.node_name,
11987 utils.CommaJoin(self.instance_names),
11988 utils.CommaJoin(owned_instances)))
11990 if self.instance_names:
11991 self.LogInfo("Evacuating instances from node '%s': %s",
self.op.node_name,
11993 utils.CommaJoin(utils.NiceSort(self.instance_names)))
else:
11995 self.LogInfo("No instances to evacuate from node '%s'",
self.op.node_name)
11998 if self.op.remote_node is not None:
11999 for i in self.instances:
12000 if i.primary_node == self.op.remote_node:
12001 raise errors.OpPrereqError("Node %s is the primary node of"
12002 " instance %s, cannot use it as"
12004 (self.op.remote_node, i.name),
12005 errors.ECODE_INVAL)
12007 def Exec(self, feedback_fn):
12008 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12010 if not self.instance_names:
12011 # No instances to evacuate
jobs = []

12014 elif self.op.iallocator is not None:
12015 # TODO: Implement relocation to other group
12016 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12017 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12018 instances=list(self.instance_names))
12019 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12021 ial.Run(self.op.iallocator)
12023 if not ial.success:
12024 raise errors.OpPrereqError("Can't compute node evacuation using"
12025 " iallocator '%s': %s" %
12026 (self.op.iallocator, ial.info),
12027 errors.ECODE_NORES)
12029 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12031 elif self.op.remote_node is not None:
12032 assert self.op.mode == constants.NODE_EVAC_SEC
jobs = [
12034 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12035 remote_node=self.op.remote_node,
disks=[],
12037 mode=constants.REPLACE_DISK_CHG,
12038 early_release=self.op.early_release)]
12039 for instance_name in self.instance_names]
else:
12042 raise errors.ProgrammerError("No iallocator or remote node")
12044 return ResultWithJobs(jobs)
12047 def _SetOpEarlyRelease(early_release, op):
12048 """Sets C{early_release} flag on opcodes if available.
"""
try:
12052 op.early_release = early_release
12053 except AttributeError:
12054 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

return op
12059 def _NodeEvacDest(use_nodes, group, nodes):
12060 """Returns group or nodes depending on caller's choice.
"""
if use_nodes:
12064 return utils.CommaJoin(nodes)
else:
return group
12069 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12070 """Unpacks the result of change-group and node-evacuate iallocator requests.
12072 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12073 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12075 @type lu: L{LogicalUnit}
12076 @param lu: Logical unit instance
12077 @type alloc_result: tuple/list
12078 @param alloc_result: Result from iallocator
12079 @type early_release: bool
12080 @param early_release: Whether to release locks early if possible
12081 @type use_nodes: bool
12082 @param use_nodes: Whether to display node names instead of groups
12085 (moved, failed, jobs) = alloc_result
if failed:
12088 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12089 for (name, reason) in failed)
12090 lu.LogWarning("Unable to evacuate instances %s", failreason)
12091 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
if moved:
12094 lu.LogInfo("Instances to be moved: %s",
12095 utils.CommaJoin("%s (to %s)" %
12096 (name, _NodeEvacDest(use_nodes, group, nodes))
12097 for (name, group, nodes) in moved))
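# Each entry in the iallocator's job list is itself a list of serialized
# opcodes; below they are deserialized and, where supported, get the
# early_release flag applied, yielding one job (list of opcodes) per entry.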
12099 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12100 map(opcodes.OpCode.LoadOpCode, ops))
for ops in jobs]
12104 def _DiskSizeInBytesToMebibytes(lu, size):
12105 """Converts a disk size in bytes to mebibytes.
12107 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12110 (mib, remainder) = divmod(size, 1024 * 1024)
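# e.g. size = 1073741825 (1 GiB plus one byte) gives mib = 1024 and
# remainder = 1, so the result below is rounded up to 1025 MiB.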
if remainder != 0:
12113 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12114 " to not overwrite existing data (%s bytes will not be"
12115 " wiped)", (1024 * 1024) - remainder)
mib += 1

return mib
12121 class LUInstanceGrowDisk(LogicalUnit):
12122 """Grow a disk of an instance.
12125 HPATH = "disk-grow"
12126 HTYPE = constants.HTYPE_INSTANCE
12129 def ExpandNames(self):
12130 self._ExpandAndLockInstance()
12131 self.needed_locks[locking.LEVEL_NODE] = []
12132 self.needed_locks[locking.LEVEL_NODE_RES] = []
12133 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12134 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12136 def DeclareLocks(self, level):
12137 if level == locking.LEVEL_NODE:
12138 self._LockInstancesNodes()
12139 elif level == locking.LEVEL_NODE_RES:
12141 self.needed_locks[locking.LEVEL_NODE_RES] = \
12142 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12144 def BuildHooksEnv(self):
12145 """Build hooks env.
12147 This runs on the master, the primary and all the secondaries.
"""
env = {
12151 "DISK": self.op.disk,
12152 "AMOUNT": self.op.amount,
12153 "ABSOLUTE": self.op.absolute,
}
12155 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
return env
12158 def BuildHooksNodes(self):
12159 """Build hooks nodes.
"""
12162 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return nl, nl
12165 def CheckPrereq(self):
12166 """Check prerequisites.
12168 This checks that the instance is in the cluster.
12171 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12172 assert instance is not None, \
12173 "Cannot retrieve locked instance %s" % self.op.instance_name
12174 nodenames = list(instance.all_nodes)
12175 for node in nodenames:
12176 _CheckNodeOnline(self, node)
12178 self.instance = instance
12180 if instance.disk_template not in constants.DTS_GROWABLE:
12181 raise errors.OpPrereqError("Instance's disk layout does not support"
12182 " growing", errors.ECODE_INVAL)
12184 self.disk = instance.FindDisk(self.op.disk)
12186 if self.op.absolute:
12187 self.target = self.op.amount
12188 self.delta = self.target - self.disk.size
if self.delta < 0:
12190 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12191 "current disk size (%s)" %
12192 (utils.FormatUnit(self.target, "h"),
12193 utils.FormatUnit(self.disk.size, "h")),
12194 errors.ECODE_STATE)
else:
12196 self.delta = self.op.amount
12197 self.target = self.disk.size + self.delta
if self.delta < 0:
12199 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12200 utils.FormatUnit(self.delta, "h"),
12201 errors.ECODE_INVAL)
12203 if instance.disk_template not in (constants.DT_FILE,
12204 constants.DT_SHARED_FILE,
12206 # TODO: check the free disk space for file, when that feature will be
12208 _CheckNodesFreeDiskPerVG(self, nodenames,
12209 self.disk.ComputeGrowth(self.delta))
12211 def Exec(self, feedback_fn):
12212 """Execute disk grow.
12215 instance = self.instance
disk = self.disk
12218 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12219 assert (self.owned_locks(locking.LEVEL_NODE) ==
12220 self.owned_locks(locking.LEVEL_NODE_RES))
12222 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12224 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
if not disks_ok:
12226 raise errors.OpExecError("Cannot activate block device to grow")
12228 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12229 (self.op.disk, instance.name,
12230 utils.FormatUnit(self.delta, "h"),
12231 utils.FormatUnit(self.target, "h")))
12233 # First run all grow ops in dry-run mode
12234 for node in instance.all_nodes:
12235 self.cfg.SetDiskID(disk, node)
12236 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12238 result.Raise("Dry-run grow request failed to node %s" % node)
if wipe_disks:
12241 # Get disk size from primary node for wiping
12242 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12243 result.Raise("Failed to retrieve disk size from node '%s'" %
12244 instance.primary_node)
12246 (disk_size_in_bytes, ) = result.payload
12248 if disk_size_in_bytes is None:
12249 raise errors.OpExecError("Failed to retrieve disk size from primary"
12250 " node '%s'" % instance.primary_node)
12252 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12254 assert old_disk_size >= disk.size, \
12255 ("Retrieved disk size too small (got %s, should be at least %s)" %
12256 (old_disk_size, disk.size))
else:
12258 old_disk_size = None
12260 # We know that (as far as we can test) operations across different
12261 # nodes will succeed, time to run it for real on the backing storage
12262 for node in instance.all_nodes:
12263 self.cfg.SetDiskID(disk, node)
12264 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12266 result.Raise("Grow request failed to node %s" % node)
12268 # And now execute it for logical storage, on the primary node
12269 node = instance.primary_node
12270 self.cfg.SetDiskID(disk, node)
12271 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12273 result.Raise("Grow request failed to node %s" % node)
12275 disk.RecordGrow(self.delta)
12276 self.cfg.Update(instance, feedback_fn)
12278 # Changes have been recorded, release node lock
12279 _ReleaseLocks(self, locking.LEVEL_NODE)
12281 # Downgrade lock while waiting for sync
12282 self.glm.downgrade(locking.LEVEL_INSTANCE)
12284 assert wipe_disks ^ (old_disk_size is None)
if wipe_disks:
12287 assert instance.disks[self.op.disk] == disk
12289 # Wipe newly added disk space
12290 _WipeDisks(self, instance,
12291 disks=[(self.op.disk, disk, old_disk_size)])
12293 if self.op.wait_for_sync:
12294 disk_abort = not _WaitForSync(self, instance, disks=[disk])
if disk_abort:
12296 self.LogWarning("Disk syncing has not returned a good status; check"
" the instance")
12298 if instance.admin_state != constants.ADMINST_UP:
12299 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12300 elif instance.admin_state != constants.ADMINST_UP:
12301 self.LogWarning("Not shutting down the disk even if the instance is"
12302 " not supposed to be running because no wait for"
12303 " sync mode was requested")
12305 assert self.owned_locks(locking.LEVEL_NODE_RES)
12306 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12309 class LUInstanceQueryData(NoHooksLU):
12310 """Query runtime instance data.
12315 def ExpandNames(self):
12316 self.needed_locks = {}
12318 # Use locking if requested or when non-static information is wanted
12319 if not (self.op.static or self.op.use_locking):
12320 self.LogWarning("Non-static data requested, locks need to be acquired")
12321 self.op.use_locking = True
12323 if self.op.instances or not self.op.use_locking:
12324 # Expand instance names right here
12325 self.wanted_names = _GetWantedInstances(self, self.op.instances)
else:
12327 # Will use acquired locks
12328 self.wanted_names = None
12330 if self.op.use_locking:
12331 self.share_locks = _ShareAll()
12333 if self.wanted_names is None:
12334 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
else:
12336 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12338 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12339 self.needed_locks[locking.LEVEL_NODE] = []
12340 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12342 def DeclareLocks(self, level):
12343 if self.op.use_locking:
12344 if level == locking.LEVEL_NODEGROUP:
12345 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12347 # Lock all groups used by instances optimistically; this requires going
12348 # via the node before it's locked, requiring verification later on
12349 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12350 frozenset(group_uuid
12351 for instance_name in owned_instances
for group_uuid in
12353 self.cfg.GetInstanceNodeGroups(instance_name))
12355 elif level == locking.LEVEL_NODE:
12356 self._LockInstancesNodes()
12358 def CheckPrereq(self):
12359 """Check prerequisites.
12361 This only checks the optional instance list against the existing names.
12364 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12365 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12366 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12368 if self.wanted_names is None:
12369 assert self.op.use_locking, "Locking was not used"
12370 self.wanted_names = owned_instances
12372 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12374 if self.op.use_locking:
12375 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
None)
else:
12378 assert not (owned_instances or owned_groups or owned_nodes)
12380 self.wanted_instances = instances.values()
12382 def _ComputeBlockdevStatus(self, node, instance, dev):
12383 """Returns the status of a block device
12386 if self.op.static or not node:
return None

12389 self.cfg.SetDiskID(dev, node)

12391 result = self.rpc.call_blockdev_find(node, dev)
if result.offline:
return None
12395 result.Raise("Can't compute disk status for %s" % instance.name)

12397 status = result.payload
if status is None:
return None

12401 return (status.dev_path, status.major, status.minor,
12402 status.sync_percent, status.estimated_time,
12403 status.is_degraded, status.ldisk_status)
12405 def _ComputeDiskStatus(self, instance, snode, dev):
12406 """Compute block device status.
12409 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12411 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12413 def _ComputeDiskStatusInner(self, instance, snode, dev):
12414 """Compute block device status.
12416 @attention: The device has to be annotated already.
12419 if dev.dev_type in constants.LDS_DRBD:
12420 # we change the snode then (otherwise we use the one passed in)
12421 if dev.logical_id[0] == instance.primary_node:
12422 snode = dev.logical_id[1]
else:
12424 snode = dev.logical_id[0]
12426 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
instance, dev)
12428 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12431 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
instance, snode),
dev.children)

return {
12438 "iv_name": dev.iv_name,
12439 "dev_type": dev.dev_type,
12440 "logical_id": dev.logical_id,
12441 "physical_id": dev.physical_id,
12442 "pstatus": dev_pstatus,
12443 "sstatus": dev_sstatus,
12444 "children": dev_children,
}
12449 def Exec(self, feedback_fn):
12450 """Gather and return data"""
result = {}

12453 cluster = self.cfg.GetClusterInfo()
12455 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12456 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12458 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12459 for node in nodes.values()))
12461 group2name_fn = lambda uuid: groups[uuid].name
12463 for instance in self.wanted_instances:
12464 pnode = nodes[instance.primary_node]
12466 if self.op.static or pnode.offline:
12467 remote_state = None
if pnode.offline:
12469 self.LogWarning("Primary node %s is marked offline, returning static"
12470 " information only for instance %s" %
12471 (pnode.name, instance.name))
else:
12473 remote_info = self.rpc.call_instance_info(instance.primary_node,
instance.name,
12475 instance.hypervisor)
12476 remote_info.Raise("Error checking node %s" % instance.primary_node)
12477 remote_info = remote_info.payload
12478 if remote_info and "state" in remote_info:
12479 remote_state = "up"
else:
12481 if instance.admin_state == constants.ADMINST_UP:
12482 remote_state = "down"
else:
12484 remote_state = instance.admin_state
12486 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
instance.disks)
12489 snodes_group_uuids = [nodes[snode_name].group
12490 for snode_name in instance.secondary_nodes]
12492 result[instance.name] = {
12493 "name": instance.name,
12494 "config_state": instance.admin_state,
12495 "run_state": remote_state,
12496 "pnode": instance.primary_node,
12497 "pnode_group_uuid": pnode.group,
12498 "pnode_group_name": group2name_fn(pnode.group),
12499 "snodes": instance.secondary_nodes,
12500 "snodes_group_uuids": snodes_group_uuids,
12501 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12503 # this happens to be the same format used for hooks
12504 "nics": _NICListToTuple(self, instance.nics),
12505 "disk_template": instance.disk_template,
"disks": disks,
12507 "hypervisor": instance.hypervisor,
12508 "network_port": instance.network_port,
12509 "hv_instance": instance.hvparams,
12510 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12511 "be_instance": instance.beparams,
12512 "be_actual": cluster.FillBE(instance),
12513 "os_instance": instance.osparams,
12514 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12515 "serial_no": instance.serial_no,
12516 "mtime": instance.mtime,
12517 "ctime": instance.ctime,
12518 "uuid": instance.uuid,
}

return result
12524 def PrepareContainerMods(mods, private_fn):
12525 """Prepares a list of container modifications by adding a private data field.
12527 @type mods: list of tuples; (operation, index, parameters)
12528 @param mods: List of modifications
12529 @type private_fn: callable or None
12530 @param private_fn: Callable for constructing a private data field for a
modification

"""
12535 if private_fn is None:
fn = lambda: None
else:
fn = private_fn

12540 return [(op, idx, params, fn()) for (op, idx, params) in mods]
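# e.g. PrepareContainerMods([(constants.DDM_ADD, -1, params)], None) returns
# [(constants.DDM_ADD, -1, params, None)]; with a private_fn, the last element
# is a freshly constructed private object, one per modification.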
12543 #: Type description for changes as returned by L{ApplyContainerMods}'s
12545 _TApplyContModsCbChanges = \
12546 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12547 ht.TNonEmptyString,
ht.TAny,
])))
12552 def ApplyContainerMods(kind, container, chgdesc, mods,
12553 create_fn, modify_fn, remove_fn):
12554 """Applies descriptions in C{mods} to C{container}.
12557 @param kind: One-word item description
12558 @type container: list
12559 @param container: Container to modify
12560 @type chgdesc: None or list
12561 @param chgdesc: List of applied changes
12563 @param mods: Modifications as returned by L{PrepareContainerMods}
12564 @type create_fn: callable
12565 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12566 receives absolute item index, parameters and private data object as added
12567 by L{PrepareContainerMods}, returns tuple containing new item and changes
12569 @type modify_fn: callable
12570 @param modify_fn: Callback for modifying an existing item
12571 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12572 and private data object as added by L{PrepareContainerMods}, returns
12574 @type remove_fn: callable
12575 @param remove_fn: Callback on removing item; receives absolute item index,
12576 item and private data object as added by L{PrepareContainerMods}
"""
12579 for (op, idx, params, private) in mods:
if idx == -1:
12582 absidx = len(container) - 1
elif idx < 0:
12584 raise IndexError("Not accepting negative indices other than -1")
12585 elif idx > len(container):
12586 raise IndexError("Got %s index %s, but there are only %s" %
12587 (kind, idx, len(container)))
else:
absidx = idx

changes = None

12593 if op == constants.DDM_ADD:
12594 # Calculate where item will be added
if idx == -1:
12596 addidx = len(container)
else:
addidx = idx

12600 if create_fn is None:
item = params
else:
12603 (item, changes) = create_fn(addidx, params, private)
if idx == -1:
12606 container.append(item)
else:
12609 assert idx <= len(container)
12610 # list.insert does so before the specified index
12611 container.insert(idx, item)
else:
12613 # Retrieve existing item
try:
12615 item = container[absidx]
except IndexError:
12617 raise IndexError("Invalid %s index %s" % (kind, idx))
12619 if op == constants.DDM_REMOVE:
assert not params

12622 if remove_fn is not None:
12623 remove_fn(absidx, item, private)
12625 changes = [("%s/%s" % (kind, absidx), "remove")]
12627 assert container[absidx] == item
12628 del container[absidx]
12629 elif op == constants.DDM_MODIFY:
12630 if modify_fn is not None:
12631 changes = modify_fn(absidx, item, params, private)
else:
12633 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12635 assert _TApplyContModsCbChanges(changes)
12637 if not (chgdesc is None or changes is None):
12638 chgdesc.extend(changes)
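# Usage sketch (callback names here are illustrative only): LUInstanceSetParams
# below prepares and applies NIC/disk modifications roughly as
#   mods = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
#   ApplyContainerMods("NIC", nics, changes, mods, create_cb, modify_cb, remove_cb)
# where create_cb/modify_cb/remove_cb stand in for the real callbacks.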
12641 def _UpdateIvNames(base_index, disks):
12642 """Updates the C{iv_name} attribute of disks.
12644 @type disks: list of L{objects.Disk}
"""
12647 for (idx, disk) in enumerate(disks):
12648 disk.iv_name = "disk/%s" % (base_index + idx, )
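# e.g. _UpdateIvNames(2, disks) renames the given disks to "disk/2", "disk/3", ...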
12651 class _InstNicModPrivate:
12652 """Data structure for network interface modifications.
12654 Used by L{LUInstanceSetParams}.
12657 def __init__(self):
self.params = None
self.filled = None
12662 class LUInstanceSetParams(LogicalUnit):
12663 """Modifies an instance's parameters.
12666 HPATH = "instance-modify"
12667 HTYPE = constants.HTYPE_INSTANCE
@staticmethod
12671 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12672 assert ht.TList(mods)
12673 assert not mods or len(mods[0]) in (2, 3)
12675 if mods and len(mods[0]) == 2:
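# Old-style modifications are 2-tuples (operation, parameters); they are
# upgraded here to the 3-tuple form (operation, index, parameters), e.g.
# (constants.DDM_ADD, params) becomes (constants.DDM_ADD, -1, params).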
result = []
addremove = 0
12679 for op, params in mods:
12680 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12681 result.append((op, -1, params))
addremove += 1

if addremove > 1:
12685 raise errors.OpPrereqError("Only one %s add or remove operation is"
12686 " supported at a time" % kind,
12687 errors.ECODE_INVAL)
else:
12689 result.append((constants.DDM_MODIFY, op, params))

12691 assert verify_fn(result)
else:
result = mods

return result
@staticmethod
12698 def _CheckMods(kind, mods, key_types, item_fn):
12699 """Ensures requested disk/NIC modifications are valid.
12702 for (op, _, params) in mods:
12703 assert ht.TDict(params)
12705 utils.ForceDictType(params, key_types)
12707 if op == constants.DDM_REMOVE:
if params:
12709 raise errors.OpPrereqError("No settings should be passed when"
12710 " removing a %s" % kind,
12711 errors.ECODE_INVAL)
12712 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12713 item_fn(op, params)
12715 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
@staticmethod
12718 def _VerifyDiskModification(op, params):
12719 """Verifies a disk modification.
12722 if op == constants.DDM_ADD:
12723 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12724 if mode not in constants.DISK_ACCESS_SET:
12725 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12726 errors.ECODE_INVAL)
12728 size = params.get(constants.IDISK_SIZE, None)
if size is None:
12730 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12731 constants.IDISK_SIZE, errors.ECODE_INVAL)
try:
size = int(size)
12735 except (TypeError, ValueError), err:
12736 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12737 errors.ECODE_INVAL)
12739 params[constants.IDISK_SIZE] = size
12741 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12742 raise errors.OpPrereqError("Disk size change not possible, use"
12743 " grow-disk", errors.ECODE_INVAL)
@staticmethod
12746 def _VerifyNicModification(op, params):
12747 """Verifies a network interface modification.
12750 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12751 ip = params.get(constants.INIC_IP, None)
12752 req_net = params.get(constants.INIC_NETWORK, None)
12753 link = params.get(constants.NIC_LINK, None)
12754 mode = params.get(constants.NIC_MODE, None)
12755 if req_net is not None:
12756 if req_net.lower() == constants.VALUE_NONE:
12757 params[constants.INIC_NETWORK] = None
12759 elif link is not None or mode is not None:
12760 raise errors.OpPrereqError("If a network is given,"
12761 " mode or link should not be passed",
12762 errors.ECODE_INVAL)
12764 if op == constants.DDM_ADD:
12765 macaddr = params.get(constants.INIC_MAC, None)
12766 if macaddr is None:
12767 params[constants.INIC_MAC] = constants.VALUE_AUTO
if ip is not None:
12770 if ip.lower() == constants.VALUE_NONE:
12771 params[constants.INIC_IP] = None
else:
12773 if ip.lower() == constants.NIC_IP_POOL:
12774 if op == constants.DDM_ADD and req_net is None:
12775 raise errors.OpPrereqError("If ip=pool, parameter network"
12777 errors.ECODE_INVAL)
else:
12779 if not netutils.IPAddress.IsValid(ip):
12780 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12781 errors.ECODE_INVAL)
12783 if constants.INIC_MAC in params:
12784 macaddr = params[constants.INIC_MAC]
12785 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12786 macaddr = utils.NormalizeAndValidateMac(macaddr)
12788 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12789 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12790 " modifying an existing NIC",
12791 errors.ECODE_INVAL)
12793 def CheckArguments(self):
12794 if not (self.op.nics or self.op.disks or self.op.disk_template or
12795 self.op.hvparams or self.op.beparams or self.op.os_name or
12796 self.op.offline is not None or self.op.runtime_mem):
12797 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12799 if self.op.hvparams:
12800 _CheckGlobalHvParams(self.op.hvparams)
12802 self.op.disks = self._UpgradeDiskNicMods(
12803 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12804 self.op.nics = self._UpgradeDiskNicMods(
12805 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12807 # Check disk modifications
12808 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12809 self._VerifyDiskModification)
12811 if self.op.disks and self.op.disk_template is not None:
12812 raise errors.OpPrereqError("Disk template conversion and other disk"
12813 " changes not supported at the same time",
12814 errors.ECODE_INVAL)
12816 if (self.op.disk_template and
12817 self.op.disk_template in constants.DTS_INT_MIRROR and
12818 self.op.remote_node is None):
12819 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12820 " one requires specifying a secondary node",
12821 errors.ECODE_INVAL)
12823 # Check NIC modifications
12824 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12825 self._VerifyNicModification)
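# Illustrative sketch (added; names and values hypothetical): after
# _UpgradeDiskNicMods, both self.op.disks and self.op.nics are lists of
# (operation, identifier, params) tuples, e.g. an opcode asking for one new
# disk and one NIC change could carry:
#
#   disks=[(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})]
#   nics=[(constants.DDM_MODIFY, 0, {constants.INIC_LINK: "br1"})]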
12827 def ExpandNames(self):
12828 self._ExpandAndLockInstance()
12829 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12830 # Can't even acquire node locks in shared mode as upcoming changes in
12831 # Ganeti 2.6 will start to modify the node object on disk conversion
12832 self.needed_locks[locking.LEVEL_NODE] = []
12833 self.needed_locks[locking.LEVEL_NODE_RES] = []
12834 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12835 # Look node group to look up the ipolicy
12836 self.share_locks[locking.LEVEL_NODEGROUP] = 1
12838 def DeclareLocks(self, level):
12839 if level == locking.LEVEL_NODEGROUP:
12840 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12841 # Acquire locks for the instance's nodegroups optimistically. Needs
12842 # to be verified in CheckPrereq
12843 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12844 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12845 elif level == locking.LEVEL_NODE:
12846 self._LockInstancesNodes()
12847 if self.op.disk_template and self.op.remote_node:
12848 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12849 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12850 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12852 self.needed_locks[locking.LEVEL_NODE_RES] = \
12853 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12855 def BuildHooksEnv(self):
12856 """Build hooks env.
12858 This runs on the master, primary and secondaries.
12860 """
12861 args = {}
12862 if constants.BE_MINMEM in self.be_new:
12863 args["minmem"] = self.be_new[constants.BE_MINMEM]
12864 if constants.BE_MAXMEM in self.be_new:
12865 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12866 if constants.BE_VCPUS in self.be_new:
12867 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12868 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12869 # information at all.
12871 if self._new_nics is not None:
12872 nics = []
12874 for nic in self._new_nics:
12875 n = copy.deepcopy(nic)
12876 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12877 n.nicparams = nicparams
12878 nics.append(_NICToTuple(self, n))
12880 args["nics"] = nics
12882 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12883 if self.op.disk_template:
12884 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12885 if self.op.runtime_mem:
12886 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12890 def BuildHooksNodes(self):
12891 """Build hooks nodes.
12894 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12897 def _PrepareNicModification(self, params, private, old_ip, old_net,
12898 old_params, cluster, pnode):
12900 update_params_dict = dict([(key, params[key])
12901 for key in constants.NICS_PARAMETERS
12902 if key in params])
12904 req_link = update_params_dict.get(constants.NIC_LINK, None)
12905 req_mode = update_params_dict.get(constants.NIC_MODE, None)
12907 new_net = params.get(constants.INIC_NETWORK, old_net)
12908 if new_net is not None:
12909 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12910 if netparams is None:
12911 raise errors.OpPrereqError("No netparams found for the network"
12912 " %s, probably not connected" % new_net,
12913 errors.ECODE_INVAL)
12914 new_params = dict(netparams)
12915 else:
12916 new_params = _GetUpdatedParams(old_params, update_params_dict)
12918 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12920 new_filled_params = cluster.SimpleFillNIC(new_params)
12921 objects.NIC.CheckParameterSyntax(new_filled_params)
12923 new_mode = new_filled_params[constants.NIC_MODE]
12924 if new_mode == constants.NIC_MODE_BRIDGED:
12925 bridge = new_filled_params[constants.NIC_LINK]
12926 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12927 if msg:
12928 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12929 if self.op.force:
12930 self.warn.append(msg)
12931 else:
12932 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12934 elif new_mode == constants.NIC_MODE_ROUTED:
12935 ip = params.get(constants.INIC_IP, old_ip)
12936 if ip is None:
12937 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12938 " on a routed NIC", errors.ECODE_INVAL)
12940 if constants.INIC_MAC in params:
12941 mac = params[constants.INIC_MAC]
12942 if mac is None:
12943 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12944 errors.ECODE_INVAL)
12945 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12946 # otherwise generate the MAC address
12947 params[constants.INIC_MAC] = \
12948 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12949 else:
12950 # or validate/reserve the current one
12951 try:
12952 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12953 except errors.ReservationError:
12954 raise errors.OpPrereqError("MAC address '%s' already in use"
12955 " in cluster" % mac,
12956 errors.ECODE_NOTUNIQUE)
12957 elif new_net != old_net:
12959 def get_net_prefix(net):
12961 uuid = self.cfg.LookupNetwork(net)
12963 nobj = self.cfg.GetNetwork(uuid)
12964 return nobj.mac_prefix
12967 new_prefix = get_net_prefix(new_net)
12968 old_prefix = get_net_prefix(old_net)
12969 if old_prefix != new_prefix:
12970 params[constants.INIC_MAC] = \
12971 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12973 #if there is a change in nic-network configuration
12974 new_ip = params.get(constants.INIC_IP, old_ip)
12975 if (new_ip, new_net) != (old_ip, old_net):
12978 if new_ip.lower() == constants.NIC_IP_POOL:
12979 try:
12980 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
12981 except errors.ReservationError:
12982 raise errors.OpPrereqError("Unable to get a free IP"
12983 " from the address pool",
12984 errors.ECODE_STATE)
12985 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
12986 params[constants.INIC_IP] = new_ip
12987 elif new_ip != old_ip or new_net != old_net:
12988 try:
12989 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
12990 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
12991 except errors.ReservationError:
12992 raise errors.OpPrereqError("IP %s not available in network %s" %
12994 errors.ECODE_NOTUNIQUE)
12995 elif new_ip.lower() == constants.NIC_IP_POOL:
12996 raise errors.OpPrereqError("ip=pool, but no network found",
12997 errors.ECODE_INVAL)
13000 if self.op.conflicts_check:
13001 _CheckForConflictingIp(self, new_ip, pnode)
13005 try:
13006 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13007 except errors.AddressPoolError:
13008 logging.warning("Release IP %s not contained in network %s",
13009 old_ip, old_net)
13011 # there are no changes in (net, ip) tuple
13012 elif (old_net is not None and
13013 (req_link is not None or req_mode is not None)):
13014 raise errors.OpPrereqError("Not allowed to change link or mode of"
13015 " a NIC that is connected to a network",
13016 errors.ECODE_INVAL)
13018 private.params = new_params
13019 private.filled = new_filled_params
13021 def CheckPrereq(self):
13022 """Check prerequisites.
13024 This only checks the instance list against the existing names.
13027 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13028 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13030 cluster = self.cluster = self.cfg.GetClusterInfo()
13031 assert self.instance is not None, \
13032 "Cannot retrieve locked instance %s" % self.op.instance_name
13034 pnode = instance.primary_node
13035 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13036 nodelist = list(instance.all_nodes)
13037 pnode_info = self.cfg.GetNodeInfo(pnode)
13038 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13040 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13041 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13042 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13044 # dictionary with instance information after the modification
13045 ispec = {}
13047 # Prepare disk/NIC modifications
13048 self.diskmod = PrepareContainerMods(self.op.disks, None)
13049 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13052 if self.op.os_name and not self.op.force:
13053 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13054 self.op.force_variant)
13055 instance_os = self.op.os_name
13056 else:
13057 instance_os = instance.os
13059 assert not (self.op.disk_template and self.op.disks), \
13060 "Can't modify disk template and apply disk changes at the same time"
13062 if self.op.disk_template:
13063 if instance.disk_template == self.op.disk_template:
13064 raise errors.OpPrereqError("Instance already has disk template %s" %
13065 instance.disk_template, errors.ECODE_INVAL)
13067 if (instance.disk_template,
13068 self.op.disk_template) not in self._DISK_CONVERSIONS:
13069 raise errors.OpPrereqError("Unsupported disk template conversion from"
13070 " %s to %s" % (instance.disk_template,
13071 self.op.disk_template),
13072 errors.ECODE_INVAL)
13073 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13074 msg="cannot change disk template")
13075 if self.op.disk_template in constants.DTS_INT_MIRROR:
13076 if self.op.remote_node == pnode:
13077 raise errors.OpPrereqError("Given new secondary node %s is the same"
13078 " as the primary node of the instance" %
13079 self.op.remote_node, errors.ECODE_STATE)
13080 _CheckNodeOnline(self, self.op.remote_node)
13081 _CheckNodeNotDrained(self, self.op.remote_node)
13082 # FIXME: here we assume that the old instance type is DT_PLAIN
13083 assert instance.disk_template == constants.DT_PLAIN
13084 disks = [{constants.IDISK_SIZE: d.size,
13085 constants.IDISK_VG: d.logical_id[0]}
13086 for d in instance.disks]
13087 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13088 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13090 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13091 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13092 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13093 snode_group)
13094 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13095 ignore=self.op.ignore_ipolicy)
13096 if pnode_info.group != snode_info.group:
13097 self.LogWarning("The primary and secondary nodes are in two"
13098 " different node groups; the disk parameters"
13099 " from the first disk's node group will be"
13102 # hvparams processing
13103 if self.op.hvparams:
13104 hv_type = instance.hypervisor
13105 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13106 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13107 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13110 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
13111 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13112 self.hv_proposed = self.hv_new = hv_new # the new actual values
13113 self.hv_inst = i_hvdict # the new dict (without defaults)
13114 else:
13115 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13116 instance.hvparams)
13117 self.hv_new = self.hv_inst = {}
13119 # beparams processing
13120 if self.op.beparams:
13121 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13122 use_none=True)
13123 objects.UpgradeBeParams(i_bedict)
13124 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13125 be_new = cluster.SimpleFillBE(i_bedict)
13126 self.be_proposed = self.be_new = be_new # the new actual values
13127 self.be_inst = i_bedict # the new dict (without defaults)
13128 else:
13129 self.be_new = self.be_inst = {}
13130 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13131 be_old = cluster.FillBE(instance)
13133 # CPU param validation -- checking every time a parameter is
13134 # changed to cover all cases where either CPU mask or vcpus have
13135 # changed
13136 if (constants.BE_VCPUS in self.be_proposed and
13137 constants.HV_CPU_MASK in self.hv_proposed):
13138 cpu_list = \
13139 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13140 # Verify mask is consistent with number of vCPUs. Can skip this
13141 # test if only 1 entry in the CPU mask, which means same mask
13142 # is applied to all vCPUs.
13143 if (len(cpu_list) > 1 and
13144 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13145 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13147 (self.be_proposed[constants.BE_VCPUS],
13148 self.hv_proposed[constants.HV_CPU_MASK]),
13149 errors.ECODE_INVAL)
13151 # Only perform this test if a new CPU mask is given
13152 if constants.HV_CPU_MASK in self.hv_new:
13153 # Calculate the largest CPU number requested
13154 max_requested_cpu = max(map(max, cpu_list))
13155 # Check that all of the instance's nodes have enough physical CPUs to
13156 # satisfy the requested CPU mask
13157 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13158 max_requested_cpu + 1, instance.hypervisor)
13160 # osparams processing
13161 if self.op.osparams:
13162 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13163 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13164 self.os_inst = i_osdict # the new dict (without defaults)
13165 else:
13166 self.os_inst = {}
13168 self.warn = []
13170 #TODO(dynmem): do the appropriate check involving MINMEM
13171 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13172 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13173 mem_check_list = [pnode]
13174 if be_new[constants.BE_AUTO_BALANCE]:
13175 # either we changed auto_balance to yes or it was from before
13176 mem_check_list.extend(instance.secondary_nodes)
13177 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13178 instance.hypervisor)
13179 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13180 [instance.hypervisor])
13181 pninfo = nodeinfo[pnode]
13182 msg = pninfo.fail_msg
13183 if msg:
13184 # Assume the primary node is unreachable and go ahead
13185 self.warn.append("Can't get info from primary node %s: %s" %
13186 (pnode, msg))
13187 else:
13188 (_, _, (pnhvinfo, )) = pninfo.payload
13189 if not isinstance(pnhvinfo.get("memory_free", None), int):
13190 self.warn.append("Node data from primary node %s doesn't contain"
13191 " free memory information" % pnode)
13192 elif instance_info.fail_msg:
13193 self.warn.append("Can't get instance runtime information: %s" %
13194 instance_info.fail_msg)
13196 if instance_info.payload:
13197 current_mem = int(instance_info.payload["memory"])
13198 else:
13199 # Assume instance not running
13200 # (there is a slight race condition here, but it's not very
13201 # probable, and we have no other way to check)
13202 # TODO: Describe race condition
13203 current_mem = 0
13204 #TODO(dynmem): do the appropriate check involving MINMEM
13205 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13206 pnhvinfo["memory_free"])
13207 if miss_mem > 0:
13208 raise errors.OpPrereqError("This change will prevent the instance"
13209 " from starting, due to %d MB of memory"
13210 " missing on its primary node" %
13211 miss_mem, errors.ECODE_NORES)
13213 if be_new[constants.BE_AUTO_BALANCE]:
13214 for node, nres in nodeinfo.items():
13215 if node not in instance.secondary_nodes:
13216 continue
13217 nres.Raise("Can't get info from secondary node %s" % node,
13218 prereq=True, ecode=errors.ECODE_STATE)
13219 (_, _, (nhvinfo, )) = nres.payload
13220 if not isinstance(nhvinfo.get("memory_free", None), int):
13221 raise errors.OpPrereqError("Secondary node %s didn't return free"
13222 " memory information" % node,
13223 errors.ECODE_STATE)
13224 #TODO(dynmem): do the appropriate check involving MINMEM
13225 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13226 raise errors.OpPrereqError("This change will prevent the instance"
13227 " from failover to its secondary node"
13228 " %s, due to not enough memory" % node,
13229 errors.ECODE_STATE)
13231 if self.op.runtime_mem:
13232 remote_info = self.rpc.call_instance_info(instance.primary_node,
13233 instance.name,
13234 instance.hypervisor)
13235 remote_info.Raise("Error checking node %s" % instance.primary_node)
13236 if not remote_info.payload: # not running already
13237 raise errors.OpPrereqError("Instance %s is not running" %
13238 instance.name, errors.ECODE_STATE)
13240 current_memory = remote_info.payload["memory"]
13241 if (not self.op.force and
13242 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13243 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13244 raise errors.OpPrereqError("Instance %s must have memory between %d"
13245 " and %d MB of memory unless --force is"
13248 self.be_proposed[constants.BE_MINMEM],
13249 self.be_proposed[constants.BE_MAXMEM]),
13250 errors.ECODE_INVAL)
13252 delta = self.op.runtime_mem - current_memory
13253 if delta > 0:
13254 _CheckNodeFreeMemory(self, instance.primary_node,
13255 "ballooning memory for instance %s" %
13256 instance.name, delta, instance.hypervisor)
13258 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13259 raise errors.OpPrereqError("Disk operations not supported for"
13260 " diskless instances", errors.ECODE_INVAL)
13262 def _PrepareNicCreate(_, params, private):
13263 self._PrepareNicModification(params, private, None, None,
13264 {}, cluster, pnode)
13265 return (None, None)
13267 def _PrepareNicMod(_, nic, params, private):
13268 self._PrepareNicModification(params, private, nic.ip, nic.network,
13269 nic.nicparams, cluster, pnode)
13270 return None
13272 def _PrepareNicRemove(_, params, __):
13273 ip = params.ip
13274 net = params.network
13275 if net is not None and ip is not None:
13276 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13278 # Verify NIC changes (operating on copy)
13279 nics = instance.nics[:]
13280 ApplyContainerMods("NIC", nics, None, self.nicmod,
13281 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13282 if len(nics) > constants.MAX_NICS:
13283 raise errors.OpPrereqError("Instance has too many network interfaces"
13284 " (%d), cannot add more" % constants.MAX_NICS,
13285 errors.ECODE_STATE)
13287 # Verify disk changes (operating on a copy)
13288 disks = instance.disks[:]
13289 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13290 if len(disks) > constants.MAX_DISKS:
13291 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13292 " more" % constants.MAX_DISKS,
13293 errors.ECODE_STATE)
13294 disk_sizes = [disk.size for disk in instance.disks]
13295 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13296 self.diskmod if op == constants.DDM_ADD)
13297 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13298 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13300 if self.op.offline is not None:
13301 if self.op.offline:
13302 msg = "can't change to offline"
13304 msg = "can't change to online"
13305 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13307 # Pre-compute NIC changes (necessary to use result in hooks)
13308 self._nic_chgdesc = []
13309 if self.nicmod:
13310 # Operate on copies as this is still in prereq
13311 nics = [nic.Copy() for nic in instance.nics]
13312 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13313 self._CreateNewNic, self._ApplyNicMods, None)
13314 self._new_nics = nics
13315 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13316 else:
13317 self._new_nics = None
13318 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13320 if not self.op.ignore_ipolicy:
13321 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13322 group_info)
13324 # Fill ispec with backend parameters
13325 ispec[constants.ISPEC_SPINDLE_USE] = \
13326 self.be_new.get(constants.BE_SPINDLE_USE, None)
13327 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13328 None)
13330 # Copy ispec to verify parameters with min/max values separately
13331 ispec_max = ispec.copy()
13332 ispec_max[constants.ISPEC_MEM_SIZE] = \
13333 self.be_new.get(constants.BE_MAXMEM, None)
13334 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13335 ispec_min = ispec.copy()
13336 ispec_min[constants.ISPEC_MEM_SIZE] = \
13337 self.be_new.get(constants.BE_MINMEM, None)
13338 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13340 if (res_max or res_min):
13341 # FIXME: Improve error message by including information about whether
13342 # the upper or lower limit of the parameter fails the ipolicy.
13343 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13344 (group_info, group_info.name,
13345 utils.CommaJoin(set(res_max + res_min))))
13346 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
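# Illustrative sketch (added, hypothetical values): the ispec dict assembled
# above describes the instance after the requested changes, e.g.
#
#   {constants.ISPEC_MEM_SIZE: 1024, constants.ISPEC_CPU_COUNT: 2,
#    constants.ISPEC_DISK_COUNT: 1, constants.ISPEC_DISK_SIZE: [10240],
#    constants.ISPEC_NIC_COUNT: 1, constants.ISPEC_SPINDLE_USE: 1}
#
# _ComputeIPolicyInstanceSpecViolation returns a list of violation messages;
# res_max and res_min both being empty means the proposed maximum and minimum
# memory settings fit the group's instance policy.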
13348 def _ConvertPlainToDrbd(self, feedback_fn):
13349 """Converts an instance from plain to drbd.
13352 feedback_fn("Converting template to drbd")
13353 instance = self.instance
13354 pnode = instance.primary_node
13355 snode = self.op.remote_node
13357 assert instance.disk_template == constants.DT_PLAIN
13359 # create a fake disk info for _GenerateDiskTemplate
13360 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13361 constants.IDISK_VG: d.logical_id[0]}
13362 for d in instance.disks]
13363 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13364 instance.name, pnode, [snode],
13365 disk_info, None, None, 0, feedback_fn,
13366 self.diskparams)
13367 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13368 self.diskparams)
13369 info = _GetInstanceInfoText(instance)
13370 feedback_fn("Creating additional volumes...")
13371 # first, create the missing data and meta devices
13372 for disk in anno_disks:
13373 # unfortunately this is... not too nice
13374 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13375 info, True)
13376 for child in disk.children:
13377 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13378 # at this stage, all new LVs have been created, we can rename the
13380 feedback_fn("Renaming original volumes...")
13381 rename_list = [(o, n.children[0].logical_id)
13382 for (o, n) in zip(instance.disks, new_disks)]
13383 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13384 result.Raise("Failed to rename original LVs")
13386 feedback_fn("Initializing DRBD devices...")
13387 # all child devices are in place, we can now create the DRBD devices
13388 for disk in anno_disks:
13389 for node in [pnode, snode]:
13390 f_create = node == pnode
13391 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13393 # at this point, the instance has been modified
13394 instance.disk_template = constants.DT_DRBD8
13395 instance.disks = new_disks
13396 self.cfg.Update(instance, feedback_fn)
13398 # Release node locks while waiting for sync
13399 _ReleaseLocks(self, locking.LEVEL_NODE)
13401 # disks are created, waiting for sync
13402 disk_abort = not _WaitForSync(self, instance,
13403 oneshot=not self.op.wait_for_sync)
13404 if disk_abort:
13405 raise errors.OpExecError("There are some degraded disks for"
13406 " this instance, please cleanup manually")
13408 # Node resource locks will be released by caller
13410 def _ConvertDrbdToPlain(self, feedback_fn):
13411 """Converts an instance from drbd to plain.
13414 instance = self.instance
13416 assert len(instance.secondary_nodes) == 1
13417 assert instance.disk_template == constants.DT_DRBD8
13419 pnode = instance.primary_node
13420 snode = instance.secondary_nodes[0]
13421 feedback_fn("Converting template to plain")
13423 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13424 new_disks = [d.children[0] for d in instance.disks]
13426 # copy over size and mode
13427 for parent, child in zip(old_disks, new_disks):
13428 child.size = parent.size
13429 child.mode = parent.mode
13431 # this is a DRBD disk, return its port to the pool
13432 # NOTE: this must be done right before the call to cfg.Update!
13433 for disk in old_disks:
13434 tcp_port = disk.logical_id[2]
13435 self.cfg.AddTcpUdpPort(tcp_port)
13437 # update instance structure
13438 instance.disks = new_disks
13439 instance.disk_template = constants.DT_PLAIN
13440 self.cfg.Update(instance, feedback_fn)
13442 # Release locks in case removing disks takes a while
13443 _ReleaseLocks(self, locking.LEVEL_NODE)
13445 feedback_fn("Removing volumes on the secondary node...")
13446 for disk in old_disks:
13447 self.cfg.SetDiskID(disk, snode)
13448 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13449 if msg:
13450 self.LogWarning("Could not remove block device %s on node %s,"
13451 " continuing anyway: %s", disk.iv_name, snode, msg)
13453 feedback_fn("Removing unneeded volumes on the primary node...")
13454 for idx, disk in enumerate(old_disks):
13455 meta = disk.children[1]
13456 self.cfg.SetDiskID(meta, pnode)
13457 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13458 if msg:
13459 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13460 " continuing anyway: %s", idx, pnode, msg)
13462 def _CreateNewDisk(self, idx, params, _):
13463 """Creates a new disk.
13466 instance = self.instance
13469 if instance.disk_template in constants.DTS_FILEBASED:
13470 (file_driver, file_path) = instance.disks[0].logical_id
13471 file_path = os.path.dirname(file_path)
13472 else:
13473 file_driver = file_path = None
13475 disk = \
13476 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13477 instance.primary_node, instance.secondary_nodes,
13478 [params], file_path, file_driver, idx,
13479 self.Log, self.diskparams)[0]
13481 info = _GetInstanceInfoText(instance)
13483 logging.info("Creating volume %s for instance %s",
13484 disk.iv_name, instance.name)
13485 # Note: this needs to be kept in sync with _CreateDisks
13487 for node in instance.all_nodes:
13488 f_create = (node == instance.primary_node)
13489 try:
13490 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13491 except errors.OpExecError, err:
13492 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13493 disk.iv_name, disk, node, err)
13495 return (disk, [
13496 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13497 ])
13499 @staticmethod
13500 def _ModifyDisk(idx, disk, params, _):
13501 """Modifies a disk.
13504 disk.mode = params[constants.IDISK_MODE]
13507 ("disk.mode/%d" % idx, disk.mode),
13510 def _RemoveDisk(self, idx, root, _):
13514 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13515 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13516 self.cfg.SetDiskID(disk, node)
13517 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13518 if msg:
13519 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13520 " continuing anyway", idx, node, msg)
13522 # if this is a DRBD disk, return its port to the pool
13523 if root.dev_type in constants.LDS_DRBD:
13524 self.cfg.AddTcpUdpPort(root.logical_id[2])
13527 def _CreateNewNic(idx, params, private):
13528 """Creates data structure for a new network interface.
13531 mac = params[constants.INIC_MAC]
13532 ip = params.get(constants.INIC_IP, None)
13533 net = params.get(constants.INIC_NETWORK, None)
13534 #TODO: not private.filled?? can a nic have no nicparams??
13535 nicparams = private.filled
13537 return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13538 ("nic.%d" % idx,
13539 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13540 (mac, ip, private.filled[constants.NIC_MODE],
13541 private.filled[constants.NIC_LINK],
13542 net)),
13543 ])
13545 @staticmethod
13546 def _ApplyNicMods(idx, nic, params, private):
13547 """Modifies a network interface.
13552 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13554 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13555 setattr(nic, key, params[key])
13557 if private.filled:
13558 nic.nicparams = private.filled
13560 for (key, val) in nic.nicparams.items():
13561 changes.append(("nic.%s/%d" % (key, idx), val))
13565 def Exec(self, feedback_fn):
13566 """Modifies an instance.
13568 All parameters take effect only at the next restart of the instance.
13571 # Process here the warnings from CheckPrereq, as we don't have a
13572 # feedback_fn there.
13573 # TODO: Replace with self.LogWarning
13574 for warn in self.warn:
13575 feedback_fn("WARNING: %s" % warn)
13577 assert ((self.op.disk_template is None) ^
13578 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13579 "Not owning any node resource locks"
13582 instance = self.instance
13585 if self.op.runtime_mem:
13586 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13588 self.op.runtime_mem)
13589 rpcres.Raise("Cannot modify instance runtime memory")
13590 result.append(("runtime_memory", self.op.runtime_mem))
13592 # Apply disk changes
13593 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13594 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13595 _UpdateIvNames(0, instance.disks)
13597 if self.op.disk_template:
13599 check_nodes = set(instance.all_nodes)
13600 if self.op.remote_node:
13601 check_nodes.add(self.op.remote_node)
13602 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13603 owned = self.owned_locks(level)
13604 assert not (check_nodes - owned), \
13605 ("Not owning the correct locks, owning %r, expected at least %r" %
13606 (owned, check_nodes))
13608 r_shut = _ShutdownInstanceDisks(self, instance)
13609 if not r_shut:
13610 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13611 " proceed with disk template conversion")
13612 mode = (instance.disk_template, self.op.disk_template)
13613 try:
13614 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13615 except:
13616 self.cfg.ReleaseDRBDMinors(instance.name)
13617 raise
13618 result.append(("disk_template", self.op.disk_template))
13620 assert instance.disk_template == self.op.disk_template, \
13621 ("Expected disk template '%s', found '%s'" %
13622 (self.op.disk_template, instance.disk_template))
13624 # Release node and resource locks if there are any (they might already have
13625 # been released during disk conversion)
13626 _ReleaseLocks(self, locking.LEVEL_NODE)
13627 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13629 # Apply NIC changes
13630 if self._new_nics is not None:
13631 instance.nics = self._new_nics
13632 result.extend(self._nic_chgdesc)
13635 if self.op.hvparams:
13636 instance.hvparams = self.hv_inst
13637 for key, val in self.op.hvparams.iteritems():
13638 result.append(("hv/%s" % key, val))
13641 if self.op.beparams:
13642 instance.beparams = self.be_inst
13643 for key, val in self.op.beparams.iteritems():
13644 result.append(("be/%s" % key, val))
13647 if self.op.os_name:
13648 instance.os = self.op.os_name
13651 if self.op.osparams:
13652 instance.osparams = self.os_inst
13653 for key, val in self.op.osparams.iteritems():
13654 result.append(("os/%s" % key, val))
13656 if self.op.offline is None:
13657 # Ignore
13658 pass
13659 elif self.op.offline:
13660 # Mark instance as offline
13661 self.cfg.MarkInstanceOffline(instance.name)
13662 result.append(("admin_state", constants.ADMINST_OFFLINE))
13664 # Mark instance as online, but stopped
13665 self.cfg.MarkInstanceDown(instance.name)
13666 result.append(("admin_state", constants.ADMINST_DOWN))
13668 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13670 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13671 self.owned_locks(locking.LEVEL_NODE)), \
13672 "All node locks should have been released by now"
13676 _DISK_CONVERSIONS = {
13677 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13678 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13679 }
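# _DISK_CONVERSIONS maps (current template, requested template) pairs to the
# conversion helpers above; Exec() looks the pair up and calls the helper.
# A sketch of the matching CLI requests (hypothetical names):
#
#   gnt-instance modify -t drbd -n node2.example.com instance1.example.com
#   gnt-instance modify -t plain instance1.example.com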
13682 class LUInstanceChangeGroup(LogicalUnit):
13683 HPATH = "instance-change-group"
13684 HTYPE = constants.HTYPE_INSTANCE
13685 REQ_BGL = False
13687 def ExpandNames(self):
13688 self.share_locks = _ShareAll()
13689 self.needed_locks = {
13690 locking.LEVEL_NODEGROUP: [],
13691 locking.LEVEL_NODE: [],
13692 }
13694 self._ExpandAndLockInstance()
13696 if self.op.target_groups:
13697 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13698 self.op.target_groups)
13699 else:
13700 self.req_target_uuids = None
13702 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13704 def DeclareLocks(self, level):
13705 if level == locking.LEVEL_NODEGROUP:
13706 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13708 if self.req_target_uuids:
13709 lock_groups = set(self.req_target_uuids)
13711 # Lock all groups used by instance optimistically; this requires going
13712 # via the node before it's locked, requiring verification later on
13713 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13714 lock_groups.update(instance_groups)
13715 else:
13716 # No target groups, need to lock all of them
13717 lock_groups = locking.ALL_SET
13719 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13721 elif level == locking.LEVEL_NODE:
13722 if self.req_target_uuids:
13723 # Lock all nodes used by instances
13724 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13725 self._LockInstancesNodes()
13727 # Lock all nodes in all potential target groups
13728 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13729 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13730 member_nodes = [node_name
13731 for group in lock_groups
13732 for node_name in self.cfg.GetNodeGroup(group).members]
13733 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13734 else:
13735 # Lock all nodes as all groups are potential targets
13736 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13738 def CheckPrereq(self):
13739 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13740 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13741 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13743 assert (self.req_target_uuids is None or
13744 owned_groups.issuperset(self.req_target_uuids))
13745 assert owned_instances == set([self.op.instance_name])
13747 # Get instance information
13748 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13750 # Check if node groups for locked instance are still correct
13751 assert owned_nodes.issuperset(self.instance.all_nodes), \
13752 ("Instance %s's nodes changed while we kept the lock" %
13753 self.op.instance_name)
13755 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13756 owned_groups)
13758 if self.req_target_uuids:
13759 # User requested specific target groups
13760 self.target_uuids = frozenset(self.req_target_uuids)
13761 else:
13762 # All groups except those used by the instance are potential targets
13763 self.target_uuids = owned_groups - inst_groups
13765 conflicting_groups = self.target_uuids & inst_groups
13766 if conflicting_groups:
13767 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13768 " used by the instance '%s'" %
13769 (utils.CommaJoin(conflicting_groups),
13770 self.op.instance_name),
13771 errors.ECODE_INVAL)
13773 if not self.target_uuids:
13774 raise errors.OpPrereqError("There are no possible target groups",
13775 errors.ECODE_INVAL)
13777 def BuildHooksEnv(self):
13778 """Build hooks env.
13781 assert self.target_uuids
13784 "TARGET_GROUPS": " ".join(self.target_uuids),
13787 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13791 def BuildHooksNodes(self):
13792 """Build hooks nodes.
13795 mn = self.cfg.GetMasterNode()
13796 return ([mn], [mn])
13798 def Exec(self, feedback_fn):
13799 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13801 assert instances == [self.op.instance_name], "Instance not locked"
13803 req = iallocator.IAReqGroupChange(instances=instances,
13804 target_groups=list(self.target_uuids))
13805 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13807 ial.Run(self.op.iallocator)
13809 if not ial.success:
13810 raise errors.OpPrereqError("Can't compute solution for changing group of"
13811 " instance '%s' using iallocator '%s': %s" %
13812 (self.op.instance_name, self.op.iallocator,
13813 ial.info), errors.ECODE_NORES)
13815 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13817 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13818 " instance '%s'", len(jobs), self.op.instance_name)
13820 return ResultWithJobs(jobs)
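# Illustrative sketch (added, hypothetical names): the LU itself only asks the
# iallocator for a plan and returns the resulting jobs via ResultWithJobs; the
# submitted jobs then perform the actual instance moves.
#
#   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                      target_groups=["group2"])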
13823 class LUBackupQuery(NoHooksLU):
13824 """Query the exports list
13829 def CheckArguments(self):
13830 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13831 ["node", "export"], self.op.use_locking)
13833 def ExpandNames(self):
13834 self.expq.ExpandNames(self)
13836 def DeclareLocks(self, level):
13837 self.expq.DeclareLocks(self, level)
13839 def Exec(self, feedback_fn):
13840 result = {}
13842 for (node, expname) in self.expq.OldStyleQuery(self):
13843 if expname is None:
13844 result[node] = False
13845 else:
13846 result.setdefault(node, []).append(expname)
13848 return result
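# The old-style result built above is a dict keyed by node name: False when
# the node could not be queried, otherwise the list of export names, e.g.
# (hypothetical):
#
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}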
13851 class _ExportQuery(_QueryBase):
13852 FIELDS = query.EXPORT_FIELDS
13854 #: The node name is not a unique key for this query
13855 SORT_FIELD = "node"
13857 def ExpandNames(self, lu):
13858 lu.needed_locks = {}
13860 # The following variables interact with _QueryBase._GetNames
13861 if self.names:
13862 self.wanted = _GetWantedNodes(lu, self.names)
13863 else:
13864 self.wanted = locking.ALL_SET
13866 self.do_locking = self.use_locking
13868 if self.do_locking:
13869 lu.share_locks = _ShareAll()
13870 lu.needed_locks = {
13871 locking.LEVEL_NODE: self.wanted,
13872 }
13874 def DeclareLocks(self, lu, level):
13875 pass
13877 def _GetQueryData(self, lu):
13878 """Computes the list of nodes and their attributes.
13881 # Locking is not used
13883 assert not (compat.any(lu.glm.is_owned(level)
13884 for level in locking.LEVELS
13885 if level != locking.LEVEL_CLUSTER) or
13886 self.do_locking or self.use_locking)
13888 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13890 result = []
13892 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13893 if nres.fail_msg:
13894 result.append((node, None))
13895 else:
13896 result.extend((node, expname) for expname in nres.payload)
13898 return result
13901 class LUBackupPrepare(NoHooksLU):
13902 """Prepares an instance for an export and returns useful information.
13907 def ExpandNames(self):
13908 self._ExpandAndLockInstance()
13910 def CheckPrereq(self):
13911 """Check prerequisites.
13914 instance_name = self.op.instance_name
13916 self.instance = self.cfg.GetInstanceInfo(instance_name)
13917 assert self.instance is not None, \
13918 "Cannot retrieve locked instance %s" % self.op.instance_name
13919 _CheckNodeOnline(self, self.instance.primary_node)
13921 self._cds = _GetClusterDomainSecret()
13923 def Exec(self, feedback_fn):
13924 """Prepares an instance for an export.
13927 instance = self.instance
13929 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13930 salt = utils.GenerateSecret(8)
13932 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13933 result = self.rpc.call_x509_cert_create(instance.primary_node,
13934 constants.RIE_CERT_VALIDITY)
13935 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13937 (name, cert_pem) = result.payload
13939 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13940 cert_pem)
13942 return {
13943 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13944 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13945 salt),
13946 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13947 }
13949 return None
13952 class LUBackupExport(LogicalUnit):
13953 """Export an instance to an image in the cluster.
13956 HPATH = "instance-export"
13957 HTYPE = constants.HTYPE_INSTANCE
13958 REQ_BGL = False
13960 def CheckArguments(self):
13961 """Check the arguments.
13964 self.x509_key_name = self.op.x509_key_name
13965 self.dest_x509_ca_pem = self.op.destination_x509_ca
13967 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13968 if not self.x509_key_name:
13969 raise errors.OpPrereqError("Missing X509 key name for encryption",
13970 errors.ECODE_INVAL)
13972 if not self.dest_x509_ca_pem:
13973 raise errors.OpPrereqError("Missing destination X509 CA",
13974 errors.ECODE_INVAL)
13976 def ExpandNames(self):
13977 self._ExpandAndLockInstance()
13979 # Lock all nodes for local exports
13980 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13981 # FIXME: lock only instance primary and destination node
13983 # Sad but true, for now we have to lock all nodes, as we don't know where
13984 # the previous export might be, and in this LU we search for it and
13985 # remove it from its current node. In the future we could fix this by:
13986 # - making a tasklet to search (share-lock all), then create the
13987 # new one, then one to remove, after
13988 # - removing the removal operation altogether
13989 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13991 def DeclareLocks(self, level):
13992 """Last minute lock declaration."""
13993 # All nodes are locked anyway, so nothing to do here.
13995 def BuildHooksEnv(self):
13996 """Build hooks env.
13998 This will run on the master, primary node and target node.
14000 """
14001 env = {
14002 "EXPORT_MODE": self.op.mode,
14003 "EXPORT_NODE": self.op.target_node,
14004 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14005 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14006 # TODO: Generic function for boolean env variables
14007 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14010 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14014 def BuildHooksNodes(self):
14015 """Build hooks nodes.
14018 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14020 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14021 nl.append(self.op.target_node)
14023 return (nl, nl)
14025 def CheckPrereq(self):
14026 """Check prerequisites.
14028 This checks that the instance and node names are valid.
14031 instance_name = self.op.instance_name
14033 self.instance = self.cfg.GetInstanceInfo(instance_name)
14034 assert self.instance is not None, \
14035 "Cannot retrieve locked instance %s" % self.op.instance_name
14036 _CheckNodeOnline(self, self.instance.primary_node)
14038 if (self.op.remove_instance and
14039 self.instance.admin_state == constants.ADMINST_UP and
14040 not self.op.shutdown):
14041 raise errors.OpPrereqError("Can not remove instance without shutting it"
14042 " down before", errors.ECODE_STATE)
14044 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14045 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14046 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14047 assert self.dst_node is not None
14049 _CheckNodeOnline(self, self.dst_node.name)
14050 _CheckNodeNotDrained(self, self.dst_node.name)
14052 self._cds = None
14053 self.dest_disk_info = None
14054 self.dest_x509_ca = None
14056 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14057 self.dst_node = None
14059 if len(self.op.target_node) != len(self.instance.disks):
14060 raise errors.OpPrereqError(("Received destination information for %s"
14061 " disks, but instance %s has %s disks") %
14062 (len(self.op.target_node), instance_name,
14063 len(self.instance.disks)),
14064 errors.ECODE_INVAL)
14066 cds = _GetClusterDomainSecret()
14068 # Check X509 key name
14069 try:
14070 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14071 except (TypeError, ValueError), err:
14072 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14073 errors.ECODE_INVAL)
14075 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14076 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14077 errors.ECODE_INVAL)
14079 # Load and verify CA
14080 try:
14081 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14082 except OpenSSL.crypto.Error, err:
14083 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14084 (err, ), errors.ECODE_INVAL)
14086 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14087 if errcode is not None:
14088 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14089 (msg, ), errors.ECODE_INVAL)
14091 self.dest_x509_ca = cert
14093 # Verify target information
14094 disk_info = []
14095 for idx, disk_data in enumerate(self.op.target_node):
14096 try:
14097 (host, port, magic) = \
14098 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14099 except errors.GenericError, err:
14100 raise errors.OpPrereqError("Target info for disk %s: %s" %
14101 (idx, err), errors.ECODE_INVAL)
14103 disk_info.append((host, port, magic))
14105 assert len(disk_info) == len(self.op.target_node)
14106 self.dest_disk_info = disk_info
14108 else:
14109 raise errors.ProgrammerError("Unhandled export mode %r" %
14110 self.op.mode)
14112 # instance disk type verification
14113 # TODO: Implement export support for file-based disks
14114 for disk in self.instance.disks:
14115 if disk.dev_type == constants.LD_FILE:
14116 raise errors.OpPrereqError("Export not supported for instances with"
14117 " file-based disks", errors.ECODE_INVAL)
14119 def _CleanupExports(self, feedback_fn):
14120 """Removes exports of current instance from all other nodes.
14122 If an instance in a cluster with nodes A..D was exported to node C, its
14123 exports will be removed from the nodes A, B and D.
14126 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14128 nodelist = self.cfg.GetNodeList()
14129 nodelist.remove(self.dst_node.name)
14131 # on one-node clusters nodelist will be empty after the removal
14132 # if we proceed the backup would be removed because OpBackupQuery
14133 # substitutes an empty list with the full cluster node list.
14134 iname = self.instance.name
14136 feedback_fn("Removing old exports for instance %s" % iname)
14137 exportlist = self.rpc.call_export_list(nodelist)
14138 for node in exportlist:
14139 if exportlist[node].fail_msg:
14140 continue
14141 if iname in exportlist[node].payload:
14142 msg = self.rpc.call_export_remove(node, iname).fail_msg
14143 if msg:
14144 self.LogWarning("Could not remove older export for instance %s"
14145 " on node %s: %s", iname, node, msg)
14147 def Exec(self, feedback_fn):
14148 """Export an instance to an image in the cluster.
14151 assert self.op.mode in constants.EXPORT_MODES
14153 instance = self.instance
14154 src_node = instance.primary_node
14156 if self.op.shutdown:
14157 # shutdown the instance, but not the disks
14158 feedback_fn("Shutting down instance %s" % instance.name)
14159 result = self.rpc.call_instance_shutdown(src_node, instance,
14160 self.op.shutdown_timeout)
14161 # TODO: Maybe ignore failures if ignore_remove_failures is set
14162 result.Raise("Could not shutdown instance %s on"
14163 " node %s" % (instance.name, src_node))
14165 # set the disks ID correctly since call_instance_start needs the
14166 # correct drbd minor to create the symlinks
14167 for disk in instance.disks:
14168 self.cfg.SetDiskID(disk, src_node)
14170 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14172 if activate_disks:
14173 # Activate the instance disks if we're exporting a stopped instance
14174 feedback_fn("Activating disks for %s" % instance.name)
14175 _StartInstanceDisks(self, instance, None)
14177 try:
14178 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14179 instance)
14181 helper.CreateSnapshots()
14182 try:
14183 if (self.op.shutdown and
14184 instance.admin_state == constants.ADMINST_UP and
14185 not self.op.remove_instance):
14186 assert not activate_disks
14187 feedback_fn("Starting instance %s" % instance.name)
14188 result = self.rpc.call_instance_start(src_node,
14189 (instance, None, None), False)
14190 msg = result.fail_msg
14191 if msg:
14192 feedback_fn("Failed to start instance: %s" % msg)
14193 _ShutdownInstanceDisks(self, instance)
14194 raise errors.OpExecError("Could not start instance: %s" % msg)
14196 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14197 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14198 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14199 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14200 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14202 (key_name, _, _) = self.x509_key_name
14204 dest_ca_pem = \
14205 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14206 self.dest_x509_ca)
14208 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14209 key_name, dest_ca_pem,
14210 timeouts)
14214 # Check for backwards compatibility
14215 assert len(dresults) == len(instance.disks)
14216 assert compat.all(isinstance(i, bool) for i in dresults), \
14217 "Not all results are boolean: %r" % dresults
14221 feedback_fn("Deactivating disks for %s" % instance.name)
14222 _ShutdownInstanceDisks(self, instance)
14224 if not (compat.all(dresults) and fin_resu):
14225 failures = []
14226 if not fin_resu:
14227 failures.append("export finalization")
14228 if not compat.all(dresults):
14229 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14230 if not dsk)
14231 failures.append("disk export: disk(s) %s" % fdsk)
14233 raise errors.OpExecError("Export failed, errors in %s" %
14234 utils.CommaJoin(failures))
14236 # At this point, the export was successful, we can cleanup/finish
14238 # Remove instance if requested
14239 if self.op.remove_instance:
14240 feedback_fn("Removing instance %s" % instance.name)
14241 _RemoveInstance(self, feedback_fn, instance,
14242 self.op.ignore_remove_failures)
14244 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14245 self._CleanupExports(feedback_fn)
14247 return fin_resu, dresults
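# The value returned above is (fin_resu, dresults): a boolean for export
# finalization plus one boolean per instance disk.  A fully successful export
# of a two-disk instance would therefore return (hypothetically):
#
#   (True, [True, True])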
14250 class LUBackupRemove(NoHooksLU):
14251 """Remove exports related to the named instance.
14256 def ExpandNames(self):
14257 self.needed_locks = {}
14258 # We need all nodes to be locked in order for RemoveExport to work, but we
14259 # don't need to lock the instance itself, as nothing will happen to it (and
14260 # we can remove exports also for a removed instance)
14261 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14263 def Exec(self, feedback_fn):
14264 """Remove any export.
14267 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14268 # If the instance was not found we'll try with the name that was passed in.
14269 # This will only work if it was an FQDN, though.
14270 fqdn_warn = False
14271 if not instance_name:
14272 fqdn_warn = True
14273 instance_name = self.op.instance_name
14275 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14276 exportlist = self.rpc.call_export_list(locked_nodes)
14277 found = False
14278 for node in exportlist:
14279 msg = exportlist[node].fail_msg
14280 if msg:
14281 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14282 continue
14283 if instance_name in exportlist[node].payload:
14284 found = True
14285 result = self.rpc.call_export_remove(node, instance_name)
14286 msg = result.fail_msg
14287 if msg:
14288 logging.error("Could not remove export for instance %s"
14289 " on node %s: %s", instance_name, node, msg)
14291 if fqdn_warn and not found:
14292 feedback_fn("Export not found. If trying to remove an export belonging"
14293 " to a deleted instance please use its Fully Qualified"
14297 class LUGroupAdd(LogicalUnit):
14298 """Logical unit for creating node groups.
14301 HPATH = "group-add"
14302 HTYPE = constants.HTYPE_GROUP
14303 REQ_BGL = False
14305 def ExpandNames(self):
14306 # We need the new group's UUID here so that we can create and acquire the
14307 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14308 # that it should not check whether the UUID exists in the configuration.
14309 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14310 self.needed_locks = {}
14311 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14313 def CheckPrereq(self):
14314 """Check prerequisites.
14316 This checks that the given group name is not an existing node group
14317 already.
14319 """
14320 try:
14321 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14322 except errors.OpPrereqError:
14323 pass
14324 else:
14325 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14326 " node group (UUID: %s)" %
14327 (self.op.group_name, existing_uuid),
14328 errors.ECODE_EXISTS)
14330 if self.op.ndparams:
14331 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14333 if self.op.hv_state:
14334 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14335 else:
14336 self.new_hv_state = None
14338 if self.op.disk_state:
14339 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14340 else:
14341 self.new_disk_state = None
14343 if self.op.diskparams:
14344 for templ in constants.DISK_TEMPLATES:
14345 if templ in self.op.diskparams:
14346 utils.ForceDictType(self.op.diskparams[templ],
14347 constants.DISK_DT_TYPES)
14348 self.new_diskparams = self.op.diskparams
14349 try:
14350 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14351 except errors.OpPrereqError, err:
14352 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14353 errors.ECODE_INVAL)
14354 else:
14355 self.new_diskparams = {}
14357 if self.op.ipolicy:
14358 cluster = self.cfg.GetClusterInfo()
14359 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14360 try:
14361 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14362 except errors.ConfigurationError, err:
14363 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14364 errors.ECODE_INVAL)
14366 def BuildHooksEnv(self):
14367 """Build hooks env.
14371 "GROUP_NAME": self.op.group_name,
14374 def BuildHooksNodes(self):
14375 """Build hooks nodes.
14378 mn = self.cfg.GetMasterNode()
14379 return ([mn], [mn])
14381 def Exec(self, feedback_fn):
14382 """Add the node group to the cluster.
14385 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14386 uuid=self.group_uuid,
14387 alloc_policy=self.op.alloc_policy,
14388 ndparams=self.op.ndparams,
14389 diskparams=self.new_diskparams,
14390 ipolicy=self.op.ipolicy,
14391 hv_state_static=self.new_hv_state,
14392 disk_state_static=self.new_disk_state)
14394 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14395 del self.remove_locks[locking.LEVEL_NODEGROUP]
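# Illustrative sketch (added; names and values hypothetical) of requests
# handled by LUGroupAdd:
#
#   gnt-group add --node-parameters exclusive_storage=true group2
#
# or, at the opcode level:
#
#   opcodes.OpGroupAdd(group_name="group2",
#                      alloc_policy=constants.ALLOC_POLICY_PREFERRED)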
14398 class LUGroupAssignNodes(NoHooksLU):
14399 """Logical unit for assigning nodes to groups.
14404 def ExpandNames(self):
14405 # These raise errors.OpPrereqError on their own:
14406 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14407 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14409 # We want to lock all the affected nodes and groups. We have readily
14410 # available the list of nodes, and the *destination* group. To gather the
14411 # list of "source" groups, we need to fetch node information later on.
14412 self.needed_locks = {
14413 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14414 locking.LEVEL_NODE: self.op.nodes,
14415 }
14417 def DeclareLocks(self, level):
14418 if level == locking.LEVEL_NODEGROUP:
14419 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14421 # Try to get all affected nodes' groups without having the group or node
14422 # lock yet. Needs verification later in the code flow.
14423 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14425 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14427 def CheckPrereq(self):
14428 """Check prerequisites.
14431 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14432 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14433 frozenset(self.op.nodes))
14435 expected_locks = (set([self.group_uuid]) |
14436 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14437 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14438 if actual_locks != expected_locks:
14439 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14440 " current groups are '%s', used to be '%s'" %
14441 (utils.CommaJoin(expected_locks),
14442 utils.CommaJoin(actual_locks)))
14444 self.node_data = self.cfg.GetAllNodesInfo()
14445 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14446 instance_data = self.cfg.GetAllInstancesInfo()
14448 if self.group is None:
14449 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14450 (self.op.group_name, self.group_uuid))
14452 (new_splits, previous_splits) = \
14453 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14454 for node in self.op.nodes],
14455 self.node_data, instance_data)
14457 if new_splits:
14458 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14460 if not self.op.force:
14461 raise errors.OpExecError("The following instances get split by this"
14462 " change and --force was not given: %s" %
14463 fmt_new_splits)
14464 else:
14465 self.LogWarning("This operation will split the following instances: %s",
14466 fmt_new_splits)
14468 if previous_splits:
14469 self.LogWarning("In addition, these already-split instances continue"
14470 " to be split across groups: %s",
14471 utils.CommaJoin(utils.NiceSort(previous_splits)))
14473 def Exec(self, feedback_fn):
14474 """Assign nodes to a new group.
14477 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14479 self.cfg.AssignGroupNodes(mods)
14481 @staticmethod
14482 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14483 """Check for split instances after a node assignment.
14485 This method considers a series of node assignments as an atomic operation,
14486 and returns information about split instances after applying the set of
14487 changes.
14489 In particular, it returns information about newly split instances, and
14490 instances that were already split, and remain so after the change.
14492 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14493 considered.
14495 @type changes: list of (node_name, new_group_uuid) pairs.
14496 @param changes: list of node assignments to consider.
14497 @param node_data: a dict with data for all nodes
14498 @param instance_data: a dict with all instances to consider
14499 @rtype: a two-tuple
14500 @return: a list of instances that were previously okay and result split as a
14501 consequence of this change, and a list of instances that were previously
14502 split and this change does not fix.
14504 """
14505 changed_nodes = dict((node, group) for node, group in changes
14506 if node_data[node].group != group)
14508 all_split_instances = set()
14509 previously_split_instances = set()
14511 def InstanceNodes(instance):
14512 return [instance.primary_node] + list(instance.secondary_nodes)
14514 for inst in instance_data.values():
14515 if inst.disk_template not in constants.DTS_INT_MIRROR:
14516 continue
14518 instance_nodes = InstanceNodes(inst)
14520 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14521 previously_split_instances.add(inst.name)
14523 if len(set(changed_nodes.get(node, node_data[node].group)
14524 for node in instance_nodes)) > 1:
14525 all_split_instances.add(inst.name)
14527 return (list(all_split_instances - previously_split_instances),
14528 list(previously_split_instances & all_split_instances))
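# --- Editor's note: illustrative sketch, not part of the original module. ---
# The split check above only needs plain mappings, so it can be exercised in
# isolation; all names and data below are invented for the example.
#
#   node_group = {"node1": "grp1", "node2": "grp1", "node3": "grp2"}
#   instance_nodes = {"inst1": ["node1", "node2"],   # mirrored within grp1
#                     "inst2": ["node2", "node3"]}   # already split
#   changes = {"node2": "grp2"}                      # proposed reassignment
#
#   def groups(nodes, overrides=None):
#     overrides = overrides or {}
#     return set(overrides.get(n, node_group[n]) for n in nodes)
#
#   new_splits = [i for i, nodes in instance_nodes.items()
#                 if len(groups(nodes)) == 1 and len(groups(nodes, changes)) > 1]
#   already_split = [i for i, nodes in instance_nodes.items()
#                    if len(groups(nodes)) > 1]
#   # new_splits == ["inst1"], already_split == ["inst2"]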
14531 class _GroupQuery(_QueryBase):
14532 FIELDS = query.GROUP_FIELDS
14534 def ExpandNames(self, lu):
14535 lu.needed_locks = {}
14537 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14538 self._cluster = lu.cfg.GetClusterInfo()
14539 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14541 if not self.names:
14542 self.wanted = [name_to_uuid[name]
14543 for name in utils.NiceSort(name_to_uuid.keys())]
14544 else:
14545 # Accept names to be either names or UUIDs.
14546 missing = []
14547 self.wanted = []
14548 all_uuid = frozenset(self._all_groups.keys())
14550 for name in self.names:
14551 if name in all_uuid:
14552 self.wanted.append(name)
14553 elif name in name_to_uuid:
14554 self.wanted.append(name_to_uuid[name])
14555 else:
14556 missing.append(name)
14558 if missing:
14559 raise errors.OpPrereqError("Some groups do not exist: %s" %
14560 utils.CommaJoin(missing),
14561 errors.ECODE_NOENT)
14563 def DeclareLocks(self, lu, level):
14564 pass
14566 def _GetQueryData(self, lu):
14567 """Computes the list of node groups and their attributes.
14570 do_nodes = query.GQ_NODE in self.requested_data
14571 do_instances = query.GQ_INST in self.requested_data
14573 group_to_nodes = None
14574 group_to_instances = None
14576 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14577 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14578 # latter GetAllInstancesInfo() is not enough, for we have to go through
14579 # instance->node. Hence, we will need to process nodes even if we only need
14580 # instance information.
14581 if do_nodes or do_instances:
14582 all_nodes = lu.cfg.GetAllNodesInfo()
14583 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14584 node_to_group = {}
14586 for node in all_nodes.values():
14587 if node.group in group_to_nodes:
14588 group_to_nodes[node.group].append(node.name)
14589 node_to_group[node.name] = node.group
14591 if do_instances:
14592 all_instances = lu.cfg.GetAllInstancesInfo()
14593 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14595 for instance in all_instances.values():
14596 node = instance.primary_node
14597 if node in node_to_group:
14598 group_to_instances[node_to_group[node]].append(instance.name)
14600 if not do_nodes:
14601 # Do not pass on node information if it was not requested.
14602 group_to_nodes = None
14604 return query.GroupQueryData(self._cluster,
14605 [self._all_groups[uuid]
14606 for uuid in self.wanted],
14607 group_to_nodes, group_to_instances,
14608 query.GQ_DISKPARAMS in self.requested_data)
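# --- Editor's note: illustrative sketch, not part of the original module. ---
# The node/instance maps built above reduce to two dictionary passes; toy
# data, names invented for the example.
#
#   nodes = {"n1": "g1", "n2": "g1", "n3": "g2"}      # node -> group UUID
#   instances = {"i1": "n1", "i2": "n3"}              # instance -> primary node
#   wanted = ["g1", "g2"]
#
#   group_to_nodes = dict((g, []) for g in wanted)
#   node_to_group = {}
#   for node, group in nodes.items():
#     if group in group_to_nodes:
#       group_to_nodes[group].append(node)
#       node_to_group[node] = group
#
#   group_to_instances = dict((g, []) for g in wanted)
#   for inst, pnode in instances.items():
#     if pnode in node_to_group:
#       group_to_instances[node_to_group[pnode]].append(inst)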
14611 class LUGroupQuery(NoHooksLU):
14612 """Logical unit for querying node groups.
14617 def CheckArguments(self):
14618 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14619 self.op.output_fields, False)
14621 def ExpandNames(self):
14622 self.gq.ExpandNames(self)
14624 def DeclareLocks(self, level):
14625 self.gq.DeclareLocks(self, level)
14627 def Exec(self, feedback_fn):
14628 return self.gq.OldStyleQuery(self)
14631 class LUGroupSetParams(LogicalUnit):
14632 """Modifies the parameters of a node group.
14635 HPATH = "group-modify"
14636 HTYPE = constants.HTYPE_GROUP
14639 def CheckArguments(self):
14640 all_changes = [
14641 self.op.ndparams,
14642 self.op.diskparams,
14643 self.op.alloc_policy,
14644 self.op.hv_state,
14645 self.op.disk_state,
14646 self.op.ipolicy,
14647 ]
14649 if all_changes.count(None) == len(all_changes):
14650 raise errors.OpPrereqError("Please pass at least one modification",
14651 errors.ECODE_INVAL)
14653 def ExpandNames(self):
14654 # This raises errors.OpPrereqError on its own:
14655 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14657 self.needed_locks = {
14658 locking.LEVEL_INSTANCE: [],
14659 locking.LEVEL_NODEGROUP: [self.group_uuid],
14660 }
14662 self.share_locks[locking.LEVEL_INSTANCE] = 1
14664 def DeclareLocks(self, level):
14665 if level == locking.LEVEL_INSTANCE:
14666 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14668 # Lock instances optimistically, needs verification once group lock has
14669 # been acquired
14670 self.needed_locks[locking.LEVEL_INSTANCE] = \
14671 self.cfg.GetNodeGroupInstances(self.group_uuid)
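# --- Editor's note: illustrative sketch, not part of the original module. ---
# The "lock optimistically, verify in CheckPrereq" pattern used here amounts
# to re-reading the group membership after the locks are held and comparing
# it with what was locked. A minimal standalone rendition with invented
# helper names:
#
#   def verify_optimistic_locks(locked_instances, current_group_instances):
#     """Raise if the group's instance list changed between lock acquisition
#     and verification."""
#     wanted = frozenset(current_group_instances)
#     owned = frozenset(locked_instances)
#     if wanted != owned:
#       raise RuntimeError("Instances changed since locks were acquired:"
#                          " missing %s, unexpected %s" %
#                          (sorted(wanted - owned), sorted(owned - wanted)))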
14673 @staticmethod
14674 def _UpdateAndVerifyDiskParams(old, new):
14675 """Updates and verifies disk parameters.
14677 """
14678 new_params = _GetUpdatedParams(old, new)
14679 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14680 return new_params
14682 def CheckPrereq(self):
14683 """Check prerequisites.
14686 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14688 # Check if locked instances are still correct
14689 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14691 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14692 cluster = self.cfg.GetClusterInfo()
14694 if self.group is None:
14695 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14696 (self.op.group_name, self.group_uuid))
14698 if self.op.ndparams:
14699 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14700 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14701 self.new_ndparams = new_ndparams
14703 if self.op.diskparams:
14704 diskparams = self.group.diskparams
14705 uavdp = self._UpdateAndVerifyDiskParams
14706 # For each disktemplate subdict update and verify the values
14707 new_diskparams = dict((dt,
14708 uavdp(diskparams.get(dt, {}),
14709 self.op.diskparams[dt]))
14710 for dt in constants.DISK_TEMPLATES
14711 if dt in self.op.diskparams)
14712 # Now that all subdicts of diskparams are ready, merge the existing
14713 # dict with all updated subdicts
14714 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14715 try:
14716 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14717 except errors.OpPrereqError, err:
14718 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14719 errors.ECODE_INVAL)
14721 if self.op.hv_state:
14722 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14723 self.group.hv_state_static)
14725 if self.op.disk_state:
14726 self.new_disk_state = \
14727 _MergeAndVerifyDiskState(self.op.disk_state,
14728 self.group.disk_state_static)
14730 if self.op.ipolicy:
14731 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14732 self.op.ipolicy,
14733 group_policy=True)
14735 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14736 inst_filter = lambda inst: inst.name in owned_instances
14737 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14738 gmi = ganeti.masterd.instance
14739 violations = \
14740 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14741 self.group),
14742 new_ipolicy, instances)
14744 if violations:
14745 self.LogWarning("After the ipolicy change the following instances"
14746 " violate them: %s",
14747 utils.CommaJoin(violations))
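# --- Editor's note: illustrative sketch, not part of the original module. ---
# The diskparams handling above merges one sub-dict per disk template, then
# overlays the result on the existing settings (objects.FillDict behaves like
# dict.update on a copy). With plain dicts and invented values:
#
#   old = {"drbd": {"resync-rate": 1024, "barriers": "n"}, "plain": {}}
#   new = {"drbd": {"resync-rate": 4096}}
#
#   merged_subdicts = dict((dt, dict(old.get(dt, {}), **new[dt]))
#                          for dt in new)
#   result = dict(old, **merged_subdicts)
#   # result["drbd"] == {"resync-rate": 4096, "barriers": "n"}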
14749 def BuildHooksEnv(self):
14750 """Build hooks env.
14752 """
14753 return {
14754 "GROUP_NAME": self.op.group_name,
14755 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14756 }
14758 def BuildHooksNodes(self):
14759 """Build hooks nodes.
14762 mn = self.cfg.GetMasterNode()
14763 return ([mn], [mn])
14765 def Exec(self, feedback_fn):
14766 """Modifies the node group.
14768 """
14769 result = []
14771 if self.op.ndparams:
14772 self.group.ndparams = self.new_ndparams
14773 result.append(("ndparams", str(self.group.ndparams)))
14775 if self.op.diskparams:
14776 self.group.diskparams = self.new_diskparams
14777 result.append(("diskparams", str(self.group.diskparams)))
14779 if self.op.alloc_policy:
14780 self.group.alloc_policy = self.op.alloc_policy
14782 if self.op.hv_state:
14783 self.group.hv_state_static = self.new_hv_state
14785 if self.op.disk_state:
14786 self.group.disk_state_static = self.new_disk_state
14788 if self.op.ipolicy:
14789 self.group.ipolicy = self.new_ipolicy
14791 self.cfg.Update(self.group, feedback_fn)
14793 return result
14795 class LUGroupRemove(LogicalUnit):
14796 HPATH = "group-remove"
14797 HTYPE = constants.HTYPE_GROUP
14800 def ExpandNames(self):
14801 # This raises errors.OpPrereqError on its own:
14802 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14803 self.needed_locks = {
14804 locking.LEVEL_NODEGROUP: [self.group_uuid],
14805 }
14807 def CheckPrereq(self):
14808 """Check prerequisites.
14810 This checks that the given group name exists as a node group, that it is
14811 empty (i.e., contains no nodes), and that it is not the last group of the
14812 cluster.
14814 """
14815 # Verify that the group is empty.
14816 group_nodes = [node.name
14817 for node in self.cfg.GetAllNodesInfo().values()
14818 if node.group == self.group_uuid]
14820 if group_nodes:
14821 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14822 " nodes: %s" %
14823 (self.op.group_name,
14824 utils.CommaJoin(utils.NiceSort(group_nodes))),
14825 errors.ECODE_STATE)
14827 # Verify the cluster would not be left group-less.
14828 if len(self.cfg.GetNodeGroupList()) == 1:
14829 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14830 " removed" % self.op.group_name,
14831 errors.ECODE_STATE)
14833 def BuildHooksEnv(self):
14834 """Build hooks env.
14836 """
14837 return {
14838 "GROUP_NAME": self.op.group_name,
14839 }
14841 def BuildHooksNodes(self):
14842 """Build hooks nodes.
14845 mn = self.cfg.GetMasterNode()
14846 return ([mn], [mn])
14848 def Exec(self, feedback_fn):
14849 """Remove the node group.
14852 try:
14853 self.cfg.RemoveNodeGroup(self.group_uuid)
14854 except errors.ConfigurationError:
14855 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14856 (self.op.group_name, self.group_uuid))
14858 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14861 class LUGroupRename(LogicalUnit):
14862 HPATH = "group-rename"
14863 HTYPE = constants.HTYPE_GROUP
14866 def ExpandNames(self):
14867 # This raises errors.OpPrereqError on its own:
14868 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14870 self.needed_locks = {
14871 locking.LEVEL_NODEGROUP: [self.group_uuid],
14872 }
14874 def CheckPrereq(self):
14875 """Check prerequisites.
14877 Ensures requested new name is not yet used.
14879 """
14880 try:
14881 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14882 except errors.OpPrereqError:
14883 pass
14884 else:
14885 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14886 " node group (UUID: %s)" %
14887 (self.op.new_name, new_name_uuid),
14888 errors.ECODE_EXISTS)
14890 def BuildHooksEnv(self):
14891 """Build hooks env.
14893 """
14894 return {
14895 "OLD_NAME": self.op.group_name,
14896 "NEW_NAME": self.op.new_name,
14897 }
14899 def BuildHooksNodes(self):
14900 """Build hooks nodes.
14903 mn = self.cfg.GetMasterNode()
14905 all_nodes = self.cfg.GetAllNodesInfo()
14906 all_nodes.pop(mn, None)
14908 run_nodes = [mn]
14909 run_nodes.extend(node.name for node in all_nodes.values()
14910 if node.group == self.group_uuid)
14912 return (run_nodes, run_nodes)
14914 def Exec(self, feedback_fn):
14915 """Rename the node group.
14918 group = self.cfg.GetNodeGroup(self.group_uuid)
14920 if group is None:
14921 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14922 (self.op.group_name, self.group_uuid))
14924 group.name = self.op.new_name
14925 self.cfg.Update(group, feedback_fn)
14927 return self.op.new_name
14930 class LUGroupEvacuate(LogicalUnit):
14931 HPATH = "group-evacuate"
14932 HTYPE = constants.HTYPE_GROUP
14935 def ExpandNames(self):
14936 # This raises errors.OpPrereqError on its own:
14937 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14939 if self.op.target_groups:
14940 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14941 self.op.target_groups)
14942 else:
14943 self.req_target_uuids = []
14945 if self.group_uuid in self.req_target_uuids:
14946 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14947 " as a target group (targets are %s)" %
14948 (self.group_uuid,
14949 utils.CommaJoin(self.req_target_uuids)),
14950 errors.ECODE_INVAL)
14952 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14954 self.share_locks = _ShareAll()
14955 self.needed_locks = {
14956 locking.LEVEL_INSTANCE: [],
14957 locking.LEVEL_NODEGROUP: [],
14958 locking.LEVEL_NODE: [],
14959 }
14961 def DeclareLocks(self, level):
14962 if level == locking.LEVEL_INSTANCE:
14963 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14965 # Lock instances optimistically, needs verification once node and group
14966 # locks have been acquired
14967 self.needed_locks[locking.LEVEL_INSTANCE] = \
14968 self.cfg.GetNodeGroupInstances(self.group_uuid)
14970 elif level == locking.LEVEL_NODEGROUP:
14971 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14973 if self.req_target_uuids:
14974 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14976 # Lock all groups used by instances optimistically; this requires going
14977 # via the node before it's locked, requiring verification later on
14978 lock_groups.update(group_uuid
14979 for instance_name in
14980 self.owned_locks(locking.LEVEL_INSTANCE)
14981 for group_uuid in
14982 self.cfg.GetInstanceNodeGroups(instance_name))
14983 else:
14984 # No target groups, need to lock all of them
14985 lock_groups = locking.ALL_SET
14987 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14989 elif level == locking.LEVEL_NODE:
14990 # This will only lock the nodes in the group to be evacuated which
14991 # contain actual instances
14992 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14993 self._LockInstancesNodes()
14995 # Lock all nodes in group to be evacuated and target groups
14996 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14997 assert self.group_uuid in owned_groups
14998 member_nodes = [node_name
14999 for group in owned_groups
15000 for node_name in self.cfg.GetNodeGroup(group).members]
15001 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15003 def CheckPrereq(self):
15004 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15005 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15006 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15008 assert owned_groups.issuperset(self.req_target_uuids)
15009 assert self.group_uuid in owned_groups
15011 # Check if locked instances are still correct
15012 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15014 # Get instance information
15015 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15017 # Check if node groups for locked instances are still correct
15018 _CheckInstancesNodeGroups(self.cfg, self.instances,
15019 owned_groups, owned_nodes, self.group_uuid)
15021 if self.req_target_uuids:
15022 # User requested specific target groups
15023 self.target_uuids = self.req_target_uuids
15024 else:
15025 # All groups except the one to be evacuated are potential targets
15026 self.target_uuids = [group_uuid for group_uuid in owned_groups
15027 if group_uuid != self.group_uuid]
15029 if not self.target_uuids:
15030 raise errors.OpPrereqError("There are no possible target groups",
15031 errors.ECODE_INVAL)
15033 def BuildHooksEnv(self):
15034 """Build hooks env.
15036 """
15037 return {
15038 "GROUP_NAME": self.op.group_name,
15039 "TARGET_GROUPS": " ".join(self.target_uuids),
15040 }
15042 def BuildHooksNodes(self):
15043 """Build hooks nodes.
15046 mn = self.cfg.GetMasterNode()
15048 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15050 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15052 return (run_nodes, run_nodes)
15054 def Exec(self, feedback_fn):
15055 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15057 assert self.group_uuid not in self.target_uuids
15059 req = iallocator.IAReqGroupChange(instances=instances,
15060 target_groups=self.target_uuids)
15061 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15063 ial.Run(self.op.iallocator)
15065 if not ial.success:
15066 raise errors.OpPrereqError("Can't compute group evacuation using"
15067 " iallocator '%s': %s" %
15068 (self.op.iallocator, ial.info),
15069 errors.ECODE_NORES)
15071 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15073 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15074 len(jobs), self.op.group_name)
15076 return ResultWithJobs(jobs)
15079 class TagsLU(NoHooksLU): # pylint: disable=W0223
15080 """Generic tags LU.
15082 This is an abstract class which is the parent of all the other tags LUs.
15085 def ExpandNames(self):
15086 self.group_uuid = None
15087 self.needed_locks = {}
15089 if self.op.kind == constants.TAG_NODE:
15090 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15091 lock_level = locking.LEVEL_NODE
15092 lock_name = self.op.name
15093 elif self.op.kind == constants.TAG_INSTANCE:
15094 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15095 lock_level = locking.LEVEL_INSTANCE
15096 lock_name = self.op.name
15097 elif self.op.kind == constants.TAG_NODEGROUP:
15098 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15099 lock_level = locking.LEVEL_NODEGROUP
15100 lock_name = self.group_uuid
15101 elif self.op.kind == constants.TAG_NETWORK:
15102 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15103 lock_level = locking.LEVEL_NETWORK
15104 lock_name = self.network_uuid
15105 else:
15106 lock_level = None
15107 lock_name = None
15109 if lock_level and getattr(self.op, "use_locking", True):
15110 self.needed_locks[lock_level] = lock_name
15112 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15113 # not possible to acquire the BGL based on opcode parameters)
15115 def CheckPrereq(self):
15116 """Check prerequisites.
15119 if self.op.kind == constants.TAG_CLUSTER:
15120 self.target = self.cfg.GetClusterInfo()
15121 elif self.op.kind == constants.TAG_NODE:
15122 self.target = self.cfg.GetNodeInfo(self.op.name)
15123 elif self.op.kind == constants.TAG_INSTANCE:
15124 self.target = self.cfg.GetInstanceInfo(self.op.name)
15125 elif self.op.kind == constants.TAG_NODEGROUP:
15126 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15127 elif self.op.kind == constants.TAG_NETWORK:
15128 self.target = self.cfg.GetNetwork(self.network_uuid)
15129 else:
15130 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15131 str(self.op.kind), errors.ECODE_INVAL)
15134 class LUTagsGet(TagsLU):
15135 """Returns the tags of a given object.
15140 def ExpandNames(self):
15141 TagsLU.ExpandNames(self)
15143 # Share locks as this is only a read operation
15144 self.share_locks = _ShareAll()
15146 def Exec(self, feedback_fn):
15147 """Returns the tag list.
15150 return list(self.target.GetTags())
15153 class LUTagsSearch(NoHooksLU):
15154 """Searches the tags for a given pattern.
15159 def ExpandNames(self):
15160 self.needed_locks = {}
15162 def CheckPrereq(self):
15163 """Check prerequisites.
15165 This checks the pattern passed for validity by compiling it.
15167 """
15168 try:
15169 self.re = re.compile(self.op.pattern)
15170 except re.error, err:
15171 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15172 (self.op.pattern, err), errors.ECODE_INVAL)
15174 def Exec(self, feedback_fn):
15175 """Returns the tag list.
15177 """
15178 cfg = self.cfg
15179 tgts = [("/cluster", cfg.GetClusterInfo())]
15180 ilist = cfg.GetAllInstancesInfo().values()
15181 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15182 nlist = cfg.GetAllNodesInfo().values()
15183 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15184 tgts.extend(("/nodegroup/%s" % n.name, n)
15185 for n in cfg.GetAllNodeGroupsInfo().values())
15186 results = []
15187 for path, target in tgts:
15188 for tag in target.GetTags():
15189 if self.re.search(tag):
15190 results.append((path, tag))
15192 return results
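# --- Editor's note: illustrative sketch, not part of the original module. ---
# The search walks every taggable object and keeps (path, tag) pairs whose
# tag matches the compiled pattern. A self-contained miniature with made-up
# tags:
#
#   import re
#   targets = [("/cluster", ["env:prod"]),
#              ("/instances/web1", ["env:prod", "role:frontend"]),
#              ("/nodes/node1", ["rack:a3"])]
#   pattern = re.compile(r"^env:")
#   matches = [(path, tag)
#              for path, tags in targets
#              for tag in tags
#              if pattern.search(tag)]
#   # matches == [("/cluster", "env:prod"), ("/instances/web1", "env:prod")]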
15194 class LUTagsSet(TagsLU):
15195 """Sets a tag on a given object.
15200 def CheckPrereq(self):
15201 """Check prerequisites.
15203 This checks the type and length of the tag name and value.
15206 TagsLU.CheckPrereq(self)
15207 for tag in self.op.tags:
15208 objects.TaggableObject.ValidateTag(tag)
15210 def Exec(self, feedback_fn):
15211 """Sets the tag.
15213 """
15214 try:
15215 for tag in self.op.tags:
15216 self.target.AddTag(tag)
15217 except errors.TagError, err:
15218 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15219 self.cfg.Update(self.target, feedback_fn)
15222 class LUTagsDel(TagsLU):
15223 """Delete a list of tags from a given object.
15228 def CheckPrereq(self):
15229 """Check prerequisites.
15231 This checks that we have the given tag.
15234 TagsLU.CheckPrereq(self)
15235 for tag in self.op.tags:
15236 objects.TaggableObject.ValidateTag(tag)
15237 del_tags = frozenset(self.op.tags)
15238 cur_tags = self.target.GetTags()
15240 diff_tags = del_tags - cur_tags
15241 if diff_tags:
15242 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15243 raise errors.OpPrereqError("Tag(s) %s not found" %
15244 (utils.CommaJoin(diff_names), ),
15245 errors.ECODE_NOENT)
15247 def Exec(self, feedback_fn):
15248 """Remove the tag from the object.
15251 for tag in self.op.tags:
15252 self.target.RemoveTag(tag)
15253 self.cfg.Update(self.target, feedback_fn)
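# --- Editor's note: illustrative sketch, not part of the original module. ---
# The prerequisite check for deletion is a plain set difference between the
# requested tags and the tags the object currently carries. With made-up
# tags:
#
#   current = set(["env:prod", "role:db"])
#   to_delete = frozenset(["role:db", "role:cache"])
#   missing = to_delete - current
#   if missing:
#     raise ValueError("Tag(s) %s not found" % ", ".join(sorted(missing)))
#   # here: raises, because "role:cache" is not set on the object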
15256 class LUTestDelay(NoHooksLU):
15257 """Sleep for a specified amount of time.
15259 This LU sleeps on the master and/or nodes for a specified amount of
15260 time.
15262 """
15263 REQ_BGL = False
15265 def ExpandNames(self):
15266 """Expand names and set required locks.
15268 This expands the node list, if any.
15271 self.needed_locks = {}
15272 if self.op.on_nodes:
15273 # _GetWantedNodes can be used here, but is not always appropriate to use
15274 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15275 # more information.
15276 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15277 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15279 def _TestDelay(self):
15280 """Do the actual sleep.
15283 if self.op.on_master:
15284 if not utils.TestDelay(self.op.duration):
15285 raise errors.OpExecError("Error during master delay test")
15286 if self.op.on_nodes:
15287 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15288 for node, node_result in result.items():
15289 node_result.Raise("Failure during rpc call to node %s" % node)
15291 def Exec(self, feedback_fn):
15292 """Execute the test delay opcode, with the wanted repetitions.
15295 if self.op.repeat == 0:
15296 self._TestDelay()
15297 else:
15298 top_value = self.op.repeat - 1
15299 for i in range(self.op.repeat):
15300 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15301 self._TestDelay()
15304 class LURestrictedCommand(NoHooksLU):
15305 """Logical unit for executing restricted commands.
15310 def ExpandNames(self):
15311 if self.op.nodes:
15312 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15314 self.needed_locks = {
15315 locking.LEVEL_NODE: self.op.nodes,
15316 }
15317 self.share_locks = {
15318 locking.LEVEL_NODE: not self.op.use_locking,
15319 }
15321 def CheckPrereq(self):
15322 """Check prerequisites.
15326 def Exec(self, feedback_fn):
15327 """Execute restricted command and return output.
15330 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15332 # Check if correct locks are held
15333 assert set(self.op.nodes).issubset(owned_nodes)
15335 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15337 result = []
15339 for node_name in self.op.nodes:
15340 nres = rpcres[node_name]
15341 if nres.fail_msg:
15342 msg = ("Command '%s' on node '%s' failed: %s" %
15343 (self.op.command, node_name, nres.fail_msg))
15344 result.append((False, msg))
15345 else:
15346 result.append((True, nres.payload))
15348 return result
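# --- Editor's note: illustrative sketch, not part of the original module. ---
# The result produced above is a list of (success, output-or-error) pairs,
# one per requested node and in the same order. A toy aggregation over
# invented RPC results:
#
#   rpc_results = {"node1": (None, "uptime 12 days"),      # (fail_msg, payload)
#                  "node2": ("connection refused", None)}
#   nodes = ["node1", "node2"]
#   result = []
#   for name in nodes:
#     fail_msg, payload = rpc_results[name]
#     if fail_msg:
#       result.append((False, "Command on node '%s' failed: %s" % (name, fail_msg)))
#     else:
#       result.append((True, payload))
#   # result == [(True, "uptime 12 days"),
#   #            (False, "Command on node 'node2' failed: connection refused")]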
15351 class LUTestJqueue(NoHooksLU):
15352 """Utility LU to test some aspects of the job queue.
15357 # Must be lower than default timeout for WaitForJobChange to see whether it
15358 # notices changed jobs
15359 _CLIENT_CONNECT_TIMEOUT = 20.0
15360 _CLIENT_CONFIRM_TIMEOUT = 60.0
15362 @classmethod
15363 def _NotifyUsingSocket(cls, cb, errcls):
15364 """Opens a Unix socket and waits for another program to connect.
15367 @param cb: Callback to send socket name to client
15368 @type errcls: class
15369 @param errcls: Exception class to use for errors
15372 # Using a temporary directory as there's no easy way to create temporary
15373 # sockets without writing a custom loop around tempfile.mktemp and
15374 # socket.bind
15375 tmpdir = tempfile.mkdtemp()
15376 try:
15377 tmpsock = utils.PathJoin(tmpdir, "sock")
15379 logging.debug("Creating temporary socket at %s", tmpsock)
15380 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15381 try:
15382 sock.bind(tmpsock)
15383 sock.listen(1)
15385 # Send details to client
15386 cb(tmpsock)
15388 # Wait for client to connect before continuing
15389 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15390 try:
15391 (conn, _) = sock.accept()
15392 except socket.error, err:
15393 raise errcls("Client didn't connect in time (%s)" % err)
15394 finally:
15395 sock.close()
15396 finally:
15397 # Remove as soon as client is connected
15398 shutil.rmtree(tmpdir)
15400 # Wait for client to close
15401 try:
15402 try:
15403 # pylint: disable=E1101
15404 # Instance of '_socketobject' has no ... member
15405 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15406 conn.recv(1)
15407 except socket.error, err:
15408 raise errcls("Client failed to confirm notification (%s)" % err)
15409 finally:
15410 conn.close()
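# --- Editor's note: illustrative sketch, not part of the original module. ---
# The notification helper above boils down to "create a throw-away Unix
# socket, hand its path to the client, then block until the client connects
# and again until it answers". A trimmed-down standalone version (error
# handling and cleanup omitted for brevity; names invented):
#
#   import os, socket, tempfile
#
#   def wait_for_client(notify_cb, connect_timeout=20.0, confirm_timeout=60.0):
#     tmpdir = tempfile.mkdtemp()
#     sock_path = os.path.join(tmpdir, "sock")
#     sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#     sock.bind(sock_path)
#     sock.listen(1)
#     notify_cb(sock_path)            # tell the client where to connect
#     sock.settimeout(connect_timeout)
#     conn, _ = sock.accept()         # raises socket.timeout if nobody shows up
#     conn.settimeout(confirm_timeout)
#     conn.recv(1)                    # wait for the client's confirmation byte
#     conn.close()
#     sock.close()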
15412 def _SendNotification(self, test, arg, sockname):
15413 """Sends a notification to the client.
15416 @param test: Test name
15417 @param arg: Test argument (depends on test)
15418 @type sockname: string
15419 @param sockname: Socket path
15422 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15424 def _Notify(self, prereq, test, arg):
15425 """Notifies the client of a test.
15428 @param prereq: Whether this is a prereq-phase test
15430 @param test: Test name
15431 @param arg: Test argument (depends on test)
15433 """
15434 if prereq:
15435 errcls = errors.OpPrereqError
15436 else:
15437 errcls = errors.OpExecError
15439 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15440 test, arg),
15441 errcls)
15443 def CheckArguments(self):
15444 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15445 self.expandnames_calls = 0
15447 def ExpandNames(self):
15448 checkargs_calls = getattr(self, "checkargs_calls", 0)
15449 if checkargs_calls < 1:
15450 raise errors.ProgrammerError("CheckArguments was not called")
15452 self.expandnames_calls += 1
15454 if self.op.notify_waitlock:
15455 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15457 self.LogInfo("Expanding names")
15459 # Get lock on master node (just to get a lock, not for a particular reason)
15460 self.needed_locks = {
15461 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15464 def Exec(self, feedback_fn):
15465 if self.expandnames_calls < 1:
15466 raise errors.ProgrammerError("ExpandNames was not called")
15468 if self.op.notify_exec:
15469 self._Notify(False, constants.JQT_EXEC, None)
15471 self.LogInfo("Executing")
15473 if self.op.log_messages:
15474 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15475 for idx, msg in enumerate(self.op.log_messages):
15476 self.LogInfo("Sending log message %s", idx + 1)
15477 feedback_fn(constants.JQT_MSGPREFIX + msg)
15478 # Report how many test messages have been sent
15479 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15481 if self.op.fail:
15482 raise errors.OpExecError("Opcode failure was requested")
15484 return True
15487 class LUTestAllocator(NoHooksLU):
15488 """Run allocator tests.
15490 This LU runs the allocator tests
15493 def CheckPrereq(self):
15494 """Check prerequisites.
15496 This checks the opcode parameters depending on the direction and mode test.
15499 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15500 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15501 for attr in ["memory", "disks", "disk_template",
15502 "os", "tags", "nics", "vcpus"]:
15503 if not hasattr(self.op, attr):
15504 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15505 attr, errors.ECODE_INVAL)
15506 iname = self.cfg.ExpandInstanceName(self.op.name)
15507 if iname is not None:
15508 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15509 iname, errors.ECODE_EXISTS)
15510 if not isinstance(self.op.nics, list):
15511 raise errors.OpPrereqError("Invalid parameter 'nics'",
15512 errors.ECODE_INVAL)
15513 if not isinstance(self.op.disks, list):
15514 raise errors.OpPrereqError("Invalid parameter 'disks'",
15515 errors.ECODE_INVAL)
15516 for row in self.op.disks:
15517 if (not isinstance(row, dict) or
15518 constants.IDISK_SIZE not in row or
15519 not isinstance(row[constants.IDISK_SIZE], int) or
15520 constants.IDISK_MODE not in row or
15521 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15522 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15523 " parameter", errors.ECODE_INVAL)
15524 if self.op.hypervisor is None:
15525 self.op.hypervisor = self.cfg.GetHypervisorType()
15526 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15527 fname = _ExpandInstanceName(self.cfg, self.op.name)
15528 self.op.name = fname
15529 self.relocate_from = \
15530 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15531 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15532 constants.IALLOCATOR_MODE_NODE_EVAC):
15533 if not self.op.instances:
15534 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15535 self.op.instances = _GetWantedInstances(self, self.op.instances)
15536 else:
15537 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15538 self.op.mode, errors.ECODE_INVAL)
15540 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15541 if self.op.iallocator is None:
15542 raise errors.OpPrereqError("Missing allocator name",
15543 errors.ECODE_INVAL)
15544 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15545 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15546 self.op.direction, errors.ECODE_INVAL)
15548 def Exec(self, feedback_fn):
15549 """Run the allocator test.
15552 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15553 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15554 memory=self.op.memory,
15555 disks=self.op.disks,
15556 disk_template=self.op.disk_template,
15557 os=self.op.os,
15558 tags=self.op.tags,
15559 nics=self.op.nics,
15560 vcpus=self.op.vcpus,
15561 spindle_use=self.op.spindle_use,
15562 hypervisor=self.op.hypervisor)
15563 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15564 req = iallocator.IAReqRelocate(name=self.op.name,
15565 relocate_from=list(self.relocate_from))
15566 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15567 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15568 target_groups=self.op.target_groups)
15569 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15570 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15571 evac_mode=self.op.evac_mode)
15572 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15573 disk_template = self.op.disk_template
15574 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15575 memory=self.op.memory,
15576 disks=self.op.disks,
15577 disk_template=disk_template,
15578 os=self.op.os,
15579 tags=self.op.tags,
15580 nics=self.op.nics,
15581 vcpus=self.op.vcpus,
15582 spindle_use=self.op.spindle_use,
15583 hypervisor=self.op.hypervisor)
15584 for idx in range(self.op.count)]
15585 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15586 else:
15587 raise errors.ProgrammerError("Uncaught mode %s in"
15588 " LUTestAllocator.Exec", self.op.mode)
15590 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15591 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15592 result = ial.in_text
15593 else:
15594 ial.Run(self.op.iallocator, validate=False)
15595 result = ial.out_text
15597 return result
15599 class LUNetworkAdd(LogicalUnit):
15600 """Logical unit for creating networks.
15603 HPATH = "network-add"
15604 HTYPE = constants.HTYPE_NETWORK
15607 def BuildHooksNodes(self):
15608 """Build hooks nodes.
15611 mn = self.cfg.GetMasterNode()
15612 return ([mn], [mn])
15614 def ExpandNames(self):
15615 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15617 if self.op.conflicts_check:
15618 self.share_locks[locking.LEVEL_NODE] = 1
15619 self.needed_locks = {
15620 locking.LEVEL_NODE: locking.ALL_SET,
15621 }
15622 else:
15623 self.needed_locks = {}
15625 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15627 def CheckPrereq(self):
15628 """Check prerequisites.
15630 This checks that the given group name is not an existing node group
15634 if self.op.network is None:
15635 raise errors.OpPrereqError("Network must be given",
15636 errors.ECODE_INVAL)
15638 uuid = self.cfg.LookupNetwork(self.op.network_name)
15640 if uuid:
15641 raise errors.OpPrereqError("Network '%s' already defined" %
15642 self.op.network, errors.ECODE_EXISTS)
15644 if self.op.mac_prefix:
15645 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15647 # Check tag validity
15648 for tag in self.op.tags:
15649 objects.TaggableObject.ValidateTag(tag)
15651 def BuildHooksEnv(self):
15652 """Build hooks env.
15654 """
15655 args = {
15656 "name": self.op.network_name,
15657 "subnet": self.op.network,
15658 "gateway": self.op.gateway,
15659 "network6": self.op.network6,
15660 "gateway6": self.op.gateway6,
15661 "mac_prefix": self.op.mac_prefix,
15662 "network_type": self.op.network_type,
15663 "tags": self.op.tags,
15664 }
15665 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15667 def Exec(self, feedback_fn):
15668 """Add the ip pool to the cluster.
15671 nobj = objects.Network(name=self.op.network_name,
15672 network=self.op.network,
15673 gateway=self.op.gateway,
15674 network6=self.op.network6,
15675 gateway6=self.op.gateway6,
15676 mac_prefix=self.op.mac_prefix,
15677 network_type=self.op.network_type,
15678 uuid=self.network_uuid,
15679 family=constants.IP4_VERSION)
15680 # Initialize the associated address pool
15681 try:
15682 pool = network.AddressPool.InitializeNetwork(nobj)
15683 except errors.AddressPoolError, e:
15684 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15686 # Check if we need to reserve the nodes and the cluster master IP
15687 # These may not be allocated to any instances in routed mode, as
15688 # they wouldn't function anyway.
15689 if self.op.conflicts_check:
15690 for node in self.cfg.GetAllNodesInfo().values():
15691 for ip in [node.primary_ip, node.secondary_ip]:
15692 try:
15693 if pool.Contains(ip):
15694 pool.Reserve(ip)
15695 self.LogInfo("Reserved IP address of node '%s' (%s)",
15696 node.name, ip)
15697 except errors.AddressPoolError:
15698 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
15699 node.name, ip)
15701 master_ip = self.cfg.GetClusterInfo().master_ip
15702 try:
15703 if pool.Contains(master_ip):
15704 pool.Reserve(master_ip)
15705 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
15706 except errors.AddressPoolError:
15707 self.LogWarning("Cannot reserve cluster master IP address (%s)",
15708 master_ip)
15710 if self.op.add_reserved_ips:
15711 for ip in self.op.add_reserved_ips:
15712 try:
15713 pool.Reserve(ip, external=True)
15714 except errors.AddressPoolError, e:
15715 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15718 for tag in self.op.tags:
15719 nobj.AddTag(tag)
15721 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15722 del self.remove_locks[locking.LEVEL_NETWORK]
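# --- Editor's note: illustrative sketch, not part of the original module. ---
# The reservation logic above can be mimicked with the "ipaddress" module
# from the Python 3 standard library (a backport exists for Python 2) instead
# of network.AddressPool; names and addresses below are invented.
#
#   import ipaddress
#
#   subnet = ipaddress.ip_network(u"192.0.2.0/29")
#   reserved = set()
#
#   def reserve(ip):
#     addr = ipaddress.ip_address(ip)
#     if addr not in subnet:
#       raise ValueError("%s not in %s" % (ip, subnet))
#     if addr in reserved:
#       raise ValueError("%s already reserved" % ip)
#     reserved.add(addr)
#
#   reserve(u"192.0.2.1")   # e.g. the gateway or a node address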
15725 class LUNetworkRemove(LogicalUnit):
15726 HPATH = "network-remove"
15727 HTYPE = constants.HTYPE_NETWORK
15730 def ExpandNames(self):
15731 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15733 if not self.network_uuid:
15734 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15735 errors.ECODE_INVAL)
15737 self.share_locks[locking.LEVEL_NODEGROUP] = 1
15738 self.needed_locks = {
15739 locking.LEVEL_NETWORK: [self.network_uuid],
15740 locking.LEVEL_NODEGROUP: locking.ALL_SET,
15741 }
15743 def CheckPrereq(self):
15744 """Check prerequisites.
15746 This checks that the given network name exists as a network and that it
15747 is not connected to any node group (i.e., no node group has this network
15748 assigned to it).
15750 """
15752 # Verify that the network is not connected.
15753 node_groups = [group.name
15754 for group in self.cfg.GetAllNodeGroupsInfo().values()
15755 for net in group.networks.keys()
15756 if net == self.network_uuid]
15758 if node_groups:
15759 self.LogWarning("Network '%s' is connected to the following"
15760 " node groups: %s" % (self.op.network_name,
15761 utils.CommaJoin(utils.NiceSort(node_groups))))
15762 raise errors.OpPrereqError("Network still connected",
15763 errors.ECODE_STATE)
15765 def BuildHooksEnv(self):
15766 """Build hooks env.
15768 """
15769 return {
15770 "NETWORK_NAME": self.op.network_name,
15771 }
15773 def BuildHooksNodes(self):
15774 """Build hooks nodes.
15777 mn = self.cfg.GetMasterNode()
15778 return ([mn], [mn])
15780 def Exec(self, feedback_fn):
15781 """Remove the network.
15783 """
15784 try:
15785 self.cfg.RemoveNetwork(self.network_uuid)
15786 except errors.ConfigurationError:
15787 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15788 (self.op.network_name, self.network_uuid))
15791 class LUNetworkSetParams(LogicalUnit):
15792 """Modifies the parameters of a network.
15795 HPATH = "network-modify"
15796 HTYPE = constants.HTYPE_NETWORK
15799 def CheckArguments(self):
15800 if (self.op.gateway and
15801 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15802 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15803 " at once", errors.ECODE_INVAL)
15805 def ExpandNames(self):
15806 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15807 self.network = self.cfg.GetNetwork(self.network_uuid)
15808 if self.network is None:
15809 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15810 (self.op.network_name, self.network_uuid),
15811 errors.ECODE_INVAL)
15812 self.needed_locks = {
15813 locking.LEVEL_NETWORK: [self.network_uuid],
15814 }
15816 def CheckPrereq(self):
15817 """Check prerequisites.
15820 self.gateway = self.network.gateway
15821 self.network_type = self.network.network_type
15822 self.mac_prefix = self.network.mac_prefix
15823 self.network6 = self.network.network6
15824 self.gateway6 = self.network.gateway6
15825 self.tags = self.network.tags
15827 self.pool = network.AddressPool(self.network)
15829 if self.op.gateway:
15830 if self.op.gateway == constants.VALUE_NONE:
15831 self.gateway = None
15832 else:
15833 self.gateway = self.op.gateway
15834 if self.pool.IsReserved(self.gateway):
15835 raise errors.OpPrereqError("%s is already reserved" %
15836 self.gateway, errors.ECODE_INVAL)
15838 if self.op.network_type:
15839 if self.op.network_type == constants.VALUE_NONE:
15840 self.network_type = None
15841 else:
15842 self.network_type = self.op.network_type
15844 if self.op.mac_prefix:
15845 if self.op.mac_prefix == constants.VALUE_NONE:
15846 self.mac_prefix = None
15847 else:
15848 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15849 self.mac_prefix = self.op.mac_prefix
15851 if self.op.gateway6:
15852 if self.op.gateway6 == constants.VALUE_NONE:
15853 self.gateway6 = None
15854 else:
15855 self.gateway6 = self.op.gateway6
15857 if self.op.network6:
15858 if self.op.network6 == constants.VALUE_NONE:
15859 self.network6 = None
15860 else:
15861 self.network6 = self.op.network6
15863 def BuildHooksEnv(self):
15864 """Build hooks env.
15866 """
15867 args = {
15868 "name": self.op.network_name,
15869 "subnet": self.network.network,
15870 "gateway": self.gateway,
15871 "network6": self.network6,
15872 "gateway6": self.gateway6,
15873 "mac_prefix": self.mac_prefix,
15874 "network_type": self.network_type,
15875 "tags": self.tags,
15876 }
15877 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15879 def BuildHooksNodes(self):
15880 """Build hooks nodes.
15883 mn = self.cfg.GetMasterNode()
15884 return ([mn], [mn])
15886 def Exec(self, feedback_fn):
15887 """Modifies the network.
15890 #TODO: reserve/release via temporary reservation manager
15891 # extend cfg.ReserveIp/ReleaseIp with the external flag
15892 if self.op.gateway:
15893 if self.gateway == self.network.gateway:
15894 self.LogWarning("Gateway is already %s", self.gateway)
15895 else:
15896 if self.gateway:
15897 self.pool.Reserve(self.gateway, external=True)
15898 if self.network.gateway:
15899 self.pool.Release(self.network.gateway, external=True)
15900 self.network.gateway = self.gateway
15902 if self.op.add_reserved_ips:
15903 for ip in self.op.add_reserved_ips:
15904 try:
15905 if self.pool.IsReserved(ip):
15906 self.LogWarning("IP address %s is already reserved", ip)
15907 else:
15908 self.pool.Reserve(ip, external=True)
15909 except errors.AddressPoolError, err:
15910 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
15912 if self.op.remove_reserved_ips:
15913 for ip in self.op.remove_reserved_ips:
15914 if ip == self.network.gateway:
15915 self.LogWarning("Cannot unreserve Gateway's IP")
15916 continue
15917 try:
15918 if not self.pool.IsReserved(ip):
15919 self.LogWarning("IP address %s is already unreserved", ip)
15920 else:
15921 self.pool.Release(ip, external=True)
15922 except errors.AddressPoolError, err:
15923 self.LogWarning("Cannot release IP address %s: %s", ip, err)
15925 if self.op.mac_prefix:
15926 self.network.mac_prefix = self.mac_prefix
15928 if self.op.network6:
15929 self.network.network6 = self.network6
15931 if self.op.gateway6:
15932 self.network.gateway6 = self.gateway6
15934 if self.op.network_type:
15935 self.network.network_type = self.network_type
15937 self.pool.Validate()
15939 self.cfg.Update(self.network, feedback_fn)
15942 class _NetworkQuery(_QueryBase):
15943 FIELDS = query.NETWORK_FIELDS
15945 def ExpandNames(self, lu):
15946 lu.needed_locks = {}
15948 self._all_networks = lu.cfg.GetAllNetworksInfo()
15949 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
15951 if not self.names:
15952 self.wanted = [name_to_uuid[name]
15953 for name in utils.NiceSort(name_to_uuid.keys())]
15954 else:
15955 # Accept names to be either names or UUIDs.
15956 missing = []
15957 self.wanted = []
15958 all_uuid = frozenset(self._all_networks.keys())
15960 for name in self.names:
15961 if name in all_uuid:
15962 self.wanted.append(name)
15963 elif name in name_to_uuid:
15964 self.wanted.append(name_to_uuid[name])
15965 else:
15966 missing.append(name)
15968 if missing:
15969 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
15970 errors.ECODE_NOENT)
15972 def DeclareLocks(self, lu, level):
15973 pass
15975 def _GetQueryData(self, lu):
15976 """Computes the list of networks and their attributes.
15979 do_instances = query.NETQ_INST in self.requested_data
15980 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
15981 do_stats = query.NETQ_STATS in self.requested_data
15983 network_to_groups = None
15984 network_to_instances = None
15985 stats = None
15987 # For NETQ_GROUP, we need to map network->[groups]
15988 if do_groups:
15989 all_groups = lu.cfg.GetAllNodeGroupsInfo()
15990 network_to_groups = dict((uuid, []) for uuid in self.wanted)
15992 if do_instances:
15993 all_instances = lu.cfg.GetAllInstancesInfo()
15994 all_nodes = lu.cfg.GetAllNodesInfo()
15995 network_to_instances = dict((uuid, []) for uuid in self.wanted)
15997 for group in all_groups.values():
15998 if do_instances:
15999 group_nodes = [node.name for node in all_nodes.values() if
16000 node.group == group.uuid]
16001 group_instances = [instance for instance in all_instances.values()
16002 if instance.primary_node in group_nodes]
16004 for net_uuid in group.networks.keys():
16005 if net_uuid in network_to_groups:
16006 netparams = group.networks[net_uuid]
16007 mode = netparams[constants.NIC_MODE]
16008 link = netparams[constants.NIC_LINK]
16009 info = group.name + "(" + mode + ", " + link + ")"
16010 network_to_groups[net_uuid].append(info)
16012 if do_instances:
16013 for instance in group_instances:
16014 for nic in instance.nics:
16015 if nic.network == self._all_networks[net_uuid].name:
16016 network_to_instances[net_uuid].append(instance.name)
16017 break
16019 if do_stats:
16020 stats = {}
16021 for uuid, net in self._all_networks.items():
16022 if uuid in self.wanted:
16023 pool = network.AddressPool(net)
16024 stats[uuid] = {
16025 "free_count": pool.GetFreeCount(),
16026 "reserved_count": pool.GetReservedCount(),
16027 "map": pool.GetMap(),
16028 "external_reservations":
16029 utils.CommaJoin(pool.GetExternalReservations()),
16030 }
16032 return query.NetworkQueryData([self._all_networks[uuid]
16033 for uuid in self.wanted],
16034 network_to_groups,
16035 network_to_instances,
16036 stats)
16039 class LUNetworkQuery(NoHooksLU):
16040 """Logical unit for querying networks.
16045 def CheckArguments(self):
16046 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16047 self.op.output_fields, False)
16049 def ExpandNames(self):
16050 self.nq.ExpandNames(self)
16052 def Exec(self, feedback_fn):
16053 return self.nq.OldStyleQuery(self)
16056 class LUNetworkConnect(LogicalUnit):
16057 """Connect a network to a nodegroup
16060 HPATH = "network-connect"
16061 HTYPE = constants.HTYPE_NETWORK
16064 def ExpandNames(self):
16065 self.network_name = self.op.network_name
16066 self.group_name = self.op.group_name
16067 self.network_mode = self.op.network_mode
16068 self.network_link = self.op.network_link
16070 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16071 self.network = self.cfg.GetNetwork(self.network_uuid)
16072 if self.network is None:
16073 raise errors.OpPrereqError("Network %s does not exist" %
16074 self.network_name, errors.ECODE_INVAL)
16076 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16077 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16078 if self.group is None:
16079 raise errors.OpPrereqError("Group %s does not exist" %
16080 self.group_name, errors.ECODE_INVAL)
16082 self.share_locks[locking.LEVEL_INSTANCE] = 1
16083 self.needed_locks = {
16084 locking.LEVEL_INSTANCE: [],
16085 locking.LEVEL_NODEGROUP: [self.group_uuid],
16086 }
16088 def DeclareLocks(self, level):
16089 if level == locking.LEVEL_INSTANCE:
16090 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16092 # Lock instances optimistically, needs verification once group lock has
16093 # been acquired
16094 if self.op.conflicts_check:
16095 self.needed_locks[locking.LEVEL_INSTANCE] = \
16096 self.cfg.GetNodeGroupInstances(self.group_uuid)
16097 self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16099 def BuildHooksEnv(self):
16100 ret = {
16101 "GROUP_NAME": self.group_name,
16102 "GROUP_NETWORK_MODE": self.network_mode,
16103 "GROUP_NETWORK_LINK": self.network_link,
16104 }
16105 ret.update(_BuildNetworkHookEnvByObject(self.network))
16107 return ret
16108 def BuildHooksNodes(self):
16109 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16110 return (nodes, nodes)
16112 def CheckPrereq(self):
16113 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16114 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16116 assert self.group_uuid in owned_groups
16118 # Check if locked instances are still correct
16119 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16121 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16122 for i in value)
16124 self.netparams = {
16125 constants.NIC_MODE: self.network_mode,
16126 constants.NIC_LINK: self.network_link,
16127 }
16128 objects.NIC.CheckParameterSyntax(self.netparams)
16130 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16131 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16132 self.connected = False
16133 if self.network_uuid in self.group.networks:
16134 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16135 (self.network_name, self.group.name))
16136 self.connected = True
16137 return
16139 if self.op.conflicts_check:
16140 pool = network.AddressPool(self.network)
16141 conflicting_instances = []
16143 for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
16144 for idx, nic in enumerate(instance.nics):
16145 if pool.Contains(nic.ip):
16146 conflicting_instances.append((instance.name, idx, nic.ip))
16148 if conflicting_instances:
16149 self.LogWarning("Following occurrences use IPs from network %s"
16150 " that is about to be connected to nodegroup %s: %s" %
16151 (self.network_name, self.group.name,
16152 l(conflicting_instances)))
16153 raise errors.OpPrereqError("Conflicting IPs found."
16154 " Please remove/modify"
16155 " corresponding NICs",
16156 errors.ECODE_INVAL)
16158 def Exec(self, feedback_fn):
16159 if self.connected:
16160 return
16162 self.group.networks[self.network_uuid] = self.netparams
16163 self.cfg.Update(self.group, feedback_fn)
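# --- Editor's note: illustrative sketch, not part of the original module. ---
# The conflict check above walks every NIC of the locked instances and
# records those whose address falls inside the network being connected. Toy
# data, names invented; uses the standard "ipaddress" module:
#
#   import ipaddress
#
#   net = ipaddress.ip_network(u"10.0.0.0/24")
#   instance_nics = {"inst1": [u"10.0.0.5", u"192.168.1.7"],
#                    "inst2": [u"172.16.0.2"]}
#   conflicts = [(inst, idx, ip)
#                for inst, ips in instance_nics.items()
#                for idx, ip in enumerate(ips)
#                if ip and ipaddress.ip_address(ip) in net]
#   # conflicts == [("inst1", 0, u"10.0.0.5")]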
16166 class LUNetworkDisconnect(LogicalUnit):
16167 """Disconnect a network from a nodegroup
16170 HPATH = "network-disconnect"
16171 HTYPE = constants.HTYPE_NETWORK
16174 def ExpandNames(self):
16175 self.network_name = self.op.network_name
16176 self.group_name = self.op.group_name
16178 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16179 self.network = self.cfg.GetNetwork(self.network_uuid)
16180 if self.network is None:
16181 raise errors.OpPrereqError("Network %s does not exist" %
16182 self.network_name, errors.ECODE_INVAL)
16184 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16185 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16186 if self.group is None:
16187 raise errors.OpPrereqError("Group %s does not exist" %
16188 self.group_name, errors.ECODE_INVAL)
16190 self.needed_locks = {
16191 locking.LEVEL_NODEGROUP: [self.group_uuid],
16192 }
16193 self.share_locks[locking.LEVEL_INSTANCE] = 1
16195 def DeclareLocks(self, level):
16196 if level == locking.LEVEL_INSTANCE:
16197 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16199 # Lock instances optimistically, needs verification once group lock has
16200 # been acquired
16201 if self.op.conflicts_check:
16202 self.needed_locks[locking.LEVEL_INSTANCE] = \
16203 self.cfg.GetNodeGroupInstances(self.group_uuid)
16205 def BuildHooksEnv(self):
16206 ret = {
16207 "GROUP_NAME": self.group_name,
16208 }
16209 ret.update(_BuildNetworkHookEnvByObject(self.network))
16211 return ret
16212 def BuildHooksNodes(self):
16213 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16214 return (nodes, nodes)
16216 def CheckPrereq(self):
16217 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16218 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16220 assert self.group_uuid in owned_groups
16222 # Check if locked instances are still correct
16223 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16225 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16226 for i in value)
16228 self.connected = True
16229 if self.network_uuid not in self.group.networks:
16230 self.LogWarning("Network '%s' is not mapped to group '%s'",
16231 self.network_name, self.group.name)
16232 self.connected = False
16233 return
16235 if self.op.conflicts_check:
16236 conflicting_instances = []
16238 for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
16239 for idx, nic in enumerate(instance.nics):
16240 if nic.network == self.network_name:
16241 conflicting_instances.append((instance.name, idx, nic.ip))
16243 if conflicting_instances:
16244 self.LogWarning("Following occurrences use IPs from network %s"
16245 " that is about to be disconnected from the nodegroup"
16246 " %s: %s" %
16247 (self.network_name, self.group.name,
16248 l(conflicting_instances)))
16249 raise errors.OpPrereqError("Conflicting IPs."
16250 " Please remove/modify"
16251 " corresponding NICS",
16252 errors.ECODE_INVAL)
16254 def Exec(self, feedback_fn):
16255 if not self.connected:
16256 return
16258 del self.group.networks[self.network_uuid]
16259 self.cfg.Update(self.group, feedback_fn)
16262 #: Query type implementations
16263 _QUERY_IMPL = {
16264 constants.QR_CLUSTER: _ClusterQuery,
16265 constants.QR_INSTANCE: _InstanceQuery,
16266 constants.QR_NODE: _NodeQuery,
16267 constants.QR_GROUP: _GroupQuery,
16268 constants.QR_NETWORK: _NetworkQuery,
16269 constants.QR_OS: _OsQuery,
16270 constants.QR_EXPORT: _ExportQuery,
16271 }
16273 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16276 def _GetQueryImplementation(name):
16277 """Returns the implementation for a query type.
16279 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16281 """
16282 try:
16283 return _QUERY_IMPL[name]
16284 except KeyError:
16285 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16286 errors.ECODE_INVAL)
16289 def _CheckForConflictingIp(lu, ip, node):
16290 """In case of a conflicting IP address, raise an error.
16292 @type ip: string
16293 @param ip: IP address
16294 @type node: string
16295 @param node: node name
16297 """
16298 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16299 if conf_net is not None:
16300 raise errors.OpPrereqError("Conflicting IP found:"
16301 " %s <> %s." % (ip, conf_net),
16302 errors.ECODE_INVAL)
16304 return (None, None)