code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56 from ganeti import query
  57 from ganeti import qlang
  58 from ganeti import opcodes
  59 from ganeti import ht
  60 from ganeti import rpc
  61 from ganeti import runtime
  62 from ganeti import pathutils
  63 from ganeti import vcluster
  64 from ganeti import network
  65 from ganeti.masterd import iallocator
  66
  67 import ganeti.masterd.instance # pylint: disable=W0611
  68
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @ivar needed_locks: locking needs declared by the LU; C{None} until
      L{ExpandNames} (or a helper such as L{_ExpandAndLockInstance}) sets it
  @ivar share_locks: per-level flag telling whether locks are shared;
      initialised to 0 (exclusive) for every level
  @ivar tasklets: optional list of tasklets; C{None} by default

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    Besides storing its arguments, the constructor initialises the locking
    declarations, copies the logging functions from the processor, makes
    sure the opcode has an integer C{debug_level}, validates the opcode and
    finally calls L{CheckArguments}.

    @param processor: object providing the C{Log}, C{LogWarning}, C{LogInfo}
        and C{LogStep} callables
    @param op: the opcode this LU is executing
    @param context: object providing the C{cfg} and C{glm} attributes
    @param rpc_runner: RPC runner, stored as C{self.rpc}

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner

    # Dictionaries used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)

    self.add_locks = {}
    self.remove_locks = {}

    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute; a missing or non-integer value is
    # replaced by 0
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    # Must come last: CheckArguments sees a validated opcode and fully
    # initialised attributes
    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    The default implementation does nothing.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    @raise NotImplementedError: by the default implementation, if the LU
        does not require the BGL (C{REQ_BGL} is false)

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    The default implementation calls C{CheckPrereq} on every tasklet, in
    order, if tasklets are defined; otherwise it does nothing.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    The default implementation calls C{Exec} on every tasklet, in order, if
    tasklets are defined.

    @param feedback_fn: passed on unchanged to each tasklet's C{Exec}
    @raise NotImplementedError: by the default implementation, if no
        tasklets are defined

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.
    @raise NotImplementedError: always, in this base class

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.
    @raise NotImplementedError: always, in this base class

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    If needed_locks was already initialized, it must not contain an entry
    for the instance level yet (this is asserted).

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    # Note: the expanded name itself is stored, not a one-element list
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[level] with the
    primary nodes (and, unless C{primary_only} is set, the secondary nodes)
    of all instances whose locks are currently owned at the instance level.

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[level] is set. Depending on that value the
    computed node list either replaces (L{constants.LOCKS_REPLACE}) or is
    appended to (L{constants.LOCKS_APPEND}) self.needed_locks[level]; the
    entry in self.recalculate_locks is removed afterwards.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes
    @raise errors.ProgrammerError: if self.recalculate_locks[level] holds an
        unknown recalculation mode

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    # The request has been served; a second call for the same level without
    # a new request trips the assertion at the top
    del self.recalculate_locks[level]
 421
 422
 423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 424   """Simple LU which runs no hooks.
 425
 426   This LU is intended as a parent for other LogicalUnits which will
 427   run no hooks, in order to reduce duplicate code.
 428
 429   """
 430   HPATH = None
 431   HTYPE = None
 432
 433   def BuildHooksEnv(self):
 434     """Empty BuildHooksEnv for NoHooksLu.
 435
 436     This just raises an error.
 437
 438     """
 439     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 440
 441   def BuildHooksNodes(self):
 442     """Empty BuildHooksNodes for NoHooksLU.
 443
 444     """
 445     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 446
 447
 448 class Tasklet:
 449   """Tasklet base class.
 450
 451   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 452   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 453   tasklets know nothing about locks.
 454
 455   Subclasses must follow these rules:
 456     - Implement CheckPrereq
 457     - Implement Exec
 458
 459   """
 460   def __init__(self, lu):
 461     self.lu = lu
 462
 463     # Shortcuts
 464     self.cfg = lu.cfg
 465     self.rpc = lu.rpc
 466
 467   def CheckPrereq(self):
 468     """Check prerequisites for this tasklets.
 469
 470     This method should check whether the prerequisites for the execution of
 471     this tasklet are fulfilled. It can do internode communication, but it
 472     should be idempotent - no cluster or system changes are allowed.
 473
 474     The method should raise errors.OpPrereqError in case something is not
 475     fulfilled. Its return value is ignored.
 476
 477     This method should also update all parameters to their canonical form if it
 478     hasn't been done before.
 479
 480     """
 481     pass
 482
 483   def Exec(self, feedback_fn):
 484     """Execute the tasklet.
 485
 486     This method should implement the actual work. It should raise
 487     errors.OpExecError for failures that are somewhat dealt with in code, or
 488     expected.
 489
 490     """
 491     raise NotImplementedError
 492
 493
 494 class _QueryBase:
 495   """Base for query utility classes.
 496
 497   """
 498   #: Attribute holding field definitions
 499   FIELDS = None
 500
 501   #: Field to sort by
 502   SORT_FIELD = "name"
 503
 504   def __init__(self, qfilter, fields, use_locking):
 505     """Initializes this class.
 506
 507     """
 508     self.use_locking = use_locking
 509
 510     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 511                              namefield=self.SORT_FIELD)
 512     self.requested_data = self.query.RequestedData()
 513     self.names = self.query.RequestedNames()
 514
 515     # Sort only if no names were requested
 516     self.sort_by_name = not self.names
 517
 518     self.do_locking = None
 519     self.wanted = None
 520
 521   def _GetNames(self, lu, all_names, lock_level):
 522     """Helper function to determine names asked for in the query.
 523
 524     """
 525     if self.do_locking:
 526       names = lu.owned_locks(lock_level)
 527     else:
 528       names = all_names
 529
 530     if self.wanted == locking.ALL_SET:
 531       assert not self.names
 532       # caller didn't specify names, so ordering is not important
 533       return utils.NiceSort(names)
 534
 535     # caller specified names and we must keep the same order
 536     assert self.names
 537     assert not self.do_locking or lu.glm.is_owned(lock_level)
 538
 539     missing = set(self.wanted).difference(names)
 540     if missing:
 541       raise errors.OpExecError("Some items were removed before retrieving"
 542                                " their data: %s" % missing)
 543
 544     # Return expanded names
 545     return self.wanted
 546
 547   def ExpandNames(self, lu):
 548     """Expand names for this query.
 549
 550     See L{LogicalUnit.ExpandNames}.
 551
 552     """
 553     raise NotImplementedError()
 554
 555   def DeclareLocks(self, lu, level):
 556     """Declare locks for this query.
 557
 558     See L{LogicalUnit.DeclareLocks}.
 559
 560     """
 561     raise NotImplementedError()
 562
 563   def _GetQueryData(self, lu):
 564     """Collects all data for this query.
 565
 566     @return: Query data object
 567
 568     """
 569     raise NotImplementedError()
 570
 571   def NewStyleQuery(self, lu):
 572     """Collect data and execute query.
 573
 574     """
 575     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 576                                   sort_by_name=self.sort_by_name)
 577
 578   def OldStyleQuery(self, lu):
 579     """Collect data and execute query.
 580
 581     """
 582     return self.query.OldStyleQuery(self._GetQueryData(lu),
 583                                     sort_by_name=self.sort_by_name)
 584
 585
 586 def _ShareAll():
 587   """Returns a dict declaring all lock levels shared.
 588
 589   """
 590   return dict.fromkeys(locking.LEVELS, 1)
 591
 592
 593 def _AnnotateDiskParams(instance, devs, cfg):
 594   """Little helper wrapper to the rpc annotation method.
 595
 596   @param instance: The instance object
 597   @type devs: List of L{objects.Disk}
 598   @param devs: The root devices (not any of its children!)
 599   @param cfg: The config object
 600   @returns The annotated disk copies
 601   @see L{rpc.AnnotateDiskParams}
 602
 603   """
 604   return rpc.AnnotateDiskParams(instance.disk_template, devs,
 605                                 cfg.GetInstanceDiskParams(instance))
 606
 607
 608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 609                               cur_group_uuid):
 610   """Checks if node groups for locked instances are still correct.
 611
 612   @type cfg: L{config.ConfigWriter}
 613   @param cfg: Cluster configuration
 614   @type instances: dict; string as key, L{objects.Instance} as value
 615   @param instances: Dictionary, instance name as key, instance object as value
 616   @type owned_groups: iterable of string
 617   @param owned_groups: List of owned groups
 618   @type owned_nodes: iterable of string
 619   @param owned_nodes: List of owned nodes
 620   @type cur_group_uuid: string or None
 621   @param cur_group_uuid: Optional group UUID to check against instance's groups
 622
 623   """
 624   for (name, inst) in instances.items():
 625     assert owned_nodes.issuperset(inst.all_nodes), \
 626       "Instance %s's nodes changed while we kept the lock" % name
 627
 628     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 629
 630     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 631       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 632
 633
 634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
 635                              primary_only=False):
 636   """Checks if the owned node groups are still correct for an instance.
 637
 638   @type cfg: L{config.ConfigWriter}
 639   @param cfg: The cluster configuration
 640   @type instance_name: string
 641   @param instance_name: Instance name
 642   @type owned_groups: set or frozenset
 643   @param owned_groups: List of currently owned node groups
 644   @type primary_only: boolean
 645   @param primary_only: Whether to check node groups for only the primary node
 646
 647   """
 648   inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
 649
 650   if not owned_groups.issuperset(inst_groups):
 651     raise errors.OpPrereqError("Instance %s's node groups changed since"
 652                                " locks were acquired, current groups are"
 653                                " are '%s', owning groups '%s'; retry the"
 654                                " operation" %
 655                                (instance_name,
 656                                 utils.CommaJoin(inst_groups),
 657                                 utils.CommaJoin(owned_groups)),
 658                                errors.ECODE_STATE)
 659
 660   return inst_groups
 661
 662
 663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 664   """Checks if the instances in a node group are still correct.
 665
 666   @type cfg: L{config.ConfigWriter}
 667   @param cfg: The cluster configuration
 668   @type group_uuid: string
 669   @param group_uuid: Node group UUID
 670   @type owned_instances: set or frozenset
 671   @param owned_instances: List of currently owned instances
 672
 673   """
 674   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 675   if owned_instances != wanted_instances:
 676     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 677                                " locks were acquired, wanted '%s', have '%s';"
 678                                " retry the operation" %
 679                                (group_uuid,
 680                                 utils.CommaJoin(wanted_instances),
 681                                 utils.CommaJoin(owned_instances)),
 682                                errors.ECODE_STATE)
 683
 684   return wanted_instances
 685
 686
 687 def _SupportsOob(cfg, node):
 688   """Tells if node supports OOB.
 689
 690   @type cfg: L{config.ConfigWriter}
 691   @param cfg: The cluster configuration
 692   @type node: L{objects.Node}
 693   @param node: The node
 694   @return: The OOB script if supported or an empty string otherwise
 695
 696   """
 697   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 698
 699
 700 def _CopyLockList(names):
 701   """Makes a copy of a list of lock names.
 702
 703   Handles L{locking.ALL_SET} correctly.
 704
 705   """
 706   if names == locking.ALL_SET:
 707     return locking.ALL_SET
 708   else:
 709     return names[:]
 710
 711
 712 def _GetWantedNodes(lu, nodes):
 713   """Returns list of checked and expanded node names.
 714
 715   @type lu: L{LogicalUnit}
 716   @param lu: the logical unit on whose behalf we execute
 717   @type nodes: list
 718   @param nodes: list of node names or None for all nodes
 719   @rtype: list
 720   @return: the list of nodes, sorted
 721   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 722
 723   """
 724   if nodes:
 725     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 726
 727   return utils.NiceSort(lu.cfg.GetNodeList())
 728
 729
 730 def _GetWantedInstances(lu, instances):
 731   """Returns list of checked and expanded instance names.
 732
 733   @type lu: L{LogicalUnit}
 734   @param lu: the logical unit on whose behalf we execute
 735   @type instances: list
 736   @param instances: list of instance names or None for all instances
 737   @rtype: list
 738   @return: the list of instances, sorted
 739   @raise errors.OpPrereqError: if the instances parameter is wrong type
 740   @raise errors.OpPrereqError: if any of the passed instances is not found
 741
 742   """
 743   if instances:
 744     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 745   else:
 746     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 747   return wanted
 748
 749
 750 def _GetUpdatedParams(old_params, update_dict,
 751                       use_default=True, use_none=False):
 752   """Return the new version of a parameter dictionary.
 753
 754   @type old_params: dict
 755   @param old_params: old parameters
 756   @type update_dict: dict
 757   @param update_dict: dict containing new parameter values, or
 758       constants.VALUE_DEFAULT to reset the parameter to its default
 759       value
 760   @param use_default: boolean
 761   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 762       values as 'to be deleted' values
 763   @param use_none: boolean
 764   @type use_none: whether to recognise C{None} values as 'to be
 765       deleted' values
 766   @rtype: dict
 767   @return: the new parameter dictionary
 768
 769   """
 770   params_copy = copy.deepcopy(old_params)
 771   for key, val in update_dict.iteritems():
 772     if ((use_default and val == constants.VALUE_DEFAULT) or
 773         (use_none and val is None)):
 774       try:
 775         del params_copy[key]
 776       except KeyError:
 777         pass
 778     else:
 779       params_copy[key] = val
 780   return params_copy
 781
 782
 783 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 784   """Return the new version of a instance policy.
 785
 786   @param group_policy: whether this policy applies to a group and thus
 787     we should support removal of policy entries
 788
 789   """
 790   use_none = use_default = group_policy
 791   ipolicy = copy.deepcopy(old_ipolicy)
 792   for key, value in new_ipolicy.items():
 793     if key not in constants.IPOLICY_ALL_KEYS:
 794       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 795                                  errors.ECODE_INVAL)
 796     if key in constants.IPOLICY_ISPECS:
 797       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 798       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 799                                        use_none=use_none,
 800                                        use_default=use_default)
 801     else:
 802       if (not value or value == [constants.VALUE_DEFAULT] or
 803           value == constants.VALUE_DEFAULT):
 804         if group_policy:
 805           del ipolicy[key]
 806         else:
 807           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 808                                      " on the cluster'" % key,
 809                                      errors.ECODE_INVAL)
 810       else:
 811         if key in constants.IPOLICY_PARAMETERS:
 812           # FIXME: we assume all such values are float
 813           try:
 814             ipolicy[key] = float(value)
 815           except (TypeError, ValueError), err:
 816             raise errors.OpPrereqError("Invalid value for attribute"
 817                                        " '%s': '%s', error: %s" %
 818                                        (key, value, err), errors.ECODE_INVAL)
 819         else:
 820           # FIXME: we assume all others are lists; this should be redone
 821           # in a nicer way
 822           ipolicy[key] = list(value)
 823   try:
 824     objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
 825   except errors.ConfigurationError, err:
 826     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 827                                errors.ECODE_INVAL)
 828   return ipolicy
 829
 830
 831 def _UpdateAndVerifySubDict(base, updates, type_check):
 832   """Updates and verifies a dict with sub dicts of the same type.
 833
 834   @param base: The dict with the old data
 835   @param updates: The dict with the new data
 836   @param type_check: Dict suitable to ForceDictType to verify correct types
 837   @returns: A new dict with updated and verified values
 838
 839   """
 840   def fn(old, value):
 841     new = _GetUpdatedParams(old, value)
 842     utils.ForceDictType(new, type_check)
 843     return new
 844
 845   ret = copy.deepcopy(base)
 846   ret.update(dict((key, fn(base.get(key, {}), value))
 847                   for key, value in updates.items()))
 848   return ret
 849
 850
 851 def _MergeAndVerifyHvState(op_input, obj_input):
 852   """Combines the hv state from an opcode with the one of the object
 853
 854   @param op_input: The input dict from the opcode
 855   @param obj_input: The input dict from the objects
 856   @return: The verified and updated dict
 857
 858   """
 859   if op_input:
 860     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 861     if invalid_hvs:
 862       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 863                                  " %s" % utils.CommaJoin(invalid_hvs),
 864                                  errors.ECODE_INVAL)
 865     if obj_input is None:
 866       obj_input = {}
 867     type_check = constants.HVSTS_PARAMETER_TYPES
 868     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 869
 870   return None
 871
 872
 873 def _MergeAndVerifyDiskState(op_input, obj_input):
 874   """Combines the disk state from an opcode with the one of the object
 875
 876   @param op_input: The input dict from the opcode
 877   @param obj_input: The input dict from the objects
 878   @return: The verified and updated dict
 879   """
 880   if op_input:
 881     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 882     if invalid_dst:
 883       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 884                                  utils.CommaJoin(invalid_dst),
 885                                  errors.ECODE_INVAL)
 886     type_check = constants.DSS_PARAMETER_TYPES
 887     if obj_input is None:
 888       obj_input = {}
 889     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 890                                               type_check))
 891                 for key, value in op_input.items())
 892
 893   return None
 894
 895
 896 def _ReleaseLocks(lu, level, names=None, keep=None):
 897   """Releases locks owned by an LU.
 898
 899   @type lu: L{LogicalUnit}
 900   @param level: Lock level
 901   @type names: list or None
 902   @param names: Names of locks to release
 903   @type keep: list or None
 904   @param keep: Names of locks to retain
 905
 906   """
 907   assert not (keep is not None and names is not None), \
 908          "Only one of the 'names' and the 'keep' parameters can be given"
 909
 910   if names is not None:
 911     should_release = names.__contains__
 912   elif keep:
 913     should_release = lambda name: name not in keep
 914   else:
 915     should_release = None
 916
 917   owned = lu.owned_locks(level)
 918   if not owned:
 919     # Not owning any lock at this level, do nothing
 920     pass
 921
 922   elif should_release:
 923     retain = []
 924     release = []
 925
 926     # Determine which locks to release
 927     for name in owned:
 928       if should_release(name):
 929         release.append(name)
 930       else:
 931         retain.append(name)
 932
 933     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 934
 935     # Release just some locks
 936     lu.glm.release(level, names=release)
 937
 938     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 939   else:
 940     # Release everything
 941     lu.glm.release(level)
 942
 943     assert not lu.glm.is_owned(level), "No locks should be owned"
 944
 945
 946 def _MapInstanceDisksToNodes(instances):
 947   """Creates a map from (node, volume) to instance name.
 948
 949   @type instances: list of L{objects.Instance}
 950   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 951
 952   """
 953   return dict(((node, vol), inst.name)
 954               for inst in instances
 955               for (node, vols) in inst.MapLVsByNode().items()
 956               for vol in vols)
 957
 958
 959 def _RunPostHook(lu, node_name):
 960   """Runs the post-hook for an opcode on a single node.
 961
 962   """
 963   hm = lu.proc.BuildHooksManager(lu)
 964   try:
 965     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 966   except Exception, err: # pylint: disable=W0703
 967     lu.LogWarning("Errors occurred running hooks on %s: %s",
 968                   node_name, err)
 969
 970
 971 def _CheckOutputFields(static, dynamic, selected):
 972   """Checks whether all selected fields are valid.
 973
 974   @type static: L{utils.FieldSet}
 975   @param static: static fields set
 976   @type dynamic: L{utils.FieldSet}
 977   @param dynamic: dynamic fields set
 978
 979   """
 980   f = utils.FieldSet()
 981   f.Extend(static)
 982   f.Extend(dynamic)
 983
 984   delta = f.NonMatching(selected)
 985   if delta:
 986     raise errors.OpPrereqError("Unknown output fields selected: %s"
 987                                % ",".join(delta), errors.ECODE_INVAL)
 988
 989
 990 def _CheckGlobalHvParams(params):
 991   """Validates that given hypervisor params are not global ones.
 992
 993   This will ensure that instances don't get customised versions of
 994   global params.
 995
 996   """
 997   used_globals = constants.HVC_GLOBALS.intersection(params)
 998   if used_globals:
 999     msg = ("The following hypervisor parameters are global and cannot"
1000            " be customized at instance level, please modify them at"
1001            " cluster level: %s" % utils.CommaJoin(used_globals))
1002     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1003
1004
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if not node_info.offline:
    return

  if msg is None:
    msg = "Can't use offline node"
  raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1018
1019
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if not node_info.drained:
    return

  raise errors.OpPrereqError("Can't use drained node %s" % node,
                             errors.ECODE_STATE)
1031
1032
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if node_info.vm_capable:
    return

  raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                             errors.ECODE_STATE)
1044
1045
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  The OS is queried on the node via RPC; unless C{force_variant} is set,
  the returned OS object is also checked with L{_CheckOSVariant}.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  os_result = lu.rpc.call_os_get(node, os_name)
  failure_msg = ("OS '%s' not in supported OS list for node %s" %
                 (os_name, node))
  os_result.Raise(failure_msg, prereq=True, ecode=errors.ECODE_INVAL)

  if force_variant:
    return

  _CheckOSVariant(os_result.payload, os_name)
1062
1063
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  ip_check = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  ip_check.Raise("Failure checking secondary ip on node %s" % node,
                 prereq=prereq, ecode=errors.ECODE_ENVIRON)

  if ip_check.payload:
    return

  msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
         " please fix and re-run this command" % secondary_ip)

  # The same failure is reported with a different error class depending on
  # the phase the caller is in
  if prereq:
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
  raise errors.OpExecError(msg)
1089
1090
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  @return: the content of L{pathutils.CLUSTER_DOMAIN_SECRET_FILE} as returned
      by L{utils.ReadOneLineFile} in strict mode

  """
  secret_file = pathutils.CLUSTER_DOMAIN_SECRET_FILE
  return utils.ReadOneLineFile(secret_file, strict=True)
1097
1098
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  If L{constants.ADMINST_UP} is not among the required states, the primary
  node is also asked whether the instance is actually running; this second
  check is skipped (with a warning) when the primary node is offline.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: the admin states the instance is allowed to be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))

  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP in req_states:
    # A running instance is acceptable, nothing more to verify
    return

  pnode = instance.primary_node
  if lu.cfg.GetNodeInfo(pnode).offline:
    lu.LogWarning("Primary node offline, ignoring check that instance"
                  " is down")
    return

  running = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  running.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)
  if instance.name in running.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, msg), errors.ECODE_STATE)
1128
1129
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  A bound that is missing for C{name} in the policy defaults to C{value}
  itself, so it can never be violated.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None if the value is C{None}, L{constants.VALUE_AUTO} or within
      range; otherwise a message describing the violation

  """
  if value in (None, constants.VALUE_AUTO):
    return None

  upper = ipolicy[constants.ISPECS_MAX].get(name, value)
  lower = ipolicy[constants.ISPECS_MIN].get(name, value)

  out_of_range = value > upper or lower > value
  if not out_of_range:
    return None

  fqn = name
  if qualifier:
    fqn = "%s/%s" % (name, qualifier)

  return ("%s value %s is not in range [%s, %s]" %
          (fqn, value, lower, upper))
1154
1155
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  Each scalar spec is checked once; each disk size is checked separately,
  using the disk index as qualifier.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  checks = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ]
  for (idx, size) in enumerate(disk_sizes):
    checks.append((constants.ISPEC_DISK_SIZE, str(idx), size))

  violations = []
  for (name, qualifier, value) in checks:
    problem = _compute_fn(name, qualifier, ipolicy, value)
    # Only results evaluating to true are reported
    if problem:
      violations.append(problem)

  return violations
1193
1194
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  Memory, CPU and spindle values are read from the instance's own
  C{beparams} (C{None} if unset); disk and NIC data come from its C{disks}
  and C{nics} lists.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  beparams = instance.beparams
  disks = instance.disks

  return _compute_fn(ipolicy,
                     beparams.get(constants.BE_MAXMEM, None),
                     beparams.get(constants.BE_VCPUS, None),
                     len(disks),
                     len(instance.nics),
                     [disk.size for disk in disks],
                     beparams.get(constants.BE_SPINDLE_USE, None))
1216
1217
def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  Missing spec entries default to C{None} (memory, CPU, spindle use), to
  zero (disk and NIC count) or to an empty list (disk sizes).

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  spec = instance_spec.get

  return _compute_fn(ipolicy,
                     spec(constants.ISPEC_MEM_SIZE, None),
                     spec(constants.ISPEC_CPU_COUNT, None),
                     spec(constants.ISPEC_DISK_COUNT, 0),
                     spec(constants.ISPEC_NIC_COUNT, 0),
                     spec(constants.ISPEC_DISK_SIZE, []),
                     spec(constants.ISPEC_SPINDLE_USE, None))
1239
1240
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  No check is made when the instance stays in its current group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: an empty list if both groups are equal, otherwise the result of
      C{_compute_fn}
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []

  return _compute_fn(ipolicy, instance)
1258
1259
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param lu: the LU used for the configuration lookup and for logging
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @raise errors.OpPrereqError: if violations are found and C{ignore} is
      not set
  @see: L{_ComputeIPolicySpecViolation}

  """
  pnode_info = lu.cfg.GetNodeInfo(instance.primary_node)
  violations = _compute_fn(ipolicy, instance, pnode_info.group, node.group)

  if not violations:
    return

  msg = ("Instance does not meet target node group's (%s) instance"
         " policy: %s") % (node.group, utils.CommaJoin(violations))

  if not ignore:
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

  # Violations are tolerated, but still reported
  lu.LogWarning(msg)
1282
1283
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: The difference between the results of
      L{_ComputeViolatingInstances} for the new and for the old policy,
      i.e. the instances which violate the new ipolicy but did not before

  """
  violating_new = _ComputeViolatingInstances(new_ipolicy, instances)
  violating_old = _ComputeViolatingInstances(old_ipolicy, instances)

  return violating_new - violating_old
1296
1297
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found, i.e. C{fn}
      returned C{None}

  """
  expanded = fn(name)
  if expanded is not None:
    return expanded

  raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                             errors.ECODE_NOENT)
1313
1314
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes.

  Uses C{cfg.ExpandNodeName} as the expansion function.

  """
  expander = cfg.ExpandNodeName
  return _ExpandItemName(expander, name, "Node")
1318
1319
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance.

  Uses C{cfg.ExpandInstanceName} as the expansion function.

  """
  expander = cfg.ExpandInstanceName
  return _ExpandItemName(expander, name, "Instance")
1323
1324
def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         network_type, mac_prefix, tags):
  """Builds network related env variables for hooks

  This builds the hook environment from individual variables. Only values
  evaluating to true end up in the environment.

  @type name: string
  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type network_type: string
  @param network_type: the type of the network
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @type tags: list
  @param tags: the tags of the network
  @rtype: dict
  @return: the hook environment for this network

  """
  simple_vars = [
    ("NETWORK_NAME", name),
    ("NETWORK_SUBNET", subnet),
    ("NETWORK_GATEWAY", gateway),
    ("NETWORK_SUBNET6", network6),
    ("NETWORK_GATEWAY6", gateway6),
    ("NETWORK_MAC_PREFIX", mac_prefix),
    ("NETWORK_TYPE", network_type),
    ]

  env = dict((key, value) for (key, value) in simple_vars if value)

  # Tags are the only value that needs converting before being exported
  if tags:
    env["NETWORK_TAGS"] = " ".join(tags)

  return env
1368
1369
def _BuildNetworkHookEnvByObject(net):
  """Builds network related env variables for hooks

  The individual attributes of the network object are handed over to
  L{_BuildNetworkHookEnv}.

  @type net: L{objects.Network}
  @param net: the network object

  """
  return _BuildNetworkHookEnv(name=net.name,
                              subnet=net.network,
                              gateway=net.gateway,
                              network6=net.network6,
                              gateway6=net.gateway6,
                              network_type=net.network_type,
                              mac_prefix=net.mac_prefix,
                              tags=net.tags)
1389
1390
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link, net, netinfo)
      representing the NICs the instance has; C{netinfo} is either C{None}
      or a dict accepted by L{objects.Network.FromDict}
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      # The NIC's network is the "net" tuple element; the name "network"
      # used here before is not defined in this function
      if net:
        env["INSTANCE_NIC%d_NETWORK" % idx] = net
        if netinfo:
          # Only network attributes which are set are exported
          nobj = objects.Network.FromDict(netinfo)
          if nobj.network:
            env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
          if nobj.gateway:
            env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
          if nobj.network6:
            env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
          if nobj.gateway6:
            env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
          if nobj.mac_prefix:
            env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
          if nobj.network_type:
            env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
          if nobj.tags:
            env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
      if mode == constants.NIC_MODE_BRIDGED:
        # For bridged NICs the link is additionally exported as the bridge
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  # Backend and hypervisor parameters are exported under their own prefixes
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
1502
1503
def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple
  @rtype: tuple
  @return: (ip, mac, mode, link, network, netinfo); mode and link are taken
      from the NIC parameters filled with the cluster's values, and netinfo
      is C{None} unless the NIC's network could be looked up

  """
  filled = lu.cfg.GetClusterInfo().SimpleFillNIC(nic.nicparams)
  mode = filled[constants.NIC_MODE]
  link = filled[constants.NIC_LINK]

  netinfo = None
  if nic.network:
    net_uuid = lu.cfg.LookupNetwork(nic.network)
    if net_uuid:
      netinfo = objects.Network.ToDict(lu.cfg.GetNetwork(net_uuid))

  return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1527
1528
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  @rtype: list
  @return: one L{_NICToTuple} result per NIC, in the same order

  """
  return [_NICToTuple(lu, nic) for nic in nics]
1545
1546
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  Memory and VCPU values are taken from the instance's backend parameters
  as filled by the cluster object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  filled_be = cluster.FillBE(instance)
  filled_hv = cluster.FillHV(instance)

  hook_args = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_state,
    maxmem=filled_be[constants.BE_MAXMEM],
    minmem=filled_be[constants.BE_MINMEM],
    vcpus=filled_be[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=filled_be,
    hvp=filled_hv,
    hypervisor_name=instance.hypervisor,
    tags=instance.tags,
    )

  # Caller-supplied values win over the ones derived from the instance
  if override:
    hook_args.update(override)

  return _BuildInstanceHookEnv(**hook_args) # pylint: disable=W0142
1585
1586
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  Every node returned by C{lu.cfg.MaintainCandidatePool} is logged as
  promoted and passed to C{lu.context.ReaddNode}. A note is logged if there
  are more master candidates than desired afterwards.

  @param lu: the LU on behalf of which the pool is adjusted
  @param exceptions: passed through to C{MaintainCandidatePool} and
      C{GetMasterCandidateStats}

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in promoted))
    for node in promoted:
      lu.context.ReaddNode(node)

  (mc_now, mc_max, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
1601
1602
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  @param lu: the LU whose configuration is consulted
  @param exceptions: passed through to C{GetMasterCandidateStats}
  @rtype: boolean
  @return: whether the current number of candidates is below the desired
      number, the latter raised by one but capped at the cluster's
      candidate pool size

  """
  pool_size = lu.cfg.GetClusterInfo().candidate_pool_size
  (mc_now, mc_should, _) = lu.cfg.GetMasterCandidateStats(exceptions)

  # the new node will increase mc_max with one, so:
  wanted = min(mc_should + 1, pool_size)

  return mc_now < wanted
1612
1613
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  violating = []

  for inst in instances:
    if _ComputeIPolicyInstanceViolation(ipolicy, inst):
      violating.append(inst.name)

  return frozenset(violating)
1625
1626
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  Only NICs whose filled parameters are in bridged mode are considered; if
  there are none, the node is not contacted at all.

  @param lu: the LU on behalf of which we make the check
  @param target_nics: the NICs whose links are checked
  @param target_node: the node on which the bridges must exist

  """
  cluster = lu.cfg.GetClusterInfo()

  bridges = []
  for nic in target_nics:
    params = cluster.SimpleFillNIC(nic.nicparams)
    if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(params[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1639
1640
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary node

  """
  target = node
  if target is None:
    target = instance.primary_node

  _CheckNicsBridgesExist(lu, instance.nics, target)
1648
1649
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if a variant is given for an OS without
      supported variants, or if the variant is missing or not supported for
      an OS which has them

  """
  variant = objects.OS.GetVariant(name)
  supported = os_obj.supported_variants

  if supported:
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)
    if variant not in supported:
      raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
  elif variant:
    raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                               " passed)" % (os_obj.name, variant),
                               errors.ECODE_INVAL)
1672
1673
def _GetNodeInstancesInner(cfg, fn):
  """Returns the instances for which a predicate holds.

  @param cfg: object whose C{GetAllInstancesInfo} result values are examined
  @param fn: predicate called with each instance
  @rtype: list
  @return: the instances for which C{fn} returned a true value

  """
  selected = []

  for inst in cfg.GetAllInstancesInfo().values():
    if fn(inst):
      selected.append(inst)

  return selected
1676
1677
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  An instance is selected if the node is in its C{all_nodes}.

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1684
1685
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  An instance is selected if the node equals its C{primary_node}.

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1692
1693
def _GetNodeSecondaryInstances(cfg, node_name):
  """Return the instances that have the given node as a secondary.

  @param cfg: the configuration object to query
  @param node_name: the node name to look for in each instance's
      C{secondary_nodes}
  @return: list of matching instance objects

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1700
1701
def _GetStorageTypeArgs(cfg, storage_type):
  """Return the extra arguments needed for a storage type.

  @param cfg: the configuration object, queried for the storage directories
  @param storage_type: the storage type; only L{constants.ST_FILE} yields
      arguments
  @rtype: list
  @return: a one-element list holding the list of file storage
      directories for file storage, an empty list otherwise

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1712
1713
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Return the indices of an instance's disks reported faulty on a node.

  @param cfg: the configuration object, used to set the disk IDs for the
      given node
  @param rpc_runner: the RPC runner used to query the mirror status
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed through to the RPC result's C{Raise} as C{prereq}
  @rtype: list
  @return: indices (into the RPC payload) of the entries whose local disk
      status is L{constants.LDS_FAULTY}

  """
  for disk in instance.disks:
    cfg.SetDiskID(disk, node_name)

  rpc_args = (instance.disks, instance)
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, rpc_args)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # Entries without a status are skipped rather than counted as faulty
  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
1730
1731
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Sanity-check the iallocator/node opcode arguments.

  At most one of (iallocator, node) may be given. If neither is given, or
  the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT}, the opcode's
  iallocator slot is filled with the cluster-wide default iallocator.

  @param lu: the logical unit whose opcode (C{lu.op}) is checked and
      possibly updated
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  @raise errors.OpPrereqError: if both are given, or if the default is
      needed but the cluster has none

  """
  ialloc = getattr(lu.op, iallocator_slot, None)
  node = getattr(lu.op, node_slot, None)
  if node == []:
    # An empty node list counts as "no node given"
    node = None

  have_node = node is not None
  have_ialloc = ialloc is not None

  if have_node and have_ialloc:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)

  want_default = (not (have_node or have_ialloc) or
                  ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT)
  if not want_default:
    return

  default_iallocator = lu.cfg.GetDefaultIAllocator()
  if not default_iallocator:
    raise errors.OpPrereqError("No iallocator or node given and no"
                               " cluster-wide default iallocator found;"
                               " please specify either an iallocator or a"
                               " node, or set a cluster-wide default"
                               " iallocator", errors.ECODE_INVAL)

  setattr(lu.op, iallocator_slot, default_iallocator)
1766
1767
def _GetDefaultIAllocator(cfg, ialloc):
  """Pick the iallocator to use, falling back to the cluster default.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name
  @raise errors.OpPrereqError: if neither the opcode nor the cluster
      configuration names an iallocator

  """
  # The cluster-wide default is only consulted when the opcode has none
  chosen = ialloc or cfg.GetDefaultIAllocator()

  if chosen:
    return chosen

  raise errors.OpPrereqError("No iallocator was specified, neither in the"
                             " opcode nor as a cluster-wide default",
                             errors.ECODE_INVAL)
1789
1790
def _CheckHostnameSane(lu, name):
  """Resolve a hostname and make sure the result looks like the input.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object
  @raise errors.OpPrereqError: if the resolved name does not match the
      given one

  """
  resolved = netutils.GetHostname(name=name)

  if resolved.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, resolved.name)

  if utils.MatchNameComponent(name, [resolved.name]):
    return resolved

  raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                              " same as given hostname '%s'") %
                             (resolved.name, name), errors.ECODE_INVAL)
1810
1811
class LUClusterPostInit(LogicalUnit):
  """Logical unit that only exists to run the post-init cluster hooks.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @rtype: dict
    @return: a dictionary with the cluster name as C{OP_TARGET}

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    return env

  def BuildHooksNodes(self):
    """Build the hooks node lists.

    @rtype: tuple
    @return: a pair of lists: an empty first list and a second list
        holding only the master node

    """
    pre_nodes = []
    post_nodes = [self.cfg.GetMasterNode()]
    return (pre_nodes, post_nodes)

  def Exec(self, feedback_fn):
    """Do nothing; this LU performs no work of its own.

    @param feedback_fn: unused
    @return: always C{True}

    """
    return True
1838
1839
class LUClusterDestroy(LogicalUnit):
  """Logical unit that tears down the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @rtype: dict
    @return: a dictionary with the cluster name as C{OP_TARGET}

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    return env

  def BuildHooksNodes(self):
    """Build the hooks node lists.

    @rtype: tuple
    @return: a pair of empty lists

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    The cluster must be empty: the master must be the only node left and
    no instances may be configured.

    @raise errors.OpPrereqError: if other nodes or any instances remain

    """
    master = self.cfg.GetMasterNode()
    nodes = self.cfg.GetNodeList()

    only_master_left = (len(nodes) == 1 and nodes[0] == master)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodes) - 1),
                                 errors.ECODE_INVAL)

    instances = self.cfg.GetInstanceList()
    if instances:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instances),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroy the cluster.

    Runs the post hooks on the master and asks it to deactivate the
    master IP; a failure of the latter is only logged as a warning.

    @param feedback_fn: unused
    @return: the name taken from the master network parameters

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    master_name = master_params.name

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_name,
                                                     master_params, ems)
    failure = result.fail_msg
    if failure:
      self.LogWarning("Error disabling the master IP address: %s", failure)

    return master_name
1899
1900
1901 def _VerifyCertificate(filename):
1902   """Verifies a certificate for L{LUClusterVerifyConfig}.
1903
1904   @type filename: string
1905   @param filename: Path to PEM file
1906
1907   """
1908   try:
1909     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1910                                            utils.ReadFile(filename))
1911   except Exception, err: # pylint: disable=W0703
1912     return (LUClusterVerifyConfig.ETYPE_ERROR,
1913             "Failed to load X509 certificate %s: %s" % (filename, err))
1914
1915   (errcode, msg) = \
1916     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1917                                 constants.SSL_CERT_EXPIRATION_ERROR)
1918
1919   if msg:
1920     fnamemsg = "While verifying %s: %s" % (filename, msg)
1921   else:
1922     fnamemsg = None
1923
1924   if errcode is None:
1925     return (None, fnamemsg)
1926   elif errcode == utils.CERT_WARNING:
1927     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1928   elif errcode == utils.CERT_ERROR:
1929     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1930
1931   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1932
1933
def _GetAllHypervisorParameters(cluster, instances):
  """Collect every set of hypervisor parameters known to the cluster.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  # Cluster-level defaults, one entry per enabled hypervisor
  hvp_data = [("cluster", hv_name, cluster.GetHVDefaults(hv_name))
              for hv_name in cluster.enabled_hypervisors]

  # Per-OS overrides; entries with empty parameters are skipped
  for (os_name, os_hvp) in cluster.os_hvp.items():
    for (hv_name, hv_params) in os_hvp.items():
      if not hv_params:
        continue
      os_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
      hvp_data.append(("os %s" % os_name, hv_name, os_params))

  # TODO: collapse identical parameter values in a single one
  for inst in instances:
    if not inst.hvparams:
      continue
    origin = "instance %s" % inst.name
    hvp_data.append((origin, inst.hypervisor, cluster.FillHV(inst)))

  return hvp_data
1964
1965
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message and report it.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    @param ecode: a 3-tuple whose first two elements are the item type and
        the error text identifier
    @param item: the name of the affected item, or a false value
    @param msg: the message, interpolated with C{args} when any are given
    @keyword code: the severity label; defaults to L{ETYPE_ERROR}

    """
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    (itype, etxt, _) = ecode

    # first complete the msg
    if args:
      msg = msg % args

    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      text = "%s:%s:%s:%s:%s" % (severity, etxt, itype, item, msg)
    else:
      item_suffix = ""
      if item:
        item_suffix = " " + item
      text = "%s: %s%s: %s" % (severity, itype, item_suffix, msg)

    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % text) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    The condition is also treated as true when the opcode's
    debug_simulate_errors is set. Errors listed in the opcode's
    ignore_errors are reported as warnings; only error-level reports
    update self.bad.

    """
    triggered = (bool(cond)
                 or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if triggered:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) != self.ETYPE_ERROR:
      return

    self.bad = self.bad or triggered
2023
2024
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  This LU does no verification itself: it builds one job per node group
  (plus, when no specific group was requested, one job verifying the global
  configuration) and returns them wrapped in L{ResultWithJobs}.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the needed locks; none are requested here.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Build the verification jobs.

    @param feedback_fn: unused
    @return: a L{ResultWithJobs} holding a list of jobs, each job being a
        list with a single opcode

    """
    jobs = []

    if self.op.group_name:
      # A single group was requested: verify only it, with no dependencies
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      # NOTE(review): len(jobs) is read each time depends_fn() is called,
      # i.e. while the generator passed to jobs.extend() below is being
      # consumed, not here. The negative value is presumably a relative
      # reference back to the config verification job -- confirm against
      # the job dependency semantics before reordering any of this.
      depends_fn = lambda: [(-len(jobs), [])]

    # One job (a single-opcode list) per node group
    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # Tolerated only for opcodes other than the group verification one
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
2068
2069
2070 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2071   """Verifies the cluster config.
2072
2073   """
2074   REQ_BGL = False
2075
2076   def _VerifyHVP(self, hvp_data):
2077     """Verifies locally the syntax of the hypervisor parameters.
2078
2079     """
2080     for item, hv_name, hv_params in hvp_data:
2081       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2082              (item, hv_name))
2083       try:
2084         hv_class = hypervisor.GetHypervisor(hv_name)
2085         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2086         hv_class.CheckParameterSyntax(hv_params)
2087       except errors.GenericError, err:
2088         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2089
2090   def ExpandNames(self):
2091     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2092     self.share_locks = _ShareAll()
2093
2094   def CheckPrereq(self):
2095     """Check prerequisites.
2096
2097     """
2098     # Retrieve all information
2099     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2100     self.all_node_info = self.cfg.GetAllNodesInfo()
2101     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2102
2103   def Exec(self, feedback_fn):
2104     """Verify integrity of cluster, performing various test on nodes.
2105
2106     """
2107     self.bad = False
2108     self._feedback_fn = feedback_fn
2109
2110     feedback_fn("* Verifying cluster config")
2111
2112     for msg in self.cfg.VerifyConfig():
2113       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2114
2115     feedback_fn("* Verifying cluster certificate files")
2116
2117     for cert_filename in pathutils.ALL_CERT_FILES:
2118       (errcode, msg) = _VerifyCertificate(cert_filename)
2119       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2120
2121     feedback_fn("* Verifying hypervisor parameters")
2122
2123     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2124                                                 self.all_inst_info.values()))
2125
2126     feedback_fn("* Verifying all nodes belong to an existing group")
2127
2128     # We do this verification here because, should this bogus circumstance
2129     # occur, it would never be caught by VerifyGroup, which only acts on
2130     # nodes/instances reachable from existing node groups.
2131
2132     dangling_nodes = set(node.name for node in self.all_node_info.values()
2133                          if node.group not in self.all_group_info)
2134
2135     dangling_instances = {}
2136     no_node_instances = []
2137
2138     for inst in self.all_inst_info.values():
2139       if inst.primary_node in dangling_nodes:
2140         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2141       elif inst.primary_node not in self.all_node_info:
2142         no_node_instances.append(inst.name)
2143
2144     pretty_dangling = [
2145         "%s (%s)" %
2146         (node.name,
2147          utils.CommaJoin(dangling_instances.get(node.name,
2148                                                 ["no instances"])))
2149         for node in dangling_nodes]
2150
2151     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2152                   None,
2153                   "the following nodes (and their instances) belong to a non"
2154                   " existing group: %s", utils.CommaJoin(pretty_dangling))
2155
2156     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2157                   None,
2158                   "the following instances have a non-existing primary-node:"
2159                   " %s", utils.CommaJoin(no_node_instances))
2160
2161     return not self.bad
2162
2163
2164 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2165   """Verifies the status of a node group.
2166
2167   """
2168   HPATH = "cluster-verify"
2169   HTYPE = constants.HTYPE_CLUSTER
2170   REQ_BGL = False
2171
2172   _HOOKS_INDENT_RE = re.compile("^", re.M)
2173
2174   class NodeImage(object):
2175     """A class representing the logical and physical status of a node.
2176
2177     @type name: string
2178     @ivar name: the node name to which this object refers
2179     @ivar volumes: a structure as returned from
2180         L{ganeti.backend.GetVolumeList} (runtime)
2181     @ivar instances: a list of running instances (runtime)
2182     @ivar pinst: list of configured primary instances (config)
2183     @ivar sinst: list of configured secondary instances (config)
2184     @ivar sbp: dictionary of {primary-node: list of instances} for all
2185         instances for which this node is secondary (config)
2186     @ivar mfree: free memory, as reported by hypervisor (runtime)
2187     @ivar dfree: free disk, as reported by the node (runtime)
2188     @ivar offline: the offline status (config)
2189     @type rpc_fail: boolean
2190     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2191         not whether the individual keys were correct) (runtime)
2192     @type lvm_fail: boolean
2193     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2194     @type hyp_fail: boolean
2195     @ivar hyp_fail: whether the RPC call didn't return the instance list
2196     @type ghost: boolean
2197     @ivar ghost: whether this is a known node or not (config)
2198     @type os_fail: boolean
2199     @ivar os_fail: whether the RPC call didn't return valid OS data
2200     @type oslist: list
2201     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2202     @type vm_capable: boolean
2203     @ivar vm_capable: whether the node can host instances
2204
2205     """
2206     def __init__(self, offline=False, name=None, vm_capable=True):
2207       self.name = name
2208       self.volumes = {}
2209       self.instances = []
2210       self.pinst = []
2211       self.sinst = []
2212       self.sbp = {}
2213       self.mfree = 0
2214       self.dfree = 0
2215       self.offline = offline
2216       self.vm_capable = vm_capable
2217       self.rpc_fail = False
2218       self.lvm_fail = False
2219       self.hyp_fail = False
2220       self.ghost = False
2221       self.os_fail = False
2222       self.oslist = {}
2223
2224   def ExpandNames(self):
2225     # This raises errors.OpPrereqError on its own:
2226     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2227
2228     # Get instances in node group; this is unsafe and needs verification later
2229     inst_names = \
2230       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2231
2232     self.needed_locks = {
2233       locking.LEVEL_INSTANCE: inst_names,
2234       locking.LEVEL_NODEGROUP: [self.group_uuid],
2235       locking.LEVEL_NODE: [],
2236
2237       # This opcode is run by watcher every five minutes and acquires all nodes
2238       # for a group. It doesn't run for a long time, so it's better to acquire
2239       # the node allocation lock as well.
2240       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2241       }
2242
2243     self.share_locks = _ShareAll()
2244
2245   def DeclareLocks(self, level):
2246     if level == locking.LEVEL_NODE:
2247       # Get members of node group; this is unsafe and needs verification later
2248       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2249
2250       all_inst_info = self.cfg.GetAllInstancesInfo()
2251
2252       # In Exec(), we warn about mirrored instances that have primary and
2253       # secondary living in separate node groups. To fully verify that
2254       # volumes for these instances are healthy, we will need to do an
2255       # extra call to their secondaries. We ensure here those nodes will
2256       # be locked.
2257       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2258         # Important: access only the instances whose lock is owned
2259         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2260           nodes.update(all_inst_info[inst].secondary_nodes)
2261
2262       self.needed_locks[locking.LEVEL_NODE] = nodes
2263
2264   def CheckPrereq(self):
2265     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2266     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2267
2268     group_nodes = set(self.group_info.members)
2269     group_instances = \
2270       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2271
2272     unlocked_nodes = \
2273         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2274
2275     unlocked_instances = \
2276         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2277
2278     if unlocked_nodes:
2279       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2280                                  utils.CommaJoin(unlocked_nodes),
2281                                  errors.ECODE_STATE)
2282
2283     if unlocked_instances:
2284       raise errors.OpPrereqError("Missing lock for instances: %s" %
2285                                  utils.CommaJoin(unlocked_instances),
2286                                  errors.ECODE_STATE)
2287
2288     self.all_node_info = self.cfg.GetAllNodesInfo()
2289     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2290
2291     self.my_node_names = utils.NiceSort(group_nodes)
2292     self.my_inst_names = utils.NiceSort(group_instances)
2293
2294     self.my_node_info = dict((name, self.all_node_info[name])
2295                              for name in self.my_node_names)
2296
2297     self.my_inst_info = dict((name, self.all_inst_info[name])
2298                              for name in self.my_inst_names)
2299
2300     # We detect here the nodes that will need the extra RPC calls for verifying
2301     # split LV volumes; they should be locked.
2302     extra_lv_nodes = set()
2303
2304     for inst in self.my_inst_info.values():
2305       if inst.disk_template in constants.DTS_INT_MIRROR:
2306         for nname in inst.all_nodes:
2307           if self.all_node_info[nname].group != self.group_uuid:
2308             extra_lv_nodes.add(nname)
2309
2310     unlocked_lv_nodes = \
2311         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2312
2313     if unlocked_lv_nodes:
2314       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2315                                  utils.CommaJoin(unlocked_lv_nodes),
2316                                  errors.ECODE_STATE)
2317     self.extra_lv_nodes = list(extra_lv_nodes)
2318
2319   def _VerifyNode(self, ninfo, nresult):
2320     """Perform some basic validation on data returned from a node.
2321
2322       - check the result data structure is well formed and has all the
2323         mandatory fields
2324       - check ganeti version
2325
2326     @type ninfo: L{objects.Node}
2327     @param ninfo: the node to check
2328     @param nresult: the results from the node
2329     @rtype: boolean
2330     @return: whether overall this call was successful (and we can expect
2331          reasonable values in the respose)
2332
2333     """
2334     node = ninfo.name
2335     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2336
2337     # main result, nresult should be a non-empty dict
2338     test = not nresult or not isinstance(nresult, dict)
2339     _ErrorIf(test, constants.CV_ENODERPC, node,
2340                   "unable to verify node: no data returned")
2341     if test:
2342       return False
2343
2344     # compares ganeti version
2345     local_version = constants.PROTOCOL_VERSION
2346     remote_version = nresult.get("version", None)
2347     test = not (remote_version and
2348                 isinstance(remote_version, (list, tuple)) and
2349                 len(remote_version) == 2)
2350     _ErrorIf(test, constants.CV_ENODERPC, node,
2351              "connection to node returned invalid data")
2352     if test:
2353       return False
2354
2355     test = local_version != remote_version[0]
2356     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2357              "incompatible protocol versions: master %s,"
2358              " node %s", local_version, remote_version[0])
2359     if test:
2360       return False
2361
2362     # node seems compatible, we can actually try to look into its results
2363
2364     # full package version
2365     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2366                   constants.CV_ENODEVERSION, node,
2367                   "software version mismatch: master %s, node %s",
2368                   constants.RELEASE_VERSION, remote_version[1],
2369                   code=self.ETYPE_WARNING)
2370
2371     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2372     if ninfo.vm_capable and isinstance(hyp_result, dict):
2373       for hv_name, hv_result in hyp_result.iteritems():
2374         test = hv_result is not None
2375         _ErrorIf(test, constants.CV_ENODEHV, node,
2376                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2377
2378     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2379     if ninfo.vm_capable and isinstance(hvp_result, list):
2380       for item, hv_name, hv_result in hvp_result:
2381         _ErrorIf(True, constants.CV_ENODEHV, node,
2382                  "hypervisor %s parameter verify failure (source %s): %s",
2383                  hv_name, item, hv_result)
2384
2385     test = nresult.get(constants.NV_NODESETUP,
2386                        ["Missing NODESETUP results"])
2387     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2388              "; ".join(test))
2389
2390     return True
2391
2392   def _VerifyNodeTime(self, ninfo, nresult,
2393                       nvinfo_starttime, nvinfo_endtime):
2394     """Check the node time.
2395
2396     @type ninfo: L{objects.Node}
2397     @param ninfo: the node to check
2398     @param nresult: the remote results for the node
2399     @param nvinfo_starttime: the start time of the RPC call
2400     @param nvinfo_endtime: the end time of the RPC call
2401
2402     """
2403     node = ninfo.name
2404     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2405
2406     ntime = nresult.get(constants.NV_TIME, None)
2407     try:
2408       ntime_merged = utils.MergeTime(ntime)
2409     except (ValueError, TypeError):
2410       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2411       return
2412
2413     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2414       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2415     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2416       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2417     else:
2418       ntime_diff = None
2419
2420     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2421              "Node time diverges by at least %s from master node time",
2422              ntime_diff)
2423
2424   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2425     """Check the node LVM results.
2426
2427     @type ninfo: L{objects.Node}
2428     @param ninfo: the node to check
2429     @param nresult: the remote results for the node
2430     @param vg_name: the configured VG name
2431
2432     """
2433     if vg_name is None:
2434       return
2435
2436     node = ninfo.name
2437     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2438
2439     # checks vg existence and size > 20G
2440     vglist = nresult.get(constants.NV_VGLIST, None)
2441     test = not vglist
2442     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2443     if not test:
2444       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2445                                             constants.MIN_VG_SIZE)
2446       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2447
2448     # check pv names
2449     pvlist = nresult.get(constants.NV_PVLIST, None)
2450     test = pvlist is None
2451     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2452     if not test:
2453       # check that ':' is not present in PV names, since it's a
2454       # special character for lvcreate (denotes the range of PEs to
2455       # use on the PV)
2456       for _, pvname, owner_vg in pvlist:
2457         test = ":" in pvname
2458         _ErrorIf(test, constants.CV_ENODELVM, node,
2459                  "Invalid character ':' in PV '%s' of VG '%s'",
2460                  pvname, owner_vg)
2461
2462   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2463     """Check the node bridges.
2464
2465     @type ninfo: L{objects.Node}
2466     @param ninfo: the node to check
2467     @param nresult: the remote results for the node
2468     @param bridges: the expected list of bridges
2469
2470     """
2471     if not bridges:
2472       return
2473
2474     node = ninfo.name
2475     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2476
2477     missing = nresult.get(constants.NV_BRIDGES, None)
2478     test = not isinstance(missing, list)
2479     _ErrorIf(test, constants.CV_ENODENET, node,
2480              "did not return valid bridge information")
2481     if not test:
2482       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2483                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2484
2485   def _VerifyNodeUserScripts(self, ninfo, nresult):
2486     """Check the results of user scripts presence and executability on the node
2487
2488     @type ninfo: L{objects.Node}
2489     @param ninfo: the node to check
2490     @param nresult: the remote results for the node
2491
2492     """
2493     node = ninfo.name
2494
2495     test = not constants.NV_USERSCRIPTS in nresult
2496     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2497                   "did not return user scripts information")
2498
2499     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2500     if not test:
2501       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2502                     "user scripts not present or not executable: %s" %
2503                     utils.CommaJoin(sorted(broken_scripts)))
2504
2505   def _VerifyNodeNetwork(self, ninfo, nresult):
2506     """Check the node network connectivity results.
2507
2508     @type ninfo: L{objects.Node}
2509     @param ninfo: the node to check
2510     @param nresult: the remote results for the node
2511
2512     """
2513     node = ninfo.name
2514     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2515
2516     test = constants.NV_NODELIST not in nresult
2517     _ErrorIf(test, constants.CV_ENODESSH, node,
2518              "node hasn't returned node ssh connectivity data")
2519     if not test:
2520       if nresult[constants.NV_NODELIST]:
2521         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2522           _ErrorIf(True, constants.CV_ENODESSH, node,
2523                    "ssh communication with node '%s': %s", a_node, a_msg)
2524
2525     test = constants.NV_NODENETTEST not in nresult
2526     _ErrorIf(test, constants.CV_ENODENET, node,
2527              "node hasn't returned node tcp connectivity data")
2528     if not test:
2529       if nresult[constants.NV_NODENETTEST]:
2530         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2531         for anode in nlist:
2532           _ErrorIf(True, constants.CV_ENODENET, node,
2533                    "tcp communication with node '%s': %s",
2534                    anode, nresult[constants.NV_NODENETTEST][anode])
2535
2536     test = constants.NV_MASTERIP not in nresult
2537     _ErrorIf(test, constants.CV_ENODENET, node,
2538              "node hasn't returned node master IP reachability data")
2539     if not test:
2540       if not nresult[constants.NV_MASTERIP]:
2541         if node == self.master_node:
2542           msg = "the master node cannot reach the master IP (not configured?)"
2543         else:
2544           msg = "cannot reach the master IP"
2545         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2546
2547   def _VerifyInstance(self, instance, instanceconfig, node_image,
2548                       diskstatus):
2549     """Verify an instance.
2550
2551     This function checks to see if the required block devices are
2552     available on the instance's node.
2553
2554     """
2555     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2556     node_current = instanceconfig.primary_node
2557
2558     node_vol_should = {}
2559     instanceconfig.MapLVsByNode(node_vol_should)
2560
2561     cluster = self.cfg.GetClusterInfo()
2562     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2563                                                             self.group_info)
2564     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2565     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2566              code=self.ETYPE_WARNING)
2567
2568     for node in node_vol_should:
2569       n_img = node_image[node]
2570       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2571         # ignore missing volumes on offline or broken nodes
2572         continue
2573       for volume in node_vol_should[node]:
2574         test = volume not in n_img.volumes
2575         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2576                  "volume %s missing on node %s", volume, node)
2577
2578     if instanceconfig.admin_state == constants.ADMINST_UP:
2579       pri_img = node_image[node_current]
2580       test = instance not in pri_img.instances and not pri_img.offline
2581       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2582                "instance not running on its primary node %s",
2583                node_current)
2584
2585     diskdata = [(nname, success, status, idx)
2586                 for (nname, disks) in diskstatus.items()
2587                 for idx, (success, status) in enumerate(disks)]
2588
2589     for nname, success, bdev_status, idx in diskdata:
2590       # the 'ghost node' construction in Exec() ensures that we have a
2591       # node here
2592       snode = node_image[nname]
2593       bad_snode = snode.ghost or snode.offline
2594       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2595                not success and not bad_snode,
2596                constants.CV_EINSTANCEFAULTYDISK, instance,
2597                "couldn't retrieve status for disk/%s on %s: %s",
2598                idx, nname, bdev_status)
2599       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2600                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2601                constants.CV_EINSTANCEFAULTYDISK, instance,
2602                "disk/%s on %s is faulty", idx, nname)
2603
2604   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2605     """Verify if there are any unknown volumes in the cluster.
2606
2607     The .os, .swap and backup volumes are ignored. All other volumes are
2608     reported as unknown.
2609
2610     @type reserved: L{ganeti.utils.FieldSet}
2611     @param reserved: a FieldSet of reserved volume names
2612
2613     """
2614     for node, n_img in node_image.items():
2615       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2616           self.all_node_info[node].group != self.group_uuid):
2617         # skip non-healthy nodes
2618         continue
2619       for volume in n_img.volumes:
2620         test = ((node not in node_vol_should or
2621                 volume not in node_vol_should[node]) and
2622                 not reserved.Matches(volume))
2623         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2624                       "volume %s is unknown", volume)
2625
2626   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2627     """Verify N+1 Memory Resilience.
2628
2629     Check that if one single node dies we can still start all the
2630     instances it was primary for.
2631
2632     """
2633     cluster_info = self.cfg.GetClusterInfo()
2634     for node, n_img in node_image.items():
2635       # This code checks that every node which is now listed as
2636       # secondary has enough memory to host all instances it is
2637       # supposed to should a single other node in the cluster fail.
2638       # FIXME: not ready for failover to an arbitrary node
2639       # FIXME: does not support file-backed instances
2640       # WARNING: we currently take into account down instances as well
2641       # as up ones, considering that even if they're down someone
2642       # might want to start them even in the event of a node failure.
2643       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2644         # we're skipping nodes marked offline and nodes in other groups from
2645         # the N+1 warning, since most likely we don't have good memory
2646         # infromation from them; we already list instances living on such
2647         # nodes, and that's enough warning
2648         continue
2649       #TODO(dynmem): also consider ballooning out other instances
2650       for prinode, instances in n_img.sbp.items():
2651         needed_mem = 0
2652         for instance in instances:
2653           bep = cluster_info.FillBE(instance_cfg[instance])
2654           if bep[constants.BE_AUTO_BALANCE]:
2655             needed_mem += bep[constants.BE_MINMEM]
2656         test = n_img.mfree < needed_mem
2657         self._ErrorIf(test, constants.CV_ENODEN1, node,
2658                       "not enough memory to accomodate instance failovers"
2659                       " should node %s fail (%dMiB needed, %dMiB available)",
2660                       prinode, needed_mem, n_img.mfree)
2661
2662   @classmethod
2663   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2664                    (files_all, files_opt, files_mc, files_vm)):
2665     """Verifies file checksums collected from all nodes.
2666
2667     @param errorif: Callback for reporting errors
2668     @param nodeinfo: List of L{objects.Node} objects
2669     @param master_node: Name of master node
2670     @param all_nvinfo: RPC results
2671
2672     """
2673     # Define functions determining which nodes to consider for a file
2674     files2nodefn = [
2675       (files_all, None),
2676       (files_mc, lambda node: (node.master_candidate or
2677                                node.name == master_node)),
2678       (files_vm, lambda node: node.vm_capable),
2679       ]
2680
2681     # Build mapping from filename to list of nodes which should have the file
2682     nodefiles = {}
2683     for (files, fn) in files2nodefn:
2684       if fn is None:
2685         filenodes = nodeinfo
2686       else:
2687         filenodes = filter(fn, nodeinfo)
2688       nodefiles.update((filename,
2689                         frozenset(map(operator.attrgetter("name"), filenodes)))
2690                        for filename in files)
2691
2692     assert set(nodefiles) == (files_all | files_mc | files_vm)
2693
2694     fileinfo = dict((filename, {}) for filename in nodefiles)
2695     ignore_nodes = set()
2696
2697     for node in nodeinfo:
2698       if node.offline:
2699         ignore_nodes.add(node.name)
2700         continue
2701
2702       nresult = all_nvinfo[node.name]
2703
2704       if nresult.fail_msg or not nresult.payload:
2705         node_files = None
2706       else:
2707         fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2708         node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2709                           for (key, value) in fingerprints.items())
2710         del fingerprints
2711
2712       test = not (node_files and isinstance(node_files, dict))
2713       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2714               "Node did not return file checksum data")
2715       if test:
2716         ignore_nodes.add(node.name)
2717         continue
2718
2719       # Build per-checksum mapping from filename to nodes having it
2720       for (filename, checksum) in node_files.items():
2721         assert filename in nodefiles
2722         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2723
2724     for (filename, checksums) in fileinfo.items():
2725       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2726
2727       # Nodes having the file
2728       with_file = frozenset(node_name
2729                             for nodes in fileinfo[filename].values()
2730                             for node_name in nodes) - ignore_nodes
2731
2732       expected_nodes = nodefiles[filename] - ignore_nodes
2733
2734       # Nodes missing file
2735       missing_file = expected_nodes - with_file
2736
2737       if filename in files_opt:
2738         # All or no nodes
2739         errorif(missing_file and missing_file != expected_nodes,
2740                 constants.CV_ECLUSTERFILECHECK, None,
2741                 "File %s is optional, but it must exist on all or no"
2742                 " nodes (not found on %s)",
2743                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2744       else:
2745         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2746                 "File %s is missing from node(s) %s", filename,
2747                 utils.CommaJoin(utils.NiceSort(missing_file)))
2748
2749         # Warn if a node has a file it shouldn't
2750         unexpected = with_file - expected_nodes
2751         errorif(unexpected,
2752                 constants.CV_ECLUSTERFILECHECK, None,
2753                 "File %s should not exist on node(s) %s",
2754                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2755
2756       # See if there are multiple versions of the file
2757       test = len(checksums) > 1
2758       if test:
2759         variants = ["variant %s on %s" %
2760                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2761                     for (idx, (checksum, nodes)) in
2762                       enumerate(sorted(checksums.items()))]
2763       else:
2764         variants = []
2765
2766       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2767               "File %s found with %s different checksums (%s)",
2768               filename, len(checksums), "; ".join(variants))
2769
2770   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2771                       drbd_map):
2772     """Verifies and the node DRBD status.
2773
2774     @type ninfo: L{objects.Node}
2775     @param ninfo: the node to check
2776     @param nresult: the remote results for the node
2777     @param instanceinfo: the dict of instances
2778     @param drbd_helper: the configured DRBD usermode helper
2779     @param drbd_map: the DRBD map as returned by
2780         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2781
2782     """
2783     node = ninfo.name
2784     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2785
2786     if drbd_helper:
2787       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2788       test = (helper_result is None)
2789       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2790                "no drbd usermode helper returned")
2791       if helper_result:
2792         status, payload = helper_result
2793         test = not status
2794         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2795                  "drbd usermode helper check unsuccessful: %s", payload)
2796         test = status and (payload != drbd_helper)
2797         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2798                  "wrong drbd usermode helper: %s", payload)
2799
2800     # compute the DRBD minors
2801     node_drbd = {}
2802     for minor, instance in drbd_map[node].items():
2803       test = instance not in instanceinfo
2804       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2805                "ghost instance '%s' in temporary DRBD map", instance)
2806         # ghost instance should not be running, but otherwise we
2807         # don't give double warnings (both ghost instance and
2808         # unallocated minor in use)
2809       if test:
2810         node_drbd[minor] = (instance, False)
2811       else:
2812         instance = instanceinfo[instance]
2813         node_drbd[minor] = (instance.name,
2814                             instance.admin_state == constants.ADMINST_UP)
2815
2816     # and now check them
2817     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2818     test = not isinstance(used_minors, (tuple, list))
2819     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2820              "cannot parse drbd status file: %s", str(used_minors))
2821     if test:
2822       # we cannot check drbd status
2823       return
2824
2825     for minor, (iname, must_exist) in node_drbd.items():
2826       test = minor not in used_minors and must_exist
2827       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2828                "drbd minor %d of instance %s is not active", minor, iname)
2829     for minor in used_minors:
2830       test = minor not in node_drbd
2831       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2832                "unallocated drbd minor %d is in use", minor)
2833
2834   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2835     """Builds the node OS structures.
2836
2837     @type ninfo: L{objects.Node}
2838     @param ninfo: the node to check
2839     @param nresult: the remote results for the node
2840     @param nimg: the node image object
2841
2842     """
2843     node = ninfo.name
2844     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2845
2846     remote_os = nresult.get(constants.NV_OSLIST, None)
2847     test = (not isinstance(remote_os, list) or
2848             not compat.all(isinstance(v, list) and len(v) == 7
2849                            for v in remote_os))
2850
2851     _ErrorIf(test, constants.CV_ENODEOS, node,
2852              "node hasn't returned valid OS data")
2853
2854     nimg.os_fail = test
2855
2856     if test:
2857       return
2858
2859     os_dict = {}
2860
2861     for (name, os_path, status, diagnose,
2862          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2863
2864       if name not in os_dict:
2865         os_dict[name] = []
2866
2867       # parameters is a list of lists instead of list of tuples due to
2868       # JSON lacking a real tuple type, fix it:
2869       parameters = [tuple(v) for v in parameters]
2870       os_dict[name].append((os_path, status, diagnose,
2871                             set(variants), set(parameters), set(api_ver)))
2872
2873     nimg.oslist = os_dict
2874
2875   def _VerifyNodeOS(self, ninfo, nimg, base):
2876     """Verifies the node OS list.
2877
2878     @type ninfo: L{objects.Node}
2879     @param ninfo: the node to check
2880     @param nimg: the node image object
2881     @param base: the 'template' node we match against (e.g. from the master)
2882
2883     """
2884     node = ninfo.name
2885     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2886
2887     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2888
2889     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2890     for os_name, os_data in nimg.oslist.items():
2891       assert os_data, "Empty OS status for OS %s?!" % os_name
2892       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2893       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2894                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2895       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2896                "OS '%s' has multiple entries (first one shadows the rest): %s",
2897                os_name, utils.CommaJoin([v[0] for v in os_data]))
2898       # comparisons with the 'base' image
2899       test = os_name not in base.oslist
2900       _ErrorIf(test, constants.CV_ENODEOS, node,
2901                "Extra OS %s not present on reference node (%s)",
2902                os_name, base.name)
2903       if test:
2904         continue
2905       assert base.oslist[os_name], "Base node has empty OS status?"
2906       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2907       if not b_status:
2908         # base OS is invalid, skipping
2909         continue
2910       for kind, a, b in [("API version", f_api, b_api),
2911                          ("variants list", f_var, b_var),
2912                          ("parameters", beautify_params(f_param),
2913                           beautify_params(b_param))]:
2914         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2915                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2916                  kind, os_name, base.name,
2917                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2918
2919     # check any missing OSes
2920     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2921     _ErrorIf(missing, constants.CV_ENODEOS, node,
2922              "OSes present on reference node %s but missing on this node: %s",
2923              base.name, utils.CommaJoin(missing))
2924
2925   def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2926     """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2927
2928     @type ninfo: L{objects.Node}
2929     @param ninfo: the node to check
2930     @param nresult: the remote results for the node
2931     @type is_master: bool
2932     @param is_master: Whether node is the master node
2933
2934     """
2935     node = ninfo.name
2936
2937     if (is_master and
2938         (constants.ENABLE_FILE_STORAGE or
2939          constants.ENABLE_SHARED_FILE_STORAGE)):
2940       try:
2941         fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2942       except KeyError:
2943         # This should never happen
2944         self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2945                       "Node did not return forbidden file storage paths")
2946       else:
2947         self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2948                       "Found forbidden file storage paths: %s",
2949                       utils.CommaJoin(fspaths))
2950     else:
2951       self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2952                     constants.CV_ENODEFILESTORAGEPATHS, node,
2953                     "Node should not have returned forbidden file storage"
2954                     " paths")
2955
2956   def _VerifyOob(self, ninfo, nresult):
2957     """Verifies out of band functionality of a node.
2958
2959     @type ninfo: L{objects.Node}
2960     @param ninfo: the node to check
2961     @param nresult: the remote results for the node
2962
2963     """
2964     node = ninfo.name
2965     # We just have to verify the paths on master and/or master candidates
2966     # as the oob helper is invoked on the master
2967     if ((ninfo.master_candidate or ninfo.master_capable) and
2968         constants.NV_OOB_PATHS in nresult):
2969       for path_result in nresult[constants.NV_OOB_PATHS]:
2970         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2971
2972   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2973     """Verifies and updates the node volume data.
2974
2975     This function will update a L{NodeImage}'s internal structures
2976     with data from the remote call.
2977
2978     @type ninfo: L{objects.Node}
2979     @param ninfo: the node to check
2980     @param nresult: the remote results for the node
2981     @param nimg: the node image object
2982     @param vg_name: the configured VG name
2983
2984     """
2985     node = ninfo.name
2986     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2987
2988     nimg.lvm_fail = True
2989     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2990     if vg_name is None:
2991       pass
2992     elif isinstance(lvdata, basestring):
2993       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2994                utils.SafeEncode(lvdata))
2995     elif not isinstance(lvdata, dict):
2996       _ErrorIf(True, constants.CV_ENODELVM, node,
2997                "rpc call to node failed (lvlist)")
2998     else:
2999       nimg.volumes = lvdata
3000       nimg.lvm_fail = False
3001
3002   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3003     """Verifies and updates the node instance list.
3004
3005     If the listing was successful, then updates this node's instance
3006     list. Otherwise, it marks the RPC call as failed for the instance
3007     list key.
3008
3009     @type ninfo: L{objects.Node}
3010     @param ninfo: the node to check
3011     @param nresult: the remote results for the node
3012     @param nimg: the node image object
3013
3014     """
3015     idata = nresult.get(constants.NV_INSTANCELIST, None)
3016     test = not isinstance(idata, list)
3017     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3018                   "rpc call to node failed (instancelist): %s",
3019                   utils.SafeEncode(str(idata)))
3020     if test:
3021       nimg.hyp_fail = True
3022     else:
3023       nimg.instances = idata
3024
3025   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3026     """Verifies and computes a node information map
3027
3028     @type ninfo: L{objects.Node}
3029     @param ninfo: the node to check
3030     @param nresult: the remote results for the node
3031     @param nimg: the node image object
3032     @param vg_name: the configured VG name
3033
3034     """
3035     node = ninfo.name
3036     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3037
3038     # try to read free memory (from the hypervisor)
3039     hv_info = nresult.get(constants.NV_HVINFO, None)
3040     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3041     _ErrorIf(test, constants.CV_ENODEHV, node,
3042              "rpc call to node failed (hvinfo)")
3043     if not test:
3044       try:
3045         nimg.mfree = int(hv_info["memory_free"])
3046       except (ValueError, TypeError):
3047         _ErrorIf(True, constants.CV_ENODERPC, node,
3048                  "node returned invalid nodeinfo, check hypervisor")
3049
3050     # FIXME: devise a free space model for file based instances as well
3051     if vg_name is not None:
3052       test = (constants.NV_VGLIST not in nresult or
3053               vg_name not in nresult[constants.NV_VGLIST])
3054       _ErrorIf(test, constants.CV_ENODELVM, node,
3055                "node didn't return data for the volume group '%s'"
3056                " - it is either missing or broken", vg_name)
3057       if not test:
3058         try:
3059           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3060         except (ValueError, TypeError):
3061           _ErrorIf(True, constants.CV_ENODERPC, node,
3062                    "node returned invalid LVM info, check LVM status")
3063
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    For every node in C{nodelist} the disks of its primary and secondary
    instances are gathered, and the mirror status of all of them is
    requested from the nodes in one multi-node RPC call. The answers are
    then regrouped per instance and per node.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, node image)
    @param node_image: Node image objects; their C{pinst} and C{sinst}
        attributes are read to find the instances of each node
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # node name -> list of (instance name, disk) pairs
    node_disks = {}
    # node name -> list of annotated disk objects, in the same order
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # _AnnotateDiskParams already makes copies of the disks
      devonly = []
      for (inst, dev) in disks:
        (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
        self.cfg.SetDiskID(anno_disk, nname)
        devonly.append(anno_disk)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              # Malformed entries are replaced by a failure marker so that
              # every status keeps the (success, payload) shape
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      # Statuses are matched to disks purely by position
      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    # Sanity checks: one status per disk for each (instance, node) pair, no
    # more nodes than the instance has, and well-formed status entries
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
3160
3161   @staticmethod
3162   def _SshNodeSelector(group_uuid, all_nodes):
3163     """Create endless iterators for all potential SSH check hosts.
3164
3165     """
3166     nodes = [node for node in all_nodes
3167              if (node.group != group_uuid and
3168                  not node.offline)]
3169     keyfunc = operator.attrgetter("group")
3170
3171     return map(itertools.cycle,
3172                [sorted(map(operator.attrgetter("name"), names))
3173                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3174                                                   keyfunc)])
3175
3176   @classmethod
3177   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3178     """Choose which nodes should talk to which other nodes.
3179
3180     We will make nodes contact all nodes in their group, and one node from
3181     every other group.
3182
3183     @warning: This algorithm has a known issue if one node group is much
3184       smaller than others (e.g. just one node). In such a case all other
3185       nodes will talk to the single node.
3186
3187     """
3188     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3189     sel = cls._SshNodeSelector(group_uuid, all_nodes)
3190
3191     return (online_nodes,
3192             dict((name, sorted([i.next() for i in sel]))
3193                  for name in online_nodes))
3194
3195   def BuildHooksEnv(self):
3196     """Build hooks env.
3197
3198     Cluster-Verify hooks just ran in the post phase and their failure makes
3199     the output be logged in the verify output and the verification to fail.
3200
3201     """
3202     env = {
3203       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3204       }
3205
3206     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3207                for node in self.my_node_info.values())
3208
3209     return env
3210
3211   def BuildHooksNodes(self):
3212     """Build hooks nodes.
3213
3214     """
3215     return ([], self.my_node_names)
3216
  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    The method builds the expected state of the group from the
    configuration, gathers runtime data from the group's nodes through
    the C{node_verify} RPC, and then checks files, nodes, instances,
    orphan volumes and (unless skipped) N+1 memory redundancy. Problems
    are reported through C{self._ErrorIf}.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: boolean
    @return: C{True} for an empty node group, otherwise C{not self.bad}
        as it stands after all checks have run

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    # Reset the failure flag; it is what the final return value is based on
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    i_offline = 0 # Count of offline instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    # Filled by MapLVsByNode below and later handed to _VerifyOrphanVolumes
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    # The external master IP setup script is only checked when it is in use
    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
      user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)

    # Arguments of the node_verify RPC, keyed by the NV_* check constant
    node_verify_param = {
      constants.NV_FILELIST:
        map(vcluster.MakeVirtualPath,
            utils.UniqueSequence(filename
                                 for files in filemap
                                 for filename in files)),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      }

    # LVM checks are only requested if a volume group is configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]

    # DRBD checks are only requested if a DRBD helper is configured
    if drbd_helper:
      node_verify_param[constants.NV_DRBDLIST] = None
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
      # Load file storage paths only from master node
      node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    # Register every instance of the group with the images of its nodes
    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]
      if inst_config.admin_state == constants.ADMINST_OFFLINE:
        i_offline += 1

      # Nodes used by the instance but not part of this group get their own
      # image; those unknown to the configuration are flagged as ghosts
      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      # On each secondary, also record the instance under its primary node
      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    # Nodes in self.extra_lv_nodes are only asked for their LV list
    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included,
      # excluding the master node (which we already have)
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if (nodeinfo.vm_capable and not nodeinfo.offline and
            node != master_node):
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      # The additional nodes are only asked for the file list check
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    # Image of the first node without OS failures; passed to _VerifyNodeOS
    # as the reference for all nodes checked afterwards
    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      # A failed RPC is recorded on the image and ends the checks for the node
      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)
      self._VerifyFileStoragePaths(node_i, nresult,
                                   node == master_node)

      # The remaining checks are only run for vm_capable nodes
      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        # Instances known to the configuration run on the wrong node, all
        # others are reported as unknown to the cluster
        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    # NOTE(review): the payload is used here without looking at
    # result.fail_msg first -- presumably _UpdateNodeVolumes copes with
    # that, confirm
    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant.
      if inst_config.disk_template not in constants.DTS_MIRRORED:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               constants.CV_EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      # For internally mirrored disk templates, warn if the instance's nodes
      # are spread over more than one node group
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1,
                      constants.CV_EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    # Summary notices; these are informational only and go straight to the
    # feedback function instead of through _ErrorIf
    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if i_offline:
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
3605
3606   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3607     """Analyze the post-hooks' result
3608
3609     This method analyses the hook result, handles it, and sends some
3610     nicely-formatted feedback back to the user.
3611
3612     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3613         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3614     @param hooks_results: the results of the multi-node hooks rpc call
3615     @param feedback_fn: function used send feedback back to the caller
3616     @param lu_result: previous Exec result
3617     @return: the new Exec result, based on the previous result
3618         and hook results
3619
3620     """
3621     # We only really run POST phase hooks, only for non-empty groups,
3622     # and are only interested in their results
3623     if not self.my_node_names:
3624       # empty node group
3625       pass
3626     elif phase == constants.HOOKS_PHASE_POST:
3627       # Used to change hooks' output to proper indentation
3628       feedback_fn("* Hooks Results")
3629       assert hooks_results, "invalid result from hooks"
3630
3631       for node_name in hooks_results:
3632         res = hooks_results[node_name]
3633         msg = res.fail_msg
3634         test = msg and not res.offline
3635         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3636                       "Communication failure in hooks execution: %s", msg)
3637         if res.offline or msg:
3638           # No need to investigate payload if node is offline or gave
3639           # an error.
3640           continue
3641         for script, hkr, output in res.payload:
3642           test = hkr == constants.HKR_FAIL
3643           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3644                         "Script %s failed, output:", script)
3645           if test:
3646             output = self._HOOKS_INDENT_RE.sub("      ", output)
3647             feedback_fn("%s" % output)
3648             lu_result = False
3649
3650     return lu_result
3651
3652
class LUClusterVerifyDisks(NoHooksLU):
  """Checks the status of the disks in the whole cluster.

  The work is delegated to one L{opcodes.OpGroupVerifyDisks} job per node
  group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request (shared) locks on all node groups.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {locking.LEVEL_NODEGROUP: locking.ALL_SET}

  def Exec(self, feedback_fn):
    """Submit the per-group verification jobs.

    @return: a L{ResultWithJobs} holding one single-opcode job for each
        owned node group lock

    """
    jobs = []
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      # One job per group, each made of a single OpGroupVerifyDisks opcode
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])

    return ResultWithJobs(jobs)
3671
3672
class LUGroupVerifyDisks(NoHooksLU):
  """Checks the state of all instance disks within one node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name and set up the lock levels.

    """
    # LookupNodeGroup raises errors.OpPrereqError by itself for unknown groups
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      # All node locks of a group are acquired by this opcode, and
      # LUClusterVerifyDisks starts one such opcode per group; all nodes
      # end up locked for a short time, hence the node allocation lock is
      # acquired, too.
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,

      # These are filled in by DeclareLocks
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    """Compute the locks needed at the given level.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Optimistic locking: the instance list is re-checked once the node
      # and group locks are held
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances
      return

    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      # Besides the group itself, optimistically lock every group used by
      # the locked instances; this goes via nodes which are not locked
      # yet, so it has to be verified later on
      wanted_groups = set([self.group_uuid])
      for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
        wanted_groups.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups
      return

    if level == locking.LEVEL_NODE:
      # First the nodes actually holding instances (only those in the
      # group to be verified) ...
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # ... then all the members of the group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      group = self.cfg.GetNodeGroup(self.group_uuid)
      self.needed_locks[locking.LEVEL_NODE].extend(group.members)

  def CheckPrereq(self):
    """Verify that the optimistically acquired locks are still correct.

    """
    held_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    held_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    held_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in held_groups

    # The set of instances in the group must not have changed meanwhile
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, held_instances)

    # Load the configuration objects of the locked instances
    self.instances = dict(self.cfg.GetMultiInstanceInfo(held_instances))

    # The same goes for the node groups used by those instances
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              held_groups, held_nodes, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    # Only instances marked as up are of interest
    running = [inst for inst in self.instances.values()
               if inst.admin_state == constants.ADMINST_UP]
    nv_dict = _MapInstanceDisksToNodes(running)

    if not nv_dict:
      return (res_nodes, list(res_instances), res_missing)

    locked_nodes = set(self.owned_locks(locking.LEVEL_NODE))
    vm_capable_nodes = set(self.cfg.GetVmCapableNodeList())
    nodes = utils.NiceSort(locked_nodes & vm_capable_nodes)

    node_lvs = self.rpc.call_lv_list(nodes, [])

    for (node, node_res) in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # Every LV reported by the node is taken off the expected list
        inst = nv_dict.pop((node, lv_name), None)
        if inst is not None and not lv_online:
          res_instances.add(inst)

    # Whatever is still left in nv_dict was not reported by any node, i.e.
    # is missing; regroup these entries per instance
    for key, inst in nv_dict.items():
      res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
3789
3790
class LUClusterRepairDiskSizes(NoHooksLU):
  """Compares recorded disk sizes with the actual ones and fixes them.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks, depending on whether instances were given.

    """
    if not self.op.instances:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        locking.LEVEL_NODE_RES: locking.ALL_SET,

        # The node locks for all instances are acquired by this opcode
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      # Only a specific set of instances (and their nodes) gets locked, so
      # the node allocation lock is not acquired
      self.needed_locks = {
        locking.LEVEL_INSTANCE: self.wanted_names,
        locking.LEVEL_NODE_RES: [],
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self.share_locks = {
      locking.LEVEL_INSTANCE: 0,
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_NODE_ALLOC: 1,
      }

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    @param level: the locking level being declared

    """
    if level != locking.LEVEL_NODE_RES or self.wanted_names is None:
      return

    self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    Falls back to all owned instance locks if no instance names were
    requested and loads the configuration of the wanted instances.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    name_info_pairs = self.cfg.GetMultiInstanceInfo(self.wanted_names)
    self.wanted_instances = [compat.snd(pair) for pair in name_info_pairs]

  def _EnsureChildSizes(self, disk):
    """Make sure the children of a disk are at least as big as the disk.

    This matters mainly for DRBD8, where the first child could be
    recorded with a smaller size than its parent.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: whether any size was changed, here or further down the chain
        of first children

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    first_child = disk.children[0]
    too_small = first_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   first_child.size, disk.size)
      first_child.size = disk.size

    # Only the first child is followed, the metadev is left alone
    return self._EnsureChildSizes(first_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: function passed on to the configuration updates
    @rtype: list
    @return: one C{(instance name, disk index, new size)} tuple for every
        configuration update done

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes

    # Collect the disks of all wanted instances by primary node
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      node_disks.extend((instance, idx, disk)
                        for (idx, disk) in enumerate(instance.disks))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, entries in per_node_disks.items():
      # The RPC works on copies which had their IDs set for this node
      disk_copies = [entry[2].Copy() for entry in entries]
      for dsk in disk_copies:
        self.cfg.SetDiskID(dsk, node)

      result = self.rpc.call_blockdev_getsize(node, disk_copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      sizes = result.payload
      if len(sizes) != len(entries):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(entries), sizes)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((instance, idx, disk), size) in zip(entries, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue

        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue

        # The recorded size is compared against the reported one with its
        # lowest 20 bits dropped
        actual = size >> 20
        if actual != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, actual)
          disk.size = actual
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, actual))

        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
3918
3919
class LUClusterRename(LogicalUnit):
  """Logical unit changing the name (and master IP) of the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build the environment for the hooks.

    @rtype: dict
    @return: the current cluster name as C{OP_TARGET} and the requested
        name as C{NEW_NAME}

    """
    hooks_env = {}
    hooks_env["OP_TARGET"] = self.cfg.GetClusterName()
    hooks_env["NEW_NAME"] = self.op.name
    return hooks_env

  def BuildHooksNodes(self):
    """Return the node lists used for running hooks.

    @rtype: tuple
    @return: a list holding only the master node, followed by the list of
        all nodes

    """
    master = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return ([master], all_nodes)

  def CheckPrereq(self):
    """Resolve the new name and make sure it changes something.

    The resolved name replaces C{self.op.name} and the resolved address
    is kept in C{self.ip}.

    @raise errors.OpPrereqError: if neither the name nor the IP address
        change, or if the new IP address already answers on the network

    """
    family = self.cfg.GetPrimaryIPFamily()
    hostname = netutils.GetHostname(name=self.op.name, family=family)

    new_name = hostname.name
    new_ip = hostname.ip
    self.ip = new_ip

    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    ip_changed = new_ip != old_ip

    if new_name == old_name and not ip_changed:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # A new address must not be in use by something else already
    if ip_changed and netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    @param feedback_fn: function passed on to the configuration update
    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # The master IP is taken down for the duration of the change
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    deactivation = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                           master_params, ems)
    deactivation.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # Regenerate the known hosts file and upload it to all online nodes
      # except the master
      ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
      other_nodes = self.cfg.GetOnlineNodeList()
      if master_params.name in other_nodes:
        other_nodes.remove(master_params.name)
      _UploadHelper(self, other_nodes, pathutils.SSH_KNOWN_HOSTS_FILE)
    finally:
      # The master IP is brought up again (using the new address) even if
      # something above went wrong
      master_params.ip = new_ip
      activation = self.rpc.call_node_activate_master_ip(master_params.name,
                                                         master_params, ems)
      msg = activation.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return new_name
4003
4004
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  The netmask is validated by the IP address class belonging to the
  cluster's primary IP family.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the primary IP family is not known or
      the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    # The values are wrapped in 1-tuples: a bare tuple-valued operand would
    # make the "%" operator raise TypeError instead of producing the message
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               (ip_family, ), errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask, ), errors.ECODE_INVAL)
4024
4025
4026 class LUClusterSetParams(LogicalUnit):
4027   """Change the parameters of the cluster.
4028
4029   """
4030   HPATH = "cluster-modify"
4031   HTYPE = constants.HTYPE_CLUSTER
4032   REQ_BGL = False
4033
4034   def CheckArguments(self):
4035     """Check parameters
4036
4037     """
4038     if self.op.uid_pool:
4039       uidpool.CheckUidPool(self.op.uid_pool)
4040
4041     if self.op.add_uids:
4042       uidpool.CheckUidPool(self.op.add_uids)
4043
4044     if self.op.remove_uids:
4045       uidpool.CheckUidPool(self.op.remove_uids)
4046
4047     if self.op.master_netmask is not None:
4048       _ValidateNetmask(self.cfg, self.op.master_netmask)
4049
4050     if self.op.diskparams:
4051       for dt_params in self.op.diskparams.values():
4052         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4053       try:
4054         utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4055       except errors.OpPrereqError, err:
4056         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4057                                    errors.ECODE_INVAL)
4058
4059   def ExpandNames(self):
4060     # FIXME: in the future maybe other cluster params won't require checking on
4061     # all nodes to be modified.
4062     # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4063     # resource locks the right thing, shouldn't it be the BGL instead?
4064     self.needed_locks = {
4065       locking.LEVEL_NODE: locking.ALL_SET,
4066       locking.LEVEL_INSTANCE: locking.ALL_SET,
4067       locking.LEVEL_NODEGROUP: locking.ALL_SET,
4068       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4069     }
4070     self.share_locks = _ShareAll()
4071
4072   def BuildHooksEnv(self):
4073     """Build hooks env.
4074
4075     """
4076     return {
4077       "OP_TARGET": self.cfg.GetClusterName(),
4078       "NEW_VG_NAME": self.op.vg_name,
4079       }
4080
4081   def BuildHooksNodes(self):
4082     """Build hooks nodes.
4083
4084     """
4085     mn = self.cfg.GetMasterNode()
4086     return ([mn], [mn])
4087
4088   def CheckPrereq(self):
4089     """Check prerequisites.
4090
4091     This checks whether the given params don't conflict and
4092     if the given volume group is valid.
4093
4094     """
4095     if self.op.vg_name is not None and not self.op.vg_name:
4096       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4097         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4098                                    " instances exist", errors.ECODE_INVAL)
4099
4100     if self.op.drbd_helper is not None and not self.op.drbd_helper:
4101       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4102         raise errors.OpPrereqError("Cannot disable drbd helper while"
4103                                    " drbd-based instances exist",
4104                                    errors.ECODE_INVAL)
4105
4106     node_list = self.owned_locks(locking.LEVEL_NODE)
4107
4108     # if vg_name not None, checks given volume group on all nodes
4109     if self.op.vg_name:
4110       vglist = self.rpc.call_vg_list(node_list)
4111       for node in node_list:
4112         msg = vglist[node].fail_msg
4113         if msg:
4114           # ignoring down node
4115           self.LogWarning("Error while gathering data on node %s"
4116                           " (ignoring node): %s", node, msg)
4117           continue
4118         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4119                                               self.op.vg_name,
4120                                               constants.MIN_VG_SIZE)
4121         if vgstatus:
4122           raise errors.OpPrereqError("Error on node '%s': %s" %
4123                                      (node, vgstatus), errors.ECODE_ENVIRON)
4124
4125     if self.op.drbd_helper:
4126       # checks given drbd helper on all nodes
4127       helpers = self.rpc.call_drbd_helper(node_list)
4128       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4129         if ninfo.offline:
4130           self.LogInfo("Not checking drbd helper on offline node %s", node)
4131           continue
4132         msg = helpers[node].fail_msg
4133         if msg:
4134           raise errors.OpPrereqError("Error checking drbd helper on node"
4135                                      " '%s': %s" % (node, msg),
4136                                      errors.ECODE_ENVIRON)
4137         node_helper = helpers[node].payload
4138         if node_helper != self.op.drbd_helper:
4139           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4140                                      (node, node_helper), errors.ECODE_ENVIRON)
4141
4142     self.cluster = cluster = self.cfg.GetClusterInfo()
4143     # validate params changes
4144     if self.op.beparams:
4145       objects.UpgradeBeParams(self.op.beparams)
4146       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4147       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4148
4149     if self.op.ndparams:
4150       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4151       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4152
4153       # TODO: we need a more general way to handle resetting
4154       # cluster-level parameters to default values
4155       if self.new_ndparams["oob_program"] == "":
4156         self.new_ndparams["oob_program"] = \
4157             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4158
4159     if self.op.hv_state:
4160       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4161                                             self.cluster.hv_state_static)
4162       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4163                                for hv, values in new_hv_state.items())
4164
4165     if self.op.disk_state:
4166       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4167                                                 self.cluster.disk_state_static)
4168       self.new_disk_state = \
4169         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4170                             for name, values in svalues.items()))
4171              for storage, svalues in new_disk_state.items())
4172
4173     if self.op.ipolicy:
4174       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4175                                             group_policy=False)
4176
4177       all_instances = self.cfg.GetAllInstancesInfo().values()
4178       violations = set()
4179       for group in self.cfg.GetAllNodeGroupsInfo().values():
4180         instances = frozenset([inst for inst in all_instances
4181                                if compat.any(node in group.members
4182                                              for node in inst.all_nodes)])
4183         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4184         ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4185         new = _ComputeNewInstanceViolations(ipol,
4186                                             new_ipolicy, instances)
4187         if new:
4188           violations.update(new)
4189
4190       if violations:
4191         self.LogWarning("After the ipolicy change the following instances"
4192                         " violate them: %s",
4193                         utils.CommaJoin(utils.NiceSort(violations)))
4194
4195     if self.op.nicparams:
4196       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4197       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4198       objects.NIC.CheckParameterSyntax(self.new_nicparams)
4199       nic_errors = []
4200
4201       # check all instances for consistency
4202       for instance in self.cfg.GetAllInstancesInfo().values():
4203         for nic_idx, nic in enumerate(instance.nics):
4204           params_copy = copy.deepcopy(nic.nicparams)
4205           params_filled = objects.FillDict(self.new_nicparams, params_copy)
4206
4207           # check parameter syntax
4208           try:
4209             objects.NIC.CheckParameterSyntax(params_filled)
4210           except errors.ConfigurationError, err:
4211             nic_errors.append("Instance %s, nic/%d: %s" %
4212                               (instance.name, nic_idx, err))
4213
4214           # if we're moving instances to routed, check that they have an ip
4215           target_mode = params_filled[constants.NIC_MODE]
4216           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4217             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4218                               " address" % (instance.name, nic_idx))
4219       if nic_errors:
4220         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4221                                    "\n".join(nic_errors), errors.ECODE_INVAL)
4222
4223     # hypervisor list/parameters
4224     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4225     if self.op.hvparams:
4226       for hv_name, hv_dict in self.op.hvparams.items():
4227         if hv_name not in self.new_hvparams:
4228           self.new_hvparams[hv_name] = hv_dict
4229         else:
4230           self.new_hvparams[hv_name].update(hv_dict)
4231
4232     # disk template parameters
4233     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4234     if self.op.diskparams:
4235       for dt_name, dt_params in self.op.diskparams.items():
4236         if dt_name not in self.op.diskparams:
4237           self.new_diskparams[dt_name] = dt_params
4238         else:
4239           self.new_diskparams[dt_name].update(dt_params)
4240
4241     # os hypervisor parameters
4242     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4243     if self.op.os_hvp:
4244       for os_name, hvs in self.op.os_hvp.items():
4245         if os_name not in self.new_os_hvp:
4246           self.new_os_hvp[os_name] = hvs
4247         else:
4248           for hv_name, hv_dict in hvs.items():
4249             if hv_name not in self.new_os_hvp[os_name]:
4250               self.new_os_hvp[os_name][hv_name] = hv_dict
4251             else:
4252               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4253
4254     # os parameters
4255     self.new_osp = objects.FillDict(cluster.osparams, {})
4256     if self.op.osparams:
4257       for os_name, osp in self.op.osparams.items():
4258         if os_name not in self.new_osp:
4259           self.new_osp[os_name] = {}
4260
4261         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4262                                                   use_none=True)
4263
4264         if not self.new_osp[os_name]:
4265           # we removed all parameters
4266           del self.new_osp[os_name]
4267         else:
4268           # check the parameter validity (remote check)
4269           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4270                          os_name, self.new_osp[os_name])
4271
4272     # changes to the hypervisor list
4273     if self.op.enabled_hypervisors is not None:
4274       self.hv_list = self.op.enabled_hypervisors
4275       for hv in self.hv_list:
4276         # if the hypervisor doesn't already exist in the cluster
4277         # hvparams, we initialize it to empty, and then (in both
4278         # cases) we make sure to fill the defaults, as we might not
4279         # have a complete defaults list if the hypervisor wasn't
4280         # enabled before
4281         if hv not in new_hvp:
4282           new_hvp[hv] = {}
4283         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4284         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4285     else:
4286       self.hv_list = cluster.enabled_hypervisors
4287
4288     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4289       # either the enabled list has changed, or the parameters have, validate
4290       for hv_name, hv_params in self.new_hvparams.items():
4291         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4292             (self.op.enabled_hypervisors and
4293              hv_name in self.op.enabled_hypervisors)):
4294           # either this is a new hypervisor, or its parameters have changed
4295           hv_class = hypervisor.GetHypervisor(hv_name)
4296           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4297           hv_class.CheckParameterSyntax(hv_params)
4298           _CheckHVParams(self, node_list, hv_name, hv_params)
4299
4300     if self.op.os_hvp:
4301       # no need to check any newly-enabled hypervisors, since the
4302       # defaults have already been checked in the above code-block
4303       for os_name, os_hvp in self.new_os_hvp.items():
4304         for hv_name, hv_params in os_hvp.items():
4305           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4306           # we need to fill in the new os_hvp on top of the actual hv_p
4307           cluster_defaults = self.new_hvparams.get(hv_name, {})
4308           new_osp = objects.FillDict(cluster_defaults, hv_params)
4309           hv_class = hypervisor.GetHypervisor(hv_name)
4310           hv_class.CheckParameterSyntax(new_osp)
4311           _CheckHVParams(self, node_list, hv_name, new_osp)
4312
4313     if self.op.default_iallocator:
4314       alloc_script = utils.FindFile(self.op.default_iallocator,
4315                                     constants.IALLOCATOR_SEARCH_PATH,
4316                                     os.path.isfile)
4317       if alloc_script is None:
4318         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4319                                    " specified" % self.op.default_iallocator,
4320                                    errors.ECODE_INVAL)
4321
4322   def Exec(self, feedback_fn):
4323     """Change the parameters of the cluster.
4324
4325     """
4326     if self.op.vg_name is not None:
4327       new_volume = self.op.vg_name
4328       if not new_volume:
4329         new_volume = None
4330       if new_volume != self.cfg.GetVGName():
4331         self.cfg.SetVGName(new_volume)
4332       else:
4333         feedback_fn("Cluster LVM configuration already in desired"
4334                     " state, not changing")
4335     if self.op.drbd_helper is not None:
4336       new_helper = self.op.drbd_helper
4337       if not new_helper:
4338         new_helper = None
4339       if new_helper != self.cfg.GetDRBDHelper():
4340         self.cfg.SetDRBDHelper(new_helper)
4341       else:
4342         feedback_fn("Cluster DRBD helper already in desired state,"
4343                     " not changing")
4344     if self.op.hvparams:
4345       self.cluster.hvparams = self.new_hvparams
4346     if self.op.os_hvp:
4347       self.cluster.os_hvp = self.new_os_hvp
4348     if self.op.enabled_hypervisors is not None:
4349       self.cluster.hvparams = self.new_hvparams
4350       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4351     if self.op.beparams:
4352       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4353     if self.op.nicparams:
4354       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4355     if self.op.ipolicy:
4356       self.cluster.ipolicy = self.new_ipolicy
4357     if self.op.osparams:
4358       self.cluster.osparams = self.new_osp
4359     if self.op.ndparams:
4360       self.cluster.ndparams = self.new_ndparams
4361     if self.op.diskparams:
4362       self.cluster.diskparams = self.new_diskparams
4363     if self.op.hv_state:
4364       self.cluster.hv_state_static = self.new_hv_state
4365     if self.op.disk_state:
4366       self.cluster.disk_state_static = self.new_disk_state
4367
4368     if self.op.candidate_pool_size is not None:
4369       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4370       # we need to update the pool size here, otherwise the save will fail
4371       _AdjustCandidatePool(self, [])
4372
4373     if self.op.maintain_node_health is not None:
4374       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4375         feedback_fn("Note: CONFD was disabled at build time, node health"
4376                     " maintenance is not useful (still enabling it)")
4377       self.cluster.maintain_node_health = self.op.maintain_node_health
4378
4379     if self.op.prealloc_wipe_disks is not None:
4380       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4381
4382     if self.op.add_uids is not None:
4383       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4384
4385     if self.op.remove_uids is not None:
4386       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4387
4388     if self.op.uid_pool is not None:
4389       self.cluster.uid_pool = self.op.uid_pool
4390
4391     if self.op.default_iallocator is not None:
4392       self.cluster.default_iallocator = self.op.default_iallocator
4393
4394     if self.op.reserved_lvs is not None:
4395       self.cluster.reserved_lvs = self.op.reserved_lvs
4396
4397     if self.op.use_external_mip_script is not None:
4398       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4399
4400     def helper_os(aname, mods, desc):
4401       desc += " OS list"
4402       lst = getattr(self.cluster, aname)
4403       for key, val in mods:
4404         if key == constants.DDM_ADD:
4405           if val in lst:
4406             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4407           else:
4408             lst.append(val)
4409         elif key == constants.DDM_REMOVE:
4410           if val in lst:
4411             lst.remove(val)
4412           else:
4413             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4414         else:
4415           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4416
4417     if self.op.hidden_os:
4418       helper_os("hidden_os", self.op.hidden_os, "hidden")
4419
4420     if self.op.blacklisted_os:
4421       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4422
4423     if self.op.master_netdev:
4424       master_params = self.cfg.GetMasterNetworkParameters()
4425       ems = self.cfg.GetUseExternalMipScript()
4426       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4427                   self.cluster.master_netdev)
4428       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4429                                                        master_params, ems)
4430       result.Raise("Could not disable the master ip")
4431       feedback_fn("Changing master_netdev from %s to %s" %
4432                   (master_params.netdev, self.op.master_netdev))
4433       self.cluster.master_netdev = self.op.master_netdev
4434
4435     if self.op.master_netmask:
4436       master_params = self.cfg.GetMasterNetworkParameters()
4437       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4438       result = self.rpc.call_node_change_master_netmask(master_params.name,
4439                                                         master_params.netmask,
4440                                                         self.op.master_netmask,
4441                                                         master_params.ip,
4442                                                         master_params.netdev)
4443       if result.fail_msg:
4444         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4445         feedback_fn(msg)
4446
4447       self.cluster.master_netmask = self.op.master_netmask
4448
4449     self.cfg.Update(self.cluster, feedback_fn)
4450
4451     if self.op.master_netdev:
4452       master_params = self.cfg.GetMasterNetworkParameters()
4453       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4454                   self.op.master_netdev)
4455       ems = self.cfg.GetUseExternalMipScript()
4456       result = self.rpc.call_node_activate_master_ip(master_params.name,
4457                                                      master_params, ems)
4458       if result.fail_msg:
4459         self.LogWarning("Could not re-enable the master ip on"
4460                         " the master, please restart manually: %s",
4461                         result.fail_msg)
4462
4463
4464 def _UploadHelper(lu, nodes, fname):
4465   """Helper for uploading a file and showing warnings.
4466
4467   """
4468   if os.path.exists(fname):
4469     result = lu.rpc.call_upload_file(nodes, fname)
4470     for to_node, to_result in result.items():
4471       msg = to_result.fail_msg
4472       if msg:
4473         msg = ("Copy of file %s to node %s failed: %s" %
4474                (fname, to_node, msg))
4475         lu.LogWarning(msg)
4476
4477
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @param cluster: cluster object; its C{modify_etc_hosts},
      C{use_external_mip_script} and C{enabled_hypervisors} attributes
      are consulted
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @return: tuple of four sets: files for all nodes, optional files,
      files for master candidates only, files for VM-capable nodes only

  """
  # Files for all nodes
  everywhere = set([
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    ])

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  optional = set([
    pathutils.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  candidates_only = set()

  if redist:
    # we need to ship at least the RAPI certificate
    everywhere.add(pathutils.RAPI_CERT_FILE)
  else:
    everywhere.update(pathutils.ALL_CERT_FILES)
    everywhere.update(ssconf.SimpleStore().GetFileList())

    candidates_only.add(pathutils.CLUSTER_CONF_FILE)

    # File storage
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
      everywhere.add(pathutils.FILE_STORAGE_PATHS_FILE)
      optional.add(pathutils.FILE_STORAGE_PATHS_FILE)

  if cluster.modify_etc_hosts:
    everywhere.add(pathutils.ETC_HOSTS)

  if cluster.use_external_mip_script:
    everywhere.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes; the first element
  # returned by each hypervisor lists them
  vm_capable_only = set()
  for hv_name in cluster.enabled_hypervisors:
    hv_files = hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()
    vm_capable_only.update(hv_files[0])

  # ... while the second element lists the optional ones
  for hv_name in cluster.enabled_hypervisors:
    hv_files = hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()
    optional.update(hv_files[1])

  # Filenames in each category must be unique
  combined = everywhere | candidates_only | vm_capable_only
  expected_count = (len(everywhere) + len(candidates_only) +
                    len(vm_capable_only))
  assert len(combined) == expected_count, \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert combined.issuperset(optional), \
         "Optional file not in a different required list"

  # This one file should never ever be re-distributed via RPC
  assert not (redist and
              pathutils.FILE_STORAGE_PATHS_FILE in combined)

  return (everywhere, optional, candidates_only, vm_capable_only)
4553
4554
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied. The master node itself is never a target.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  cfg = lu.cfg

  # Gather target nodes
  cluster = cfg.GetClusterInfo()
  master = cfg.GetNodeInfo(cfg.GetMasterNode())

  all_targets = cfg.GetOnlineNodeList()
  vm_targets = list(frozenset(all_targets)
                    .intersection(cfg.GetVmCapableNodeList()))

  if additional_nodes is not None:
    all_targets.extend(additional_nodes)
    if additional_vm:
      vm_targets.extend(additional_nodes)

  # Never distribute to master node
  if master.name in all_targets:
    all_targets.remove(master.name)
  if master.name in vm_targets:
    vm_targets.remove(master.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (pathutils.CLUSTER_CONF_FILE in files_all or
              pathutils.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Upload the files: first the ones for all nodes, then the ones which
  # are only needed on VM-capable nodes
  for fname in files_all:
    _UploadHelper(lu, all_targets, fname)

  for fname in files_vm:
    _UploadHelper(lu, vm_targets, fname)
4604
4605
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The cluster object is saved again unchanged and the ancillary files
  are then copied to the nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and node allocation locks, in shared mode.

    """
    lock_levels = [locking.LEVEL_NODE, locking.LEVEL_NODE_ALLOC]
    self.needed_locks = dict.fromkeys(lock_levels, locking.ALL_SET)
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function used to send feedback back to the caller

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
4627
4628
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    The RPC is sent to the node named in the master network parameters;
    a failure of the call is raised to the caller.

    """
    netparams = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    activation = self.rpc.call_node_activate_master_ip(netparams.name,
                                                       netparams,
                                                       use_ext_script)
    activation.Raise("Could not activate the master IP")
4642
4643
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    The RPC is sent to the node named in the master network parameters;
    a failure of the call is raised to the caller.

    """
    netparams = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    deactivation = self.rpc.call_node_deactivate_master_ip(netparams.name,
                                                           netparams,
                                                           use_ext_script)
    deactivation.Raise("Could not deactivate the master IP")
4657
4658
4659 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4660   """Sleep and poll for an instance's disk to sync.
4661
4662   """
4663   if not instance.disks or disks is not None and not disks:
4664     return True
4665
4666   disks = _ExpandCheckDisks(instance, disks)
4667
4668   if not oneshot:
4669     lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4670
4671   node = instance.primary_node
4672
4673   for dev in disks:
4674     lu.cfg.SetDiskID(dev, node)
4675
4676   # TODO: Convert to utils.Retry
4677
4678   retries = 0
4679   degr_retries = 10 # in seconds, as we sleep 1 second each time
4680   while True:
4681     max_time = 0
4682     done = True
4683     cumul_degraded = False
4684     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4685     msg = rstats.fail_msg
4686     if msg:
4687       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4688       retries += 1
4689       if retries >= 10:
4690         raise errors.RemoteError("Can't contact node %s for mirror data,"
4691                                  " aborting." % node)
4692       time.sleep(6)
4693       continue
4694     rstats = rstats.payload
4695     retries = 0
4696     for i, mstat in enumerate(rstats):
4697       if mstat is None:
4698         lu.LogWarning("Can't compute data for node %s/%s",
4699                            node, disks[i].iv_name)
4700         continue
4701
4702       cumul_degraded = (cumul_degraded or
4703                         (mstat.is_degraded and mstat.sync_percent is None))
4704       if mstat.sync_percent is not None:
4705         done = False
4706         if mstat.estimated_time is not None:
4707           rem_time = ("%s remaining (estimated)" %
4708                       utils.FormatSeconds(mstat.estimated_time))
4709           max_time = mstat.estimated_time
4710         else:
4711           rem_time = "no time estimate"
4712         lu.LogInfo("- device %s: %5.2f%% done, %s",
4713                    disks[i].iv_name, mstat.sync_percent, rem_time)
4714
4715     # if we're done but degraded, let's do a few small retries, to
4716     # make sure we see a stable and not transient situation; therefore
4717     # we force restart of the loop
4718     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4719       logging.info("Degraded disks found, %d retries left", degr_retries)
4720       degr_retries -= 1
4721       time.sleep(1)
4722       continue
4723
4724     if done or oneshot:
4725       break
4726
4727     time.sleep(min(60, max_time))
4728
4729   if done:
4730     lu.LogInfo("Instance %s's disks are in sync", instance.name)
4731
4732   return not cumul_degraded
4733
4734
def _BlockdevFind(lu, node, dev, instance):
  """Annotate a device with its disk parameters and look it up on a node.

  @param lu: A reference to the lu object
  @param node: The node to call out
  @param dev: The device to find
  @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
  annotated = _AnnotateDiskParams(instance, [dev], lu.cfg)
  # exactly one device went in, exactly one must come out
  (annotated_dev,) = annotated
  return lu.rpc.call_blockdev_find(node, annotated_dev)
4747
4748
def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Wrapper around L{_CheckDiskConsistencyInner}.

  The device is annotated with its disk parameters first; all other
  arguments are passed through unchanged.

  """
  annotated = _AnnotateDiskParams(instance, [dev], lu.cfg)
  # exactly one device went in, exactly one must come out
  (annotated_dev,) = annotated
  return _CheckDiskConsistencyInner(lu, instance, annotated_dev, node,
                                    on_primary, ldisk=ldisk)
4756
4757
4758 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4759                                ldisk=False):
4760   """Check that mirrors are not degraded.
4761
4762   @attention: The device has to be annotated already.
4763
4764   The ldisk parameter, if True, will change the test from the
4765   is_degraded attribute (which represents overall non-ok status for
4766   the device(s)) to the ldisk (representing the local storage status).
4767
4768   """
4769   lu.cfg.SetDiskID(dev, node)
4770
4771   result = True
4772
4773   if on_primary or dev.AssembleOnSecondary():
4774     rstats = lu.rpc.call_blockdev_find(node, dev)
4775     msg = rstats.fail_msg
4776     if msg:
4777       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4778       result = False
4779     elif not rstats.payload:
4780       lu.LogWarning("Can't find disk on node %s", node)
4781       result = False
4782     else:
4783       if ldisk:
4784         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4785       else:
4786         result = result and not rstats.payload.is_degraded
4787
4788   if dev.children:
4789     for child in dev.children:
4790       result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4791                                                      on_primary)
4792
4793   return result
4794
4795
4796 class LUOobCommand(NoHooksLU):
4797   """Logical unit for OOB handling.
4798
4799   """
4800   REQ_BGL = False
4801   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4802
4803   def ExpandNames(self):
4804     """Gather locks we need.
4805
4806     """
4807     if self.op.node_names:
4808       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4809       lock_names = self.op.node_names
4810     else:
4811       lock_names = locking.ALL_SET
4812
4813     self.needed_locks = {
4814       locking.LEVEL_NODE: lock_names,
4815       }
4816
4817     if not self.op.node_names:
4818       # Acquire node allocation lock only if all nodes are affected
4819       self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4820       self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4821
4822   def CheckPrereq(self):
4823     """Check prerequisites.
4824
4825     This checks:
4826      - the node exists in the configuration
4827      - OOB is supported
4828
4829     Any errors are signaled by raising errors.OpPrereqError.
4830
4831     """
4832     self.nodes = []
4833     self.master_node = self.cfg.GetMasterNode()
4834
4835     assert self.op.power_delay >= 0.0
4836
4837     if self.op.node_names:
4838       if (self.op.command in self._SKIP_MASTER and
4839           self.master_node in self.op.node_names):
4840         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4841         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4842
4843         if master_oob_handler:
4844           additional_text = ("run '%s %s %s' if you want to operate on the"
4845                              " master regardless") % (master_oob_handler,
4846                                                       self.op.command,
4847                                                       self.master_node)
4848         else:
4849           additional_text = "it does not support out-of-band operations"
4850
4851         raise errors.OpPrereqError(("Operating on the master node %s is not"
4852                                     " allowed for %s; %s") %
4853                                    (self.master_node, self.op.command,
4854                                     additional_text), errors.ECODE_INVAL)
4855     else:
4856       self.op.node_names = self.cfg.GetNodeList()
4857       if self.op.command in self._SKIP_MASTER:
4858         self.op.node_names.remove(self.master_node)
4859
4860     if self.op.command in self._SKIP_MASTER:
4861       assert self.master_node not in self.op.node_names
4862
4863     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4864       if node is None:
4865         raise errors.OpPrereqError("Node %s not found" % node_name,
4866                                    errors.ECODE_NOENT)
4867       else:
4868         self.nodes.append(node)
4869
4870       if (not self.op.ignore_status and
4871           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4872         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4873                                     " not marked offline") % node_name,
4874                                    errors.ECODE_STATE)
4875
4876   def Exec(self, feedback_fn):
4877     """Execute OOB and return result if we expect any.
4878
4879     """
4880     master_node = self.master_node
4881     ret = []
4882
4883     for idx, node in enumerate(utils.NiceSort(self.nodes,
4884                                               key=lambda node: node.name)):
4885       node_entry = [(constants.RS_NORMAL, node.name)]
4886       ret.append(node_entry)
4887
4888       oob_program = _SupportsOob(self.cfg, node)
4889
4890       if not oob_program:
4891         node_entry.append((constants.RS_UNAVAIL, None))
4892         continue
4893
4894       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4895                    self.op.command, oob_program, node.name)
4896       result = self.rpc.call_run_oob(master_node, oob_program,
4897                                      self.op.command, node.name,
4898                                      self.op.timeout)
4899
4900       if result.fail_msg:
4901         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4902                         node.name, result.fail_msg)
4903         node_entry.append((constants.RS_NODATA, None))
4904       else:
4905         try:
4906           self._CheckPayload(result)
4907         except errors.OpExecError, err:
4908           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4909                           node.name, err)
4910           node_entry.append((constants.RS_NODATA, None))
4911         else:
4912           if self.op.command == constants.OOB_HEALTH:
4913             # For health we should log important events
4914             for item, status in result.payload:
4915               if status in [constants.OOB_STATUS_WARNING,
4916                             constants.OOB_STATUS_CRITICAL]:
4917                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4918                                 item, node.name, status)
4919
4920           if self.op.command == constants.OOB_POWER_ON:
4921             node.powered = True
4922           elif self.op.command == constants.OOB_POWER_OFF:
4923             node.powered = False
4924           elif self.op.command == constants.OOB_POWER_STATUS:
4925             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4926             if powered != node.powered:
4927               logging.warning(("Recorded power state (%s) of node '%s' does not"
4928                                " match actual power state (%s)"), node.powered,
4929                               node.name, powered)
4930
4931           # For configuration changing commands we should update the node
4932           if self.op.command in (constants.OOB_POWER_ON,
4933                                  constants.OOB_POWER_OFF):
4934             self.cfg.Update(node, feedback_fn)
4935
4936           node_entry.append((constants.RS_NORMAL, result.payload))
4937
4938           if (self.op.command == constants.OOB_POWER_ON and
4939               idx < len(self.nodes) - 1):
4940             time.sleep(self.op.power_delay)
4941
4942     return ret
4943
4944   def _CheckPayload(self, result):
4945     """Checks if the payload is valid.
4946
4947     @param result: RPC result
4948     @raises errors.OpExecError: If payload is not valid
4949
4950     """
4951     errs = []
4952     if self.op.command == constants.OOB_HEALTH:
4953       if not isinstance(result.payload, list):
4954         errs.append("command 'health' is expected to return a list but got %s" %
4955                     type(result.payload))
4956       else:
4957         for item, status in result.payload:
4958           if status not in constants.OOB_STATUSES:
4959             errs.append("health item '%s' has invalid status '%s'" %
4960                         (item, status))
4961
4962     if self.op.command == constants.OOB_POWER_STATUS:
4963       if not isinstance(result.payload, dict):
4964         errs.append("power-status is expected to return a dict but got %s" %
4965                     type(result.payload))
4966
4967     if self.op.command in [
4968       constants.OOB_POWER_ON,
4969       constants.OOB_POWER_OFF,
4970       constants.OOB_POWER_CYCLE,
4971       ]:
4972       if result.payload is not None:
4973         errs.append("%s is expected to not return payload but got '%s'" %
4974                     (self.op.command, result.payload))
4975
4976     if errs:
4977       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4978                                utils.CommaJoin(errs))
4979
4980
class _OsQuery(_QueryBase):
  """Query implementation for operating system definitions.

  """
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Declares an empty lock set and records the wanted OS names.

    """
    # Nodes should be locked in shared mode here; the locks have been
    # removed temporarily, which should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    self.wanted = self.names or locking.ALL_SET
    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    """Does nothing; no locks are declared for this query.

    """
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Regroups per-node OS diagnose results into a per-OS dictionary.

    @param rlist: a map with node names as keys and RPC results as values;
        the payload of a successful result is a list of per-OS tuples
        (name, path, status, diagnose, variants, parameters, api_versions)

    @rtype: dict
    @return: a dictionary with OS names as keys and as value another
        map, with nodes as keys and lists of tuples (path, status,
        diagnose, variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], [], []),
                                     (/srv/..., False, "invalid api", ...)],
                           "node2": [(/srv/..., True, "", [], [], [])]}
          }

    """
    # Only nodes whose RPC didn't fail (at RPC level) get an entry per
    # OS, so that nodes with a non-responding node daemon don't make all
    # OSes invalid
    responsive = [node_name for (node_name, nres) in rlist.items()
                  if not nres.fail_msg]

    by_os = {}
    for (node_name, nres) in rlist.items():
      if nres.fail_msg or not nres.payload:
        continue

      for os_entry in nres.payload:
        (name, path, status, diagnose, variants,
         params, api_versions) = os_entry

        if name not in by_os:
          # First time this OS is seen: start with an empty list for
          # every responsive node
          by_os[name] = dict((nname, []) for nname in responsive)

        # convert params from [name, help] to (name, help)
        param_tuples = [tuple(v) for v in params]
        by_os[name][node_name].append((path, status, diagnose, variants,
                                       param_tuples, api_versions))

    return by_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    All nodes which are vm_capable and not offline are asked for their OS
    definitions. An OS is flagged valid only if, on every node, its first
    entry has a true status; variants, parameters and API versions are
    the values common to the first entries of all nodes.

    @return: list of L{query.OsInfo} objects, in the order given by
        C{self._GetNames}

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, per_node) in pol.items():
      os_info = query.OsInfo(name=os_name, valid=True, node_status=per_node,
                             hidden=(os_name in cluster.hidden_os),
                             blacklisted=(os_name in cluster.blacklisted_os))

      common_variants = set()
      common_params = set()
      common_api = set()

      for (idx, entries) in enumerate(per_node.values()):
        os_info.valid = bool(os_info.valid and entries and entries[0][1])
        if not os_info.valid:
          break

        # Only the first entry of each node is taken into account
        (node_variants, node_params, node_api) = entries[0][3:6]
        if idx > 0:
          # Filter out inconsistent values
          common_variants.intersection_update(node_variants)
          common_params.intersection_update(node_params)
          common_api.intersection_update(node_api)
        else:
          # First entry
          common_variants.update(node_variants)
          common_params.update(node_params)
          common_api.update(node_api)

      os_info.variants = list(common_variants)
      os_info.parameters = list(common_params)
      os_info.api_versions = list(common_api)

      data[os_name] = os_info

    # Prepare data in requested order
    ordered_names = self._GetNames(lu, pol.keys(), None)
    return [data[name] for name in ordered_names if name in data]
5096
5097
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  The actual work is delegated to an L{_OsQuery} object.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: the OS names to restrict the query to
    @return: the name filter, the status filter, both combined via
        C{qlang.OP_AND}, or None if neither applies

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for fname in ["hidden", "blacklisted"]:
      if fname not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, fname]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      status_filter = [qlang.OP_AND] + conditions
    else:
      status_filter = None

    if not name_filter:
      return status_filter

    if status_filter:
      return [qlang.OP_AND, name_filter, status_filter]

    return name_filter

  def CheckArguments(self):
    """Creates the query object, with locking disabled.

    """
    qfilter = self._BuildFilter(self.op.output_fields, self.op.names)
    self.oq = _OsQuery(qfilter, self.op.output_fields, False)

  def ExpandNames(self):
    """Delegates to the query object.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.oq.OldStyleQuery(self)
5140
5141
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary naming the node to be removed

    """
    target = self.op.node_name
    return {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The node being removed is left out of the list; a failed node would
    otherwise be impossible to remove, as the pre phase hooks would have
    to run on it.

    """
    hook_nodes = self.cfg.GetNodeList()
    if self.op.node_name in hook_nodes:
      hook_nodes.remove(self.op.node_name)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for (inst_name, inst) in self.cfg.GetAllInstancesInfo().items():
      if node.name not in inst.all_nodes:
        continue
      raise errors.OpPrereqError("Instance %s is still running on the node,"
                                 " please remove first" % inst_name,
                                 errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The node is taken out of the configuration first; afterwards the post
    hooks are run for it, it is asked to leave the cluster and, if the
    cluster manages C{/etc/hosts}, its entry is removed there.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    # Read before the node is removed from the configuration
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    leave_result = self.rpc.call_node_leave_cluster(node.name,
                                                    modify_ssh_setup)
    if leave_result.fail_msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_result.fail_msg)

    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    # Remove node from our /etc/hosts
    hosts_result = \
      self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                     constants.ETC_HOSTS_REMOVE,
                                     node.name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
5234
5235
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Records the wanted nodes and declares locks if live data is needed.

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if not self.do_locking:
      return

    # If any non-static field is requested we need to lock the nodes
    lu.needed_locks[locking.LEVEL_NODE] = self.wanted
    lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    """Does nothing; all locks are declared in L{ExpandNames}.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Only the data groups listed in C{self.requested_data} are gathered;
    the others are passed on as None (or an empty dict for node groups).

    @return: a L{query.NodeQueryData} object

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
    requested = self.requested_data

    # Gather data as requested
    live_data = None
    if query.NQ_LIVE in requested:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = {}
      for (name, nresult) in node_data.items():
        # Nodes with failed or empty results get no live data at all
        if nresult.fail_msg or not nresult.payload:
          continue
        live_data[name] = rpc.MakeLegacyNodeInfo(nresult.payload)

    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in requested:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      for inst in lu.cfg.GetAllInstancesInfo().values():
        # Instances on nodes which are not part of the query are ignored
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    oob_support = None
    if query.NQ_OOB in requested:
      oob_support = {}
      for (name, node) in all_info.iteritems():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))

    if query.NQ_GROUP in requested:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    node_list = [all_info[name] for name in nodenames]
    return query.NodeQueryData(node_list, live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
5311
5312
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  The actual work is delegated to a L{_NodeQuery} object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the query object, filtering on the requested node names.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Delegates to the query object.

    """
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates to the query object.

    """
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.nq.OldStyleQuery(self)
5332
5333
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validates the selected output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares node locks, all of them shared.

    Without an explicit node list all nodes and the node allocation lock
    are requested.

    """
    self.share_locks = _ShareAll()

    if not self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }

  @staticmethod
  def _VolumeField(field, node, vol, vol2inst):
    """Returns the value of a single output field for one volume.

    @param field: name of the output field
    @param node: name of the node the volume was reported by
    @param vol: the volume's data as returned by the node
    @param vol2inst: mapping from (node, "vg/name") to the owning instance
    @raise errors.ParameterError: for unknown fields

    """
    if field == "node":
      return node
    if field == "phys":
      return vol["dev"]
    if field == "vg":
      return vol["vg"]
    if field == "name":
      return vol["name"]
    if field == "size":
      return int(float(vol["size"]))
    if field == "instance":
      return vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the locked nodes.

    Offline nodes are skipped silently, nodes whose RPC failed are skipped
    with a warning.

    @return: one list per volume, holding the requested output fields
        converted to strings; per node, volumes are sorted by their "dev"
        value

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    all_instances = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(all_instances.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue

      if nresult.fail_msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node,
                        nresult.fail_msg)
        continue

      for vol in sorted(nresult.payload, key=operator.itemgetter("dev")):
        output.append([str(self._VolumeField(field, node, vol, vol2inst))
                       for field in self.op.output_fields])

    return output
5405
5406
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the selected output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares node locks, all of them shared.

    Without an explicit node list all nodes and the node allocation lock
    are requested.

    """
    self.share_locks = _ShareAll()

    if not self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }

  def _BuildRow(self, node, row, field_idx):
    """Builds one output row in the order of the requested fields.

    @param node: name of the node which reported the storage unit
    @param row: the values returned by the node for the storage unit
    @param field_idx: mapping from field name to its index in C{row}
    @raise errors.ParameterError: for unknown fields

    """
    out = []
    for field in self.op.output_fields:
      # Node and storage type are filled in locally, everything else
      # comes from the node's reply
      if field == constants.SF_NODE:
        out.append(node)
      elif field == constants.SF_TYPE:
        out.append(self.op.storage_type)
      elif field in field_idx:
        out.append(row[field_idx[field]])
      else:
        raise errors.ParameterError(field)
    return out

  def Exec(self, feedback_fn):
    """Computes the list of storage units on the locked nodes.

    Offline nodes are skipped silently, nodes whose RPC failed are skipped
    with a warning.

    @return: one list per storage unit with the requested output fields;
        nodes and the unit names within a node are ordered using
        C{utils.NiceSort}

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    requested = self.op.output_fields
    if constants.SF_NAME in requested:
      candidates = requested
    else:
      candidates = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [fname for fname in candidates if fname not in lu_only]

    field_idx = dict((fname, idx) for (idx, fname) in enumerate(fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      if nresult.fail_msg:
        self.LogWarning("Can't get storage data from node %s: %s", node,
                        nresult.fail_msg)
        continue

      rows = dict((row[name_idx], row) for row in nresult.payload)

      for name in utils.NiceSort(rows.keys()):
        result.append(self._BuildRow(node, rows[name], field_idx))

    return result
5491
5492
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Records the wanted instances and declares locks for live data.

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      # Group and node locks are filled in later, see DeclareLocks
      lu.needed_locks.update({
        locking.LEVEL_INSTANCE: self.wanted,
        locking.LEVEL_NODEGROUP: [],
        locking.LEVEL_NODE: [],
        })
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    """Fills in the node group and node locks, if locking is used.

    """
    if not self.do_locking:
      return

    if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
      assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

      # Lock all groups used by instances optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      group_uuids = set()
      for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE):
        group_uuids.update(lu.cfg.GetInstanceNodeGroups(instance_name))
      lu.needed_locks[locking.LEVEL_NODEGROUP] = group_uuids
    elif level == locking.LEVEL_NODE:
      lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    """Verifies the owned node group locks against the owned instances.

    """
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Only the data groups listed in C{self.requested_data} are gathered.

    @return: a L{query.InstanceQueryData} object

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*[inst.all_nodes
                                        for inst in instance_list]))
    hv_list = list(set(inst.hypervisor for inst in instance_list))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()
    live_data = {}

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        nresult = node_data[name]
        if nresult.offline:
          # offline nodes will be in both lists
          assert nresult.fail_msg
          offline_nodes.append(name)

        if nresult.fail_msg:
          bad_nodes.append(name)
          continue

        # An empty payload means no instance is alive on the node
        for inst in nresult.payload or ():
          if inst not in all_info:
            # orphan instance; we don't list it here as we don't
            # handle this case yet in the output of instance listing
            logging.warning("Orphan instance '%s' found on node %s",
                            inst, name)
          elif all_info[inst].primary_node == name:
            # The node's whole payload is merged, not just this instance
            live_data.update(nresult.payload)
          else:
            wrongnode_inst.add(inst)

    disk_usage = None
    if query.IQ_DISKUSAGE in self.requested_data:
      gmi = ganeti.masterd.instance
      disk_usage = {}
      for inst in instance_list:
        disk_sizes = [{constants.IDISK_SIZE: disk.size}
                      for disk in inst.disks]
        disk_usage[inst.name] = gmi.ComputeDiskSize(inst.disk_template,
                                                    disk_sizes)

    consinfo = None
    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name not in live_data:
          consinfo[inst.name] = None
          continue
        # Instance is running
        consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
      assert set(consinfo.keys()) == set(instance_names)

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*[inst.all_nodes
                                         for inst in instance_list]))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      group_uuids = set(node.group for node in nodes.values())
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in group_uuids)
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5624
5625
class LUQuery(NoHooksLU):
  """Logical unit running a generic query for one kind of resource.

  All the work is delegated to the query implementation selected through
  C{self.op.what}.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested resource.

    """
    op = self.op
    impl_cls = _GetQueryImplementation(op.what)
    self.impl = impl_cls(op.qfilter, op.fields, op.use_locking)

  def ExpandNames(self):
    """Lets the query implementation expand names for this LU.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Lets the query implementation declare locks at the given level.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns the implementation's new-style result.

    """
    answer = self.impl.NewStyleQuery(self)
    return answer
5646
5647
class LUQueryFields(NoHooksLU):
  """Logical unit querying the fields available for a kind of resource.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation for the requested resource kind.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """Declares that this LU needs no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Returns the result of L{query.QueryFields} for the requested fields.

    """
    field_defs = self.qcls.FIELDS
    wanted = self.op.fields
    return query.QueryFields(field_defs, wanted)
5663
5664
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type has no modifiable
        fields, or if any of the requested fields is not modifiable

    """
    op = self.op
    op.node_name = _ExpandNodeName(self.cfg, op.node_name)

    st_type = op.storage_type

    try:
      allowed = constants.MODIFIABLE_STORAGE_FIELDS[st_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % st_type,
                                 errors.ECODE_INVAL)

    rejected = set(op.changes.keys()) - allowed
    if not rejected:
      return

    raise errors.OpPrereqError("The following fields can not be modified for"
                               " storage units of type '%s': %r" %
                               (st_type, list(rejected)),
                               errors.ECODE_INVAL)

  def ExpandNames(self):
    """Requests the node lock for the target node.

    """
    wanted = {}
    wanted[locking.LEVEL_NODE] = self.op.node_name
    self.needed_locks = wanted

  def Exec(self, feedback_fn):
    """Asks the node to apply the requested changes to the storage unit.

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    result = self.rpc.call_storage_modify(op.node_name, op.storage_type,
                                          st_args, op.name, op.changes)
    failure = ("Failed to modify storage unit '%s' on %s" %
               (op.name, op.node_name))
    result.Raise(failure)
5705
5706
5707 class LUNodeAdd(LogicalUnit):
5708   """Logical unit for adding node to the cluster.
5709
5710   """
5711   HPATH = "node-add"
5712   HTYPE = constants.HTYPE_NODE
5713   _NFLAGS = ["master_capable", "vm_capable"]
5714
5715   def CheckArguments(self):
5716     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5717     # validate/normalize the node name
5718     self.hostname = netutils.GetHostname(name=self.op.node_name,
5719                                          family=self.primary_ip_family)
5720     self.op.node_name = self.hostname.name
5721
5722     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5723       raise errors.OpPrereqError("Cannot readd the master node",
5724                                  errors.ECODE_STATE)
5725
5726     if self.op.readd and self.op.group:
5727       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5728                                  " being readded", errors.ECODE_INVAL)
5729
5730   def BuildHooksEnv(self):
5731     """Build hooks env.
5732
5733     This will run on all nodes before, and on all nodes + the new node after.
5734
5735     """
5736     return {
5737       "OP_TARGET": self.op.node_name,
5738       "NODE_NAME": self.op.node_name,
5739       "NODE_PIP": self.op.primary_ip,
5740       "NODE_SIP": self.op.secondary_ip,
5741       "MASTER_CAPABLE": str(self.op.master_capable),
5742       "VM_CAPABLE": str(self.op.vm_capable),
5743       }
5744
5745   def BuildHooksNodes(self):
5746     """Build hooks nodes.
5747
5748     """
5749     # Exclude added node
5750     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5751     post_nodes = pre_nodes + [self.op.node_name, ]
5752
5753     return (pre_nodes, post_nodes)
5754
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config (or, for a re-add, that
       it is in the config with the same secondary IP)
     - it is resolvable
     - its IP addresses do not conflict with those of any other node
     - its parameters (single/dual homed) matches the cluster
     - it answers a TCP ping on the node daemon port and reports the same
       protocol version as the master

    As a side effect this fills in C{self.op.primary_ip}, a missing
    C{self.op.secondary_ip} and missing capability flags, and sets
    C{self.changed_primary_ip}, C{self.master_candidate} and
    C{self.new_node} for use in L{Exec}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    # Without an explicit secondary IP the primary one is reused, which is
    # only possible when the primary address is not IPv6
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    # A plain add requires an unknown node, a re-add requires a known one
    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        # The node's own entry: the secondary IP must be unchanged, a
        # changed primary IP is only recorded for Exec
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      # Neither of the new addresses may be used by any other node
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      # Unspecified flags are inherited from the existing node object
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      # Unspecified flags default to True for new nodes
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    # On re-add the node itself is passed as an exception to the
    # self-promotion decision
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    # A re-add reuses the existing node object, otherwise a fresh one is
    # built in the requested node group
    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)

    # TODO: If we need to have multiple DnsOnlyRunner we probably should make
    #       it a property on the base class.
    result = rpc.DnsOnlyRunner().call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    # The node must speak exactly the master's protocol version
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)
5902
  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    Applies the values computed in L{CheckPrereq} to the node object,
    optionally updates the master's hosts file, verifies the node from the
    master and finally adds (or re-adds) the node through the context.

    @param feedback_fn: function used to report verification failures to
        the user
    @raise errors.OpExecError: if the ssh/hostname verification fails

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # Node parameters are replaced wholesale; without any given they are
    # reset to an empty dict
    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    if self.op.hv_state:
      new_node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      new_node.disk_state_static = self.new_disk_state

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    # Only dual-homed nodes have a separate secondary IP to check
    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    # Have the master run the node-list verification against the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      # A non-empty payload lists the nodes for which the check failed
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        # A failed demotion is only reported, it does not abort the re-add
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
5994
5995
5996 class LUNodeSetParams(LogicalUnit):
5997   """Modifies the parameters of a node.
5998
5999   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6000       to the node role (as _ROLE_*)
6001   @cvar _R2F: a dictionary from node role to tuples of flags
6002   @cvar _FLAGS: a list of attribute names corresponding to the flags
6003
6004   """
6005   HPATH = "node-modify"
6006   HTYPE = constants.HTYPE_NODE
6007   REQ_BGL = False
6008   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6009   _F2R = {
6010     (True, False, False): _ROLE_CANDIDATE,
6011     (False, True, False): _ROLE_DRAINED,
6012     (False, False, True): _ROLE_OFFLINE,
6013     (False, False, False): _ROLE_REGULAR,
6014     }
6015   _R2F = dict((v, k) for k, v in _F2R.items())
6016   _FLAGS = ["master_candidate", "drained", "offline"]
6017
6018   def CheckArguments(self):
6019     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6020     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6021                 self.op.master_capable, self.op.vm_capable,
6022                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6023                 self.op.disk_state]
6024     if all_mods.count(None) == len(all_mods):
6025       raise errors.OpPrereqError("Please pass at least one modification",
6026                                  errors.ECODE_INVAL)
6027     if all_mods.count(True) > 1:
6028       raise errors.OpPrereqError("Can't set the node into more than one"
6029                                  " state at the same time",
6030                                  errors.ECODE_INVAL)
6031
6032     # Boolean value that tells us whether we might be demoting from MC
6033     self.might_demote = (self.op.master_candidate is False or
6034                          self.op.offline is True or
6035                          self.op.drained is True or
6036                          self.op.master_capable is False)
6037
6038     if self.op.secondary_ip:
6039       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6040         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6041                                    " address" % self.op.secondary_ip,
6042                                    errors.ECODE_INVAL)
6043
6044     self.lock_all = self.op.auto_promote and self.might_demote
6045     self.lock_instances = self.op.secondary_ip is not None
6046
6047   def _InstanceFilter(self, instance):
6048     """Filter for getting affected instances.
6049
6050     """
6051     return (instance.disk_template in constants.DTS_INT_MIRROR and
6052             self.op.node_name in instance.all_nodes)
6053
6054   def ExpandNames(self):
6055     if self.lock_all:
6056       self.needed_locks = {
6057         locking.LEVEL_NODE: locking.ALL_SET,
6058
6059         # Block allocations when all nodes are locked
6060         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6061         }
6062     else:
6063       self.needed_locks = {
6064         locking.LEVEL_NODE: self.op.node_name,
6065         }
6066
6067     # Since modifying a node can have severe effects on currently running
6068     # operations the resource lock is at least acquired in shared mode
6069     self.needed_locks[locking.LEVEL_NODE_RES] = \
6070       self.needed_locks[locking.LEVEL_NODE]
6071
6072     # Get all locks except nodes in shared mode; they are not used for anything
6073     # but read-only access
6074     self.share_locks = _ShareAll()
6075     self.share_locks[locking.LEVEL_NODE] = 0
6076     self.share_locks[locking.LEVEL_NODE_RES] = 0
6077     self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6078
6079     if self.lock_instances:
6080       self.needed_locks[locking.LEVEL_INSTANCE] = \
6081         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6082
6083   def BuildHooksEnv(self):
6084     """Build hooks env.
6085
6086     This runs on the master node.
6087
6088     """
6089     return {
6090       "OP_TARGET": self.op.node_name,
6091       "MASTER_CANDIDATE": str(self.op.master_candidate),
6092       "OFFLINE": str(self.op.offline),
6093       "DRAINED": str(self.op.drained),
6094       "MASTER_CAPABLE": str(self.op.master_capable),
6095       "VM_CAPABLE": str(self.op.vm_capable),
6096       }
6097
6098   def BuildHooksNodes(self):
6099     """Build hooks nodes.
6100
6101     """
6102     nl = [self.cfg.GetMasterNode(), self.op.node_name]
6103     return (nl, nl)
6104
  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the requested flag, power state and secondary IP
    changes are allowed for the node, and computes the old and new node
    role (stored as C{self.old_role} and C{self.new_role}) together with
    the merged node parameters and static states used by L{Exec}.

    Note that this may modify the flag attributes of C{self.op}:
    ineffective "unset" requests are turned into C{None} and the master
    candidate flag can be set or cleared automatically.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    # The vm_capable flag can only be removed from a node without instances
    if self.op.vm_capable is False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion (--auto-promote or RAPI"
                                   " auto_promote=True)", errors.ECODE_STATE)

    # Map the node's current flags to its current role
    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) is False and getattr(node, attr) is False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered is True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained is False or self.op.offline is False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable is False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    # When changing the secondary ip, verify if this is a single-homed to
    # multi-homed transition or vice versa, and apply the relevant
    # restrictions.
    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip != node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from single-homed to multi-homed"
                          " cluster; all nodes will require a secondary IP"
                          " address")
        else:
          raise errors.OpPrereqError("Changing the secondary ip on a"
                                     " single-homed cluster requires the"
                                     " --force option to be passed, and the"
                                     " target node to be the master",
                                     errors.ECODE_INVAL)
      elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from multi-homed to single-homed"
                          " cluster; secondary IP addresses will have to be"
                          " removed")
        else:
          raise errors.OpPrereqError("Cannot set the secondary IP to be the"
                                     " same as the primary IP on a multi-homed"
                                     " cluster, unless the --force option is"
                                     " passed, and the target node is the"
                                     " master", errors.ECODE_INVAL)

      # affected_instances was computed above since a secondary IP was given
      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          msg = ("Cannot change secondary IP address: offline node has"
                 " instances (%s) configured to use it" %
                 utils.CommaJoin(affected_instances.keys()))
          raise errors.OpPrereqError(msg, errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    # Merge the requested node parameters into the existing ones and
    # enforce their types
    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)
6303
  def Exec(self, feedback_fn):
    """Modifies a node.

    Applies the values prepared in L{CheckPrereq} to the node object and
    writes it to the configuration.

    @param feedback_fn: function passed on to the configuration update
    @rtype: list
    @return: list of (name, new value) tuples describing the changed
        capability flags, role flags and secondary IP

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    # Capability flags are only touched when explicitly given
    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      # Report each role flag that differs, then store the new flags
      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
6362
6363
class LUNodePowercycle(NoHooksLU):
  """Logical unit scheduling a powercycle of a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and guard against powercycling the master.

    @raise errors.OpPrereqError: if the target is the master node and
        the force parameter was not set

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    targets_master = (self.op.node_name == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort option which must not block on other
    jobs, hence no locks are requested.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Ask the node to powercycle itself.

    @return: the payload of the powercycle RPC call

    """
    node_name = self.op.node_name
    rpc_result = self.rpc.call_node_powercycle(node_name,
                                               self.cfg.GetHypervisorType())
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
6394
6395
class LUClusterQuery(NoHooksLU):
  """Logical unit returning the cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Set up locking; this LU declares no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Build the dictionary describing the cluster.

    @rtype: dict
    @return: version constants and cluster-level settings, keyed by
        name; hypervisor-specific values are restricted to the enabled
        hypervisors

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, keeping only enabled hypervisors
    os_hvp = {}
    for (os_name, per_hv) in cluster.os_hvp.items():
      os_hvp[os_name] = dict((hv_name, hv_params)
                             for (hv_name, hv_params) in per_hv.items()
                             if hv_name in enabled_hvs)

    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "diskparams": cluster.diskparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
6467
6468
class LUClusterConfigQuery(NoHooksLU):
  """Logical unit returning the requested configuration values.

  All the work is delegated to a L{_ClusterQuery} object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object for the requested output fields.

    """
    fields = self.op.output_fields
    self.cq = _ClusterQuery(None, fields, False)

  def ExpandNames(self):
    """Let the query object set up the names and locks.

    """
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for the given level.

    """
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its single result row.

    """
    rows = self.cq.OldStyleQuery(self)

    # The query must have produced exactly one row
    assert len(rows) == 1

    return rows[0]
6490
6491
class _ClusterQuery(_QueryBase):
  """Query implementation for cluster-level fields.

  """
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    """Set up the query; cluster queries never take locks.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if locking was requested

    """
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

    if self.do_locking:
      raise errors.OpPrereqError("Can not use locking for cluster queries",
                                 errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    """Nothing to declare, as no locks are used.

    """
    pass

  def _GetQueryData(self, lu):
    """Collects the data needed to answer a cluster query.

    Only the pieces listed in C{self.requested_data} are gathered; for
    the others C{NotImplemented} is passed on instead.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.ClusterQueryData} object built from the cluster
        configuration, the job queue drain flag and the watcher pause
        information

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    if query.CQ_CONFIG in self.requested_data:
      cluster = lu.cfg.GetClusterInfo()
    else:
      cluster = NotImplemented

    if query.CQ_QUEUE_DRAINED in self.requested_data:
      # The queue counts as drained if the drain file exists
      drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
    else:
      drain_flag = NotImplemented

    if query.CQ_WATCHER_PAUSE in self.requested_data:
      watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
    else:
      watcher_pause = NotImplemented

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6538
6539
class LUInstanceActivateDisks(NoHooksLU):
  """Logical unit bringing up the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the locks on the instance's nodes at node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled, or
        if waiting for sync was requested and it failed

    """
    (disks_ok, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    if self.op.wait_for_sync and not _WaitForSync(self, self.instance):
      raise errors.OpExecError("Some disks of the instance are degraded!")

    return disks_info
6581
6582
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a tuple of (disks_ok, device_info); disks_ok is False if
      the operation failed, and device_info is a list of
      (host, instance_visible_name, node_visible_name) entries with the
      mapping from node devices to instance devices (the last element
      is None for a disk which could not be assembled on the primary
      node)

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # Work on a copy, so that the disk object itself keeps its size
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        is_offline_secondary = (node in instance.secondary_nodes and
                                result.offline)
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=False, pass=1): %s",
                      inst_disk.iv_name, node, msg)
        # Failures are tolerated only if secondaries are ignored or the
        # failing node is an offline secondary
        if not (ignore_secondaries or is_offline_secondary):
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=True, pass=2): %s",
                      inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
6673
6674
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance, failing if they can't be assembled.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as C{ignore_secondaries} to the disk assembly;
      if it is false (but not None), a hint about '--force' is logged
      on failure
  @raise errors.OpExecError: if the disks could not be assembled; the
      disks are shut down again before raising

  """
  (disks_ok, _) = _AssembleInstanceDisks(lu, instance,
                                         ignore_secondaries=force)
  if disks_ok:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    retry_hint = ("If the message above refers to a secondary node,"
                  " you can retry the operation using '--force'")
    lu.LogWarning("", hint=retry_hint)
  raise errors.OpExecError("Disk consistency error")
6688
6689
class LUInstanceDeactivateDisks(NoHooksLU):
  """Logical unit shutting down the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the locks on the instance's nodes at node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    Unless forced, the shutdown goes through
    L{_SafeShutdownInstanceDisks}.

    """
    instance = self.instance
    if not self.op.force:
      _SafeShutdownInstanceDisks(self, instance)
    else:
      _ShutdownInstanceDisks(self, instance)
6724
6725
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance, after a state check.

  The instance state is first checked against C{INSTANCE_DOWN} via
  L{_CheckInstanceState}; only then L{_ShutdownInstanceDisks} is
  called.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to shut down, passed on unchanged

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN,
                      msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
6735
6736
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @param instance: the instance owning the disks
  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks, or None for all disks of the instance
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on
  @raise errors.ProgrammerError: if a selected disk does not belong to
      the instance

  """
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
6753
6754
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node make the function return False, unless
  C{ignore_primary} is true, in which case they are ignored. Errors on
  any other node count as failures only if the RPC result does not
  flag the node as offline.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to shut down (or all, if None)
  @type ignore_primary: boolean
  @param ignore_primary: whether to ignore errors on the primary node
  @rtype: boolean
  @return: False if any error which is not ignored occurred, True
      otherwise

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if not msg:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)
      if node == instance.primary_node:
        failed = not ignore_primary
      else:
        # Failures on offline non-primary nodes are expected
        failed = not result.offline
      if failed:
        all_result = False

  return all_result
6779
6780
6781 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6782   """Checks if a node has enough free memory.
6783
6784   This function check if a given node has the needed amount of free
6785   memory. In case the node has less memory or we cannot get the
6786   information from the node, this function raise an OpPrereqError
6787   exception.
6788
6789   @type lu: C{LogicalUnit}
6790   @param lu: a logical unit from which we get configuration data
6791   @type node: C{str}
6792   @param node: the node to check
6793   @type reason: C{str}
6794   @param reason: string to use in the error message
6795   @type requested: C{int}
6796   @param requested: the amount of memory in MiB to check for
6797   @type hypervisor_name: C{str}
6798   @param hypervisor_name: the hypervisor to ask for memory stats
6799   @rtype: integer
6800   @return: node current free memory
6801   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6802       we cannot check the node
6803
6804   """
6805   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6806   nodeinfo[node].Raise("Can't get data from node %s" % node,
6807                        prereq=True, ecode=errors.ECODE_ENVIRON)
6808   (_, _, (hv_info, )) = nodeinfo[node].payload
6809
6810   free_mem = hv_info.get("memory_free", None)
6811   if not isinstance(free_mem, int):
6812     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6813                                " was '%s'" % (node, free_mem),
6814                                errors.ECODE_ENVIRON)
6815   if requested > free_mem:
6816     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6817                                " needed %s MiB, available %s MiB" %
6818                                (node, reason, requested, free_mem),
6819                                errors.ECODE_NORES)
6820   return free_mem
6821
6822
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks the free disk space of the nodes, for several VGs.

  For every volume group in C{req_sizes} the check is delegated to
  L{_CheckNodesFreeDiskOnVG}.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for (vg_name, needed_mib) in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, needed_mib)
6844
6845
6846 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6847   """Checks if nodes have enough free disk space in the specified VG.
6848
6849   This function check if all given nodes have the needed amount of
6850   free disk. In case any node has less disk or we cannot get the
6851   information from the node, this function raise an OpPrereqError
6852   exception.
6853
6854   @type lu: C{LogicalUnit}
6855   @param lu: a logical unit from which we get configuration data
6856   @type nodenames: C{list}
6857   @param nodenames: the list of node names to check
6858   @type vg: C{str}
6859   @param vg: the volume group to check
6860   @type requested: C{int}
6861   @param requested: the amount of disk in MiB to check for
6862   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6863       or we cannot check the node
6864
6865   """
6866   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6867   for node in nodenames:
6868     info = nodeinfo[node]
6869     info.Raise("Cannot get current information from node %s" % node,
6870                prereq=True, ecode=errors.ECODE_ENVIRON)
6871     (_, (vg_info, ), _) = info.payload
6872     vg_free = vg_info.get("vg_free", None)
6873     if not isinstance(vg_free, int):
6874       raise errors.OpPrereqError("Can't compute free disk space on node"
6875                                  " %s for vg %s, result was '%s'" %
6876                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6877     if requested > vg_free:
6878       raise errors.OpPrereqError("Not enough disk space on target node %s"
6879                                  " vg %s: required %d MiB, available %d MiB" %
6880                                  (node, vg, requested, vg_free),
6881                                  errors.ECODE_NORES)
6882
6883
6884 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6885   """Checks if nodes have enough physical CPUs
6886
6887   This function checks if all given nodes have the needed number of
6888   physical CPUs. In case any node has less CPUs or we cannot get the
6889   information from the node, this function raises an OpPrereqError
6890   exception.
6891
6892   @type lu: C{LogicalUnit}
6893   @param lu: a logical unit from which we get configuration data
6894   @type nodenames: C{list}
6895   @param nodenames: the list of node names to check
6896   @type requested: C{int}
6897   @param requested: the minimum acceptable number of physical CPUs
6898   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6899       or we cannot check the node
6900
6901   """
6902   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6903   for node in nodenames:
6904     info = nodeinfo[node]
6905     info.Raise("Cannot get current information from node %s" % node,
6906                prereq=True, ecode=errors.ECODE_ENVIRON)
6907     (_, _, (hv_info, )) = info.payload
6908     num_cpus = hv_info.get("cpu_total", None)
6909     if not isinstance(num_cpus, int):
6910       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6911                                  " on node %s, result was '%s'" %
6912                                  (node, num_cpus), errors.ECODE_ENVIRON)
6913     if requested > num_cpus:
6914       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6915                                  "required" % (node, num_cpus, requested),
6916                                  errors.ECODE_NORES)
6917
6918
class LUInstanceStartup(LogicalUnit):
  """Logical unit starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Upgrade and type-check the extra backend parameters, if any.

    """
    beparams = self.op.beparams
    if not beparams:
      return

    # fill the beparams dict
    objects.UpgradeBeParams(beparams)
    utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Lock the instance; node resource locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the resource lock on the instance's primary node.

    """
    if level != locking.LEVEL_NODE_RES:
      return
    self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    instance_env = _BuildInstanceHookEnvByObject(self, self.instance)
    env.update(instance_env)
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by all the instance's nodes,
        used for both hook phases

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and online, and
    validates the extra hypervisor parameters, if any. Unless an
    offline primary node is being ignored, it also checks the primary
    node, the bridges and (for an instance not yet running) the free
    memory on the primary node.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    extra_hvp = self.op.hvparams
    if extra_hvp:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(extra_hvp, constants.HVS_PARAMETER_TYPES)
      full_hvp = cluster.FillHV(instance)
      full_hvp.update(extra_hvp)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(full_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, full_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    pnode = instance.primary_node
    self.primary_offline = self.cfg.GetNodeInfo(pnode).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.LogWarning("Overridden parameters are ignored")
      return

    _CheckNodeOnline(self, pnode)

    full_bep = self.cfg.GetClusterInfo().FillBE(instance)
    full_bep.update(self.op.beparams)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      # already running, the memory is in use already
      return

    _CheckNodeFreeMemory(self, pnode,
                         "starting instance %s" % instance.name,
                         full_bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration (unless
    C{no_remember} is set) before its disks and the instance itself
    are started.

    @raise errors.OpExecError: if the instance could not be started;
        its disks are shut down again in this case

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.LogInfo("Primary node offline, marked instance as started")
      return

    pnode = instance.primary_node
    _StartInstanceDisks(self, instance, force)

    start_args = (instance, self.op.hvparams, self.op.beparams)
    start_result = self.rpc.call_instance_start(pnode, start_args,
                                                self.op.startup_paused)
    err = start_result.fail_msg
    if err:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % err)
7039
7040
class LUInstanceReboot(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    instance_env = _BuildInstanceHookEnvByObject(self, self.instance)
    env.update(instance_env)
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by all the instance's nodes,
        used for both hook phases

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and online, that
    its primary node is online and that its bridges exist.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A running instance is rebooted in place for soft and hard reboots;
    in all other cases the instance is shut down (if running), its
    disks are restarted and it is started again. Finally the instance
    is marked as up in the configuration.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
    pnode = instance.primary_node

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode)
    instance_running = bool(remote_info.payload)

    if instance_running and reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD):
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, instance,
                                                    reboot_type,
                                                    self.op.shutdown_timeout)
      reboot_result.Raise("Could not reboot instance")
    else:
      if not instance_running:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      else:
        stop_result = self.rpc.call_instance_shutdown(pnode, instance,
                                                      self.op.shutdown_timeout)
        stop_result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)

      _StartInstanceDisks(self, instance, ignore_secondaries)
      start_result = self.rpc.call_instance_start(pnode,
                                                  (instance, None, None),
                                                  False)
      err = start_result.fail_msg
      if err:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % err)

    self.cfg.MarkInstanceUp(instance.name)
7133
7134
class LUInstanceShutdown(LogicalUnit):
  """Logical unit shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["TIMEOUT"] = self.op.timeout
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by all the instance's nodes,
        used for both hook phases

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and online and,
    unless an offline primary node is being ignored, that the primary
    node is online.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    pnode_info = self.cfg.GetNodeInfo(self.instance.primary_node)
    self.primary_offline = pnode_info.offline

    if not (self.primary_offline and self.op.ignore_offline_nodes):
      _CheckNodeOnline(self, self.instance.primary_node)
    else:
      self.LogWarning("Ignoring offline primary node")

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration (unless
    C{no_remember} is set); if the primary node is not offline, the
    instance and its disks are then shut down. A failed instance
    shutdown is only logged as a warning.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.LogInfo("Primary node offline, marked instance as stopped")
      return

    err = self.rpc.call_instance_shutdown(pnode, instance, timeout).fail_msg
    if err:
      self.LogWarning("Could not shutdown instance: %s", err)

    _ShutdownInstanceDisks(self, instance)
7204
7205
class LUInstanceReinstall(LogicalUnit):
  """Logical unit re-running the OS installation of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Assemble the hooks environment.

    @return: the environment computed by L{_BuildInstanceHookEnvByObject}
        for the instance

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    return hooks_env

  def BuildHooksNodes(self):
    """Compute the nodes on which the hooks are run.

    @return: pair of (pre, post) node lists; both are the master node
        followed by all nodes of the instance

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Verify the prerequisites of the reinstallation.

    The primary node must be online, the instance must not be diskless
    and must pass the C{INSTANCE_DOWN} state check. A newly requested OS
    type is checked on the primary node via L{_CheckNodeHasOS}; given OS
    parameters are merged with the instance's ones and checked via
    L{_CheckOSParams} against the instance's nodes.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node,
                     "Instance primary node offline, cannot reinstall")

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, inst, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is None:
      # No OS change requested, keep the current one
      os_name = inst.os
    else:
      # OS verification
      primary = _ExpandNodeName(self.cfg, inst.primary_node)
      _CheckNodeHasOS(self, primary, self.op.os_type, self.op.force_variant)
      os_name = self.op.os_type

    inst_nodes = list(inst.all_nodes)

    merged_osparams = None
    if self.op.osparams:
      merged_osparams = _GetUpdatedParams(inst.osparams, self.op.osparams)
      _CheckOSParams(self, True, inst_nodes, os_name, merged_osparams)

    # Either None or the new dict (without defaults)
    self.os_inst = merged_osparams
    self.instance = inst

  def Exec(self, feedback_fn):
    """Run the OS create scripts for the instance.

    A changed OS type is written to the configuration first. The disks
    are then started, the OS add RPC is run on the primary node and the
    disks are shut down again, whether or not the RPC succeeded.

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      # Write to configuration
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      os_add_args = (instance, self.os_inst)
      result = self.rpc.call_instance_os_add(instance.primary_node,
                                             os_add_args, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
7292
7293
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  The disks to recreate can be restricted to a subset (as long as the nodes
  are not changed at the same time) and their size and mode can be modified
  in the process. New nodes for the instance can either be given explicitly
  in C{self.op.nodes} or be computed by an iallocator.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  # Disk parameters which may be changed while recreating a disk; any other
  # parameter is rejected in CheckArguments
  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    An instance allocation request is built from the current instance and
    passed to the iallocator named in C{self.op.iallocator}. On success the
    allocator's result is stored in C{self.op.nodes}.

    @raise errors.OpPrereqError: if the allocator run was not successful

    """
    be_full = self.cfg.GetClusterInfo().FillBE(self.instance)

    # FIXME
    # The allocator should actually run in "relocate" mode, but current
    # allocators don't support relocating all the nodes of an instance at
    # the same time. As a workaround we use "allocate" mode, but this is
    # suboptimal for two reasons:
    # - The instance name passed to the allocator is present in the list of
    #   existing instances, so there could be a conflict within the
    #   internal structures of the allocator. This doesn't happen with the
    #   current allocators, but it's a liability.
    # - The allocator counts the resources used by the instance twice: once
    #   because the instance exists already, and once because it tries to
    #   allocate a new instance.
    # The allocator could choose some of the nodes on which the instance is
    # running, but that's not a problem. If the instance nodes are broken,
    # they should already be marked as drained or offline, and hence
    # skipped by the allocator. If instance disks have been lost for other
    # reasons, then recreating the disks on the same nodes should be fine.
    disk_template = self.instance.disk_template
    spindle_use = be_full[constants.BE_SPINDLE_USE]
    req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
                                        disk_template=disk_template,
                                        tags=list(self.instance.GetTags()),
                                        os=self.instance.os,
                                        nics=[{}],
                                        vcpus=be_full[constants.BE_VCPUS],
                                        memory=be_full[constants.BE_MAXMEM],
                                        spindle_use=spindle_use,
                                        disks=[{constants.IDISK_SIZE: d.size,
                                                constants.IDISK_MODE: d.mode}
                                                for d in self.instance.disks],
                                        hypervisor=self.instance.hypervisor)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    # The request must ask for as many nodes as the instance currently uses
    assert req.RequiredNodes() == len(self.instance.all_nodes)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    # From here on the opcode looks as if the nodes had been given explicitly
    self.op.nodes = ial.result
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

  def CheckArguments(self):
    """Check and normalize the opcode arguments.

    A list of plain disk indices (deprecated format) is deduplicated, sorted
    and converted to a list of C{(index, {})} pairs. Afterwards duplicate
    indices are rejected, the iallocator/nodes combination is checked (only
    if one of them was given) and the per-disk parameters are type-checked
    and restricted to L{_MODIFYABLE}.

    @raise errors.OpPrereqError: on duplicate disk indices or on an attempt
        to change a parameter which is not in L{_MODIFYABLE}

    """
    if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    # We don't want _CheckIAllocatorOrNode selecting the default iallocator
    # when neither iallocator nor nodes are specified
    if self.op.iallocator or self.op.nodes:
      _CheckIAllocatorOrNode(self, "iallocator", "nodes")

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declare the locks needed by this LU.

    Explicitly given nodes are expanded and requested directly. Otherwise
    the node locks are left empty here and filled in by L{DeclareLocks};
    if an iallocator is used, node group locks and the node allocation lock
    are requested as well.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      if self.op.iallocator:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    # Filled in DeclareLocks with a copy of the node locks
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    """Compute the locks to acquire at the given level.

    @param level: the lock level being processed

    """
    if level == locking.LEVEL_NODEGROUP:
      # This level is only requested (in ExpandNames) when an iallocator is
      # used
      assert self.op.iallocator is not None
      assert not self.op.nodes
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock the primary group used by the instance optimistically; this
      # requires going via the node before it's locked, requiring
      # verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)

    elif level == locking.LEVEL_NODE:
      # If an allocator is used, then we lock all the nodes in the current
      # instance group, as we don't know yet which ones will be selected;
      # if we replace the nodes without using an allocator, locks are
      # already declared in ExpandNames; otherwise, we need to lock all the
      # instance nodes for disk re-creation
      if self.op.iallocator:
        assert not self.op.nodes
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1

        # Lock member nodes of the group of the primary node
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          self.needed_locks[locking.LEVEL_NODE].extend(
            self.cfg.GetNodeGroup(group_uuid).members)

        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
      elif not self.op.nodes:
        self._LockInstancesNodes(primary_only=False)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: pair of (pre, post) node lists; both are the master node
        followed by all nodes of the instance

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    Additionally the number of replacement nodes, the disk template, the
    node group locks and the requested disk indices are verified. If an
    iallocator was requested it is run at the end, after which the locks
    that are no longer needed are released.

    @raise errors.OpPrereqError: if one of the checks fails

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      # Consistency checks between the disk template and the node count
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    # With an iallocator the nodes are only selected further down (see
    # _RunAllocator), hence the online check is skipped in that case
    if not self.op.iallocator:
      _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      # Node group locks are acquired only for the primary node (and only
      # when the allocator is used)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
                               primary_only=True)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    # Map of disk index to requested parameter changes; without an explicit
    # selection all disks are recreated without changes
    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    # NOTE: the comparison below is between a list and the result of
    # range(), i.e. it relies on range() returning a list (Python 2)
    if ((self.op.nodes or self.op.iallocator) and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

    if self.op.iallocator:
      # Sets self.op.nodes to the nodes chosen by the allocator
      self._RunAllocator()
      # Release unneeded node and node resource locks
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)

    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

  def Exec(self, feedback_fn):
    """Recreate the disks.

    The needed modifications (new DRBD logical IDs, size and mode changes)
    are collected first and applied in a second pass; afterwards the
    primary node is changed if new nodes were selected and the disks are
    created via L{_CreateDisks}, skipping those which were not selected.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        # Port and secret are kept, nodes and minors are replaced
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    # NOTE(review): the configuration is only updated explicitly here when
    # nodes were changed; confirm that size/mode-only changes are persisted
    # elsewhere
    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    # All touched nodes must be locked
    mylocks = self.owned_locks(locking.LEVEL_NODE)
    assert mylocks.issuperset(frozenset(instance.all_nodes))
    _CreateDisks(self, instance, to_skip=to_skip)
7593
7594
class LUInstanceRename(LogicalUnit):
  """Logical unit giving an instance a new name.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Validate the opcode arguments.

    @raise errors.OpPrereqError: if an IP check was requested without a
        name check

    """
    ip_check_only = self.op.ip_check and not self.op.name_check
    if ip_check_only:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Assemble the hooks environment.

    The instance environment is extended with the requested new name,
    stored under the C{INSTANCE_NEW_NAME} key.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["INSTANCE_NEW_NAME"] = self.op.new_name
    return hooks_env

  def BuildHooksNodes(self):
    """Compute the nodes on which the hooks are run.

    @return: pair of (pre, post) node lists; both are the master node
        followed by all nodes of the instance

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Verify the prerequisites of the rename.

    The instance must exist, its primary node must be online and it must
    pass the C{INSTANCE_NOT_RUNNING} state check. With name checks
    enabled the new name is replaced by the name returned from
    L{_CheckHostnameSane} and, if also requested, the corresponding IP
    must not answer on the node daemon port. Finally the new name must
    not belong to another instance.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None
    _CheckNodeOnline(self, inst.primary_node)
    _CheckInstanceState(self, inst, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = inst

    target_name = self.op.new_name
    if self.op.name_check:
      resolved = _CheckHostnameSane(self, target_name)
      target_name = resolved.name
      self.op.new_name = target_name
      if self.op.ip_check:
        if netutils.TcpPing(resolved.ip, constants.DEFAULT_NODED_PORT):
          raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                     (resolved.ip, target_name),
                                     errors.ECODE_NOTUNIQUE)

    known_instances = self.cfg.GetInstanceList()
    if target_name != inst.name and target_name in known_instances:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 target_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance in the configuration and on its nodes.

    After the configuration and the instance lock have been switched to
    the new name, the file storage directory is renamed (file-based disk
    templates only), the info text of all disks is refreshed and the OS
    rename script is run with the disks started.

    @return: the name of the instance as re-read from the configuration

    """
    old_inst = self.instance
    old_name = old_inst.name
    new_name = self.op.new_name

    # For file-based instances the storage directory is renamed as well;
    # remember the old directory before the configuration changes
    rename_file_storage = \
      (old_inst.disk_template in constants.DTS_FILEBASED and
       new_name != old_name)
    if rename_file_storage:
      old_file_storage_dir = os.path.dirname(old_inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_name, new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(new_name)
    pnode = inst.primary_node

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_res = self.rpc.call_file_storage_dir_rename(pnode,
                                                         old_file_storage_dir,
                                                         new_file_storage_dir)
      rename_res.Raise("Could not rename on node %s directory '%s' to '%s'"
                       " (but the instance has been renamed in Ganeti)" %
                       (pnode, old_file_storage_dir, new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    # update info on disks
    info_text = _GetInstanceInfoText(inst)
    for (disk_idx, disk) in enumerate(inst.disks):
      for node_name in inst.all_nodes:
        self.cfg.SetDiskID(disk, node_name)
        setinfo_res = self.rpc.call_blockdev_setinfo(node_name, disk,
                                                     info_text)
        if result_failed(setinfo_res):
          self.LogWarning("Error setting info on node %s for disk %s: %s",
                          node_name, disk_idx, setinfo_res.fail_msg)
    try:
      script_res = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                     old_name,
                                                     self.op.debug_level)
      failure = script_res.fail_msg
      if failure:
        # A failing OS script does not abort the rename
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (inst.name, inst.primary_node, failure))
        self.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


def result_failed(rpc_result):
  """Tell whether an RPC result carries a failure message.

  """
  return bool(rpc_result.fail_msg)
7715
7716
class LUInstanceRemove(LogicalUnit):
  """Logical unit deleting an instance from the cluster.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance lock and empty node/node resource locks.

    The node level locks are computed in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    for lock_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
      self.needed_locks[lock_level] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the locks to acquire at the given level.

    @param level: the lock level being processed

    """
    if level == locking.LEVEL_NODE_RES:
      # Copy node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = _CopyLockList(node_locks)
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Assemble the hooks environment.

    The instance environment is extended with the shutdown timeout,
    stored under the C{SHUTDOWN_TIMEOUT} key.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hooks_env

  def BuildHooksNodes(self):
    """Compute the nodes on which the hooks are run.

    @return: pair of (pre, post) node lists; pre hooks use the master
        node only, post hooks use all instance nodes followed by the
        master node

    """
    pre_nodes = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes)
    post_nodes.extend(pre_nodes)
    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Verify that the instance is known to the configuration.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Shut the instance down and remove it.

    A failed shutdown aborts the removal unless C{ignore_failures} is
    set, in which case only a warning is passed to C{feedback_fn}.

    @raise errors.OpExecError: if the shutdown fails and failures are not
        ignored

    """
    inst = self.instance
    pnode = inst.primary_node
    logging.info("Shutting down instance %s on node %s", inst.name, pnode)

    shutdown_res = self.rpc.call_instance_shutdown(pnode, inst,
                                                   self.op.shutdown_timeout)
    failure = shutdown_res.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(inst.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
7793
7794
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove an instance's disks and its configuration entry.

  The instance lock is scheduled for removal through C{lu.remove_locks}.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to send warnings to the caller
  @param instance: the instance object to remove
  @param ignore_failures: if true, a failed disk removal only produces a
      warning
  @raise errors.OpExecError: if the disks cannot be removed and failures
      are not ignored

  """
  inst_name = instance.name
  logging.info("Removing block devices for instance %s", inst_name)

  disks_removed = _RemoveDisks(lu, instance, ignore_failures=ignore_failures)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  # Nobody else may have scheduled an instance lock removal already
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7815
7816
class LUInstanceQuery(NoHooksLU):
  """Logical unit answering old-style instance queries.

  All the work is delegated to an L{_InstanceQuery} object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query helper from the opcode's names and fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query helper expand the names.

    """
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query helper declare the locks for the given level.

    @param level: the lock level being processed

    """
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query.

    @return: the result of the helper's C{OldStyleQuery} method

    """
    query_result = self.iq.OldStyleQuery(self)
    return query_result
7836
7837
def _ExpandNamesForMigration(lu):
  """Prepare names and lock declarations for L{TLMigrateInstance}.

  A given target node name is expanded; the node, node resource and node
  allocation lock lists are initialized empty.

  @type lu: L{LogicalUnit}

  """
  target = lu.op.target_node
  if target is not None:
    lu.op.target_node = _ExpandNodeName(lu.cfg, target)

  # Node and node resource locks start out empty and are replaced later
  for lock_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
    lu.needed_locks[lock_level] = []
    lu.recalculate_locks[lock_level] = constants.LOCKS_REPLACE

  # The node allocation lock is actually only needed for replicated instances
  # (e.g. DRBD8) and if an iallocator is used.
  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
7856
7857
def _DeclareLocksForMigration(lu, level):
  """Compute the locks needed by L{TLMigrateInstance} at a given level.

  The node locks are decided while processing the node allocation level;
  the node level itself only verifies that they have been declared and
  the node resource level copies them.

  @type lu: L{LogicalUnit}
  @param level: Lock level

  """
  if level == locking.LEVEL_NODE_RES:
    # Copy node locks
    node_locks = lu.needed_locks[locking.LEVEL_NODE]
    lu.needed_locks[locking.LEVEL_NODE_RES] = _CopyLockList(node_locks)
    return

  if level == locking.LEVEL_NODE:
    # Node locks are declared together with the node allocation lock
    assert lu.needed_locks[locking.LEVEL_NODE]
    return

  if level != locking.LEVEL_NODE_ALLOC:
    return

  assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)

  instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)

  if instance.disk_template not in constants.DTS_EXT_MIRROR:
    lu._LockInstancesNodes() # pylint: disable=W0212
    return

  if lu.op.target_node is not None:
    # Source and target are known, lock just those two
    lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                           lu.op.target_node]
  else:
    # No target given, so all nodes and the allocation lock are requested
    lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
  del lu.recalculate_locks[locking.LEVEL_NODE]
7889
7890
class LUInstanceFailover(LogicalUnit):
  """Logical unit failing an instance over to another node.

  The actual work is done by a L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Copy the optional opcode arguments onto the LU.

    C{iallocator} and C{target_node} default to C{None} if the opcode
    does not carry them.

    """
    for attr_name in ("iallocator", "target_node"):
      setattr(self, attr_name, getattr(self.op, attr_name, None))

  def ExpandNames(self):
    """Declare the locks and set up the migration tasklet.

    """
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    migrater = TLMigrateInstance(self, self.op.instance_name, False, True,
                                 False, self.op.ignore_consistency, True,
                                 self.op.shutdown_timeout,
                                 self.op.ignore_ipolicy)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Delegate the lock computation to L{_DeclareLocksForMigration}.

    @param level: the lock level being processed

    """
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Assemble the hooks environment.

    The failover specific variables are set first and then updated with
    the generic instance environment. For internally mirrored disk
    templates the old secondary is the instance's first secondary node
    and the new secondary is the old primary; otherwise both are empty
    strings.

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node

    if inst.disk_template in constants.DTS_INT_MIRROR:
      old_secondary = inst.secondary_nodes[0]
      new_secondary = old_primary
    else:
      old_secondary = new_secondary = ""

    hooks_env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "NEW_PRIMARY": self.op.target_node,
      "OLD_SECONDARY": old_secondary,
      "NEW_SECONDARY": new_secondary,
      }
    hooks_env.update(_BuildInstanceHookEnvByObject(self, inst))

    return hooks_env

  def BuildHooksNodes(self):
    """Compute the nodes on which the hooks are run.

    @return: pair of (pre, post) node lists; pre hooks use the master
        node and the instance's secondary nodes, post hooks additionally
        use the instance's primary node

    """
    inst = self._migrater.instance
    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [inst.primary_node]
    return (pre_nodes, post_nodes)
7953
7954
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and set up the migration tasklet.

    """
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    # Positional flags after the instance name: cleanup, failover (never
    # requested directly here), fallback, ignore_consistency (always off),
    # allow_runtime_changes, shutdown_timeout, ignore_ipolicy
    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
                        False, self.op.allow_failover, False,
                        self.op.allow_runtime_changes,
                        constants.DEFAULT_SHUTDOWN_TIMEOUT,
                        self.op.ignore_ipolicy)

    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the locks needed at the given level.

    All the work is delegated to L{_DeclareLocksForMigration}.

    """
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    The new primary is the instance's first secondary node for internally
    mirrored disk templates; otherwise it is the target node selected by
    the tasklet, falling back to the opcode's C{target_node}.

    @rtype: dict
    @return: the hooks environment; the migration-specific entries take
        precedence over the ones computed from the instance object

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    int_mirror = instance.disk_template in constants.DTS_INT_MIRROR

    if int_mirror:
      # The tasklet only accepts the current secondary as target for these
      # templates and the opcode's target_node is normally unset, so take
      # the node from the instance itself
      target_node = instance.secondary_nodes[0]
    else:
      # The tasklet's target_node also covers the iallocator case, where
      # the opcode itself carries no target node
      target_node = getattr(self._migrater, "target_node", None)
      if target_node is None:
        target_node = self.op.target_node

    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if int_mirror:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      # Empty strings (not None), consistent with the failover hooks env
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two node lists; the first holds the master node followed by
        the instance's secondary nodes, the second additionally ends with
        the instance's primary node

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
8015
8016
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the target node name and declare the initial locks.

    The expanded node name is written back to the opcode.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the instance's primary node and the target
    node.

    @rtype: dict
    @return: the hooks environment; entries computed from the instance
        object take precedence over the move-specific ones

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list (master, primary node, target node) for both
        elements

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that the target node
    differs from the current primary, that all disks are plain logical
    volumes or files, and that the target node is usable (online, not
    drained, VM-capable, acceptable for the instance policy, with enough
    memory if the instance is to be started, and with the needed bridges).

    @raise errors.OpPrereqError: if the instance already lives on the
        target node or one of its disks has an unsupported layout

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    # Fetched once and used both for the backend parameters and for the
    # instance policy calculation below
    cluster = self.cfg.GetClusterInfo()
    bep = cluster.FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    group_info = self.cfg.GetNodeGroup(node.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: function used to send feedback back to the caller
    @raise errors.OpExecError: if the instance cannot be shut down (unless
        C{ignore_consistency} is set), if the disks cannot be created or
        copied, or if the instance cannot be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.LogWarning("Could not shutdown instance %s on node %s."
                        " Proceeding anyway. Please make sure node"
                        " %s is down. Error details: %s",
                        instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # The minors are released and the error re-raised even if removing
        # the partially created disks failed
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        # Stop at the first failure, the remaining disks are not attempted
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, (disk, instance),
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # Raising from the finally clause means the copy error is reported
        # even if the cleanup on the target node failed as well
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # The data is on the target node now, so record it as the new primary
    # before the old copies are removed
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
8213
8214
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  The LU itself migrates nothing: it submits one migration job per
  primary instance of the node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """No argument checks are needed for this LU.

    """
    pass

  def ExpandNames(self):
    """Expand the node name and request a shared lock on that node.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks for this LU run on the master node only.

    @rtype: dict
    @return: the node name and the runtime changes flag

    """
    env = {}
    env["NODE_NAME"] = self.op.node_name
    env["ALLOW_RUNTIME_CHANGES"] = self.op.allow_runtime_changes
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same single-element list (the master node) twice

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)

  def CheckPrereq(self):
    """No prerequisites are checked for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Submit one migration job for each primary instance of the node.

    @param feedback_fn: unused by this LU
    @return: a L{ResultWithJobs} holding one single-opcode job per instance

    """
    runtime_changes = self.op.allow_runtime_changes

    # Prepare jobs for migration instances
    jobs = []
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      migrate_op = \
        opcodes.OpInstanceMigrate(instance_name=inst.name,
                                  mode=self.op.mode,
                                  live=self.op.live,
                                  iallocator=self.op.iallocator,
                                  target_node=self.op.target_node,
                                  allow_runtime_changes=runtime_changes,
                                  ignore_ipolicy=self.op.ignore_ipolicy)
      jobs.append([migrate_op])

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
    assert owned_nodes == frozenset([self.op.node_name])

    return ResultWithJobs(jobs)
8277
8278
8279 class TLMigrateInstance(Tasklet):
8280   """Tasklet class for instance migration.
8281
8282   @type live: boolean
8283   @ivar live: whether the migration will be done live or non-live;
8284       this variable is initialized only after CheckPrereq has run
8285   @type cleanup: boolean
8286   @ivar cleanup: Whether we clean up from a failed migration
8287   @type iallocator: string
8288   @ivar iallocator: The iallocator used to determine target_node
8289   @type target_node: string
8290   @ivar target_node: If given, the target_node to reallocate the instance to
8291   @type failover: boolean
8292   @ivar failover: Whether operation results in failover or migration
8293   @type fallback: boolean
8294   @ivar fallback: Whether fallback to failover is allowed if migration not
8295                   possible
8296   @type ignore_consistency: boolean
8297   @ivar ignore_consistency: Whether we should ignore consistency between source
8298                             and target node
8299   @type shutdown_timeout: int
8300   @ivar shutdown_timeout: In case of failover timeout of the shutdown
8301   @type ignore_ipolicy: bool
8302   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8303
8304   """
8305
8306   # Constants
8307   _MIGRATION_POLL_INTERVAL = 1      # seconds
8308   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8309
8310   def __init__(self, lu, instance_name, cleanup, failover, fallback,
8311                ignore_consistency, allow_runtime_changes, shutdown_timeout,
8312                ignore_ipolicy):
8313     """Initializes this class.
8314
8315     """
8316     Tasklet.__init__(self, lu)
8317
8318     # Parameters
8319     self.instance_name = instance_name
8320     self.cleanup = cleanup
8321     self.live = False # will be overridden later
8322     self.failover = failover
8323     self.fallback = fallback
8324     self.ignore_consistency = ignore_consistency
8325     self.shutdown_timeout = shutdown_timeout
8326     self.ignore_ipolicy = ignore_ipolicy
8327     self.allow_runtime_changes = allow_runtime_changes
8328
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, decides whether the
    operation ends up being a migration or a failover, determines and
    validates the target node and computes the migration mode.

    The following attributes are set here: C{self.instance}, C{self.live},
    possibly C{self.failover} (when switching to failover),
    C{self.target_node} (externally mirrored disk templates only),
    C{self.tgt_free_mem} (only when the memory check is run) and
    C{self.current_mem} (only for a migration of an instance reported as
    running).

    @raise errors.OpPrereqError: if the disk template does not allow the
        operation, the requested target is not acceptable, or both the
        'live' and 'mode' opcode parameters are given
    @raise errors.ConfigurationError: if an internally mirrored instance
        has no secondary node

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance
    cluster = self.cfg.GetClusterInfo()

    # A migration of an instance that is not marked up is turned into a
    # failover, if the caller allowed that
    if (not self.cleanup and
        not instance.admin_state == constants.ADMINST_UP and
        not self.failover and self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      # Externally mirrored: the target comes from the iallocator or from
      # the opcode
      assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)

      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # Check that the target node is correct in terms of instance policy
      nodeinfo = self.cfg.GetNodeInfo(self.target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])
        _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    else:
      # Internally mirrored: the only possible target is the first
      # secondary node; note that self.target_node is not set in this branch
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)

      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)
      nodeinfo = self.cfg.GetNodeInfo(target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

    i_be = cluster.FillBE(instance)

    # check memory requirements on the target node; skipped for cleanups
    # and for failovers of instances which are not marked up
    if (not self.cleanup and
         (not self.failover or instance.admin_state == constants.ADMINST_UP)):
      self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
                                               "migrating instance %s" %
                                               instance.name,
                                               i_be[constants.BE_MINMEM],
                                               instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check if failover must be forced instead of migration
    if (not self.cleanup and not self.failover and
        i_be[constants.BE_ALWAYS_FAILOVER]):
      self.lu.LogInfo("Instance configured to always failover; fallback"
                      " to failover")
      self.failover = True

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        # Ask the primary node whether the instance can be migrated; on
        # failure either fall back to failover or abort
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        # Translate the boolean 'live' parameter into a migration mode
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = cluster.FillHV(self.instance, skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

    if not (self.failover or self.cleanup):
      # Remember how much memory the instance currently uses on its primary
      # node; self.current_mem stays unset if no instance data is returned
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking instance on node %s" %
                        instance.primary_node)
      instance_running = bool(remote_info.payload)
      if instance_running:
        self.current_mem = int(remote_info.payload["memory"])
8492
8493   def _RunAllocator(self):
8494     """Run the allocator based on input opcode.
8495
8496     """
8497     assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8498
8499     # FIXME: add a self.ignore_ipolicy option
8500     req = iallocator.IAReqRelocate(name=self.instance_name,
8501                                    relocate_from=[self.instance.primary_node])
8502     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8503
8504     ial.Run(self.lu.op.iallocator)
8505
8506     if not ial.success:
8507       raise errors.OpPrereqError("Can't compute nodes using"
8508                                  " iallocator '%s': %s" %
8509                                  (self.lu.op.iallocator, ial.info),
8510                                  errors.ECODE_NORES)
8511     self.target_node = ial.result[0]
8512     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8513                     self.instance_name, self.lu.op.iallocator,
8514                     utils.CommaJoin(ial.result))
8515
8516   def _WaitUntilSync(self):
8517     """Poll with custom rpc for disk sync.
8518
8519     This uses our own step-based rpc call.
8520
8521     """
8522     self.feedback_fn("* wait until resync is done")
8523     all_done = False
8524     while not all_done:
8525       all_done = True
8526       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8527                                             self.nodes_ip,
8528                                             (self.instance.disks,
8529                                              self.instance))
8530       min_percent = 100
8531       for node, nres in result.items():
8532         nres.Raise("Cannot resync disks on node %s" % node)
8533         node_done, node_percent = nres.payload
8534         all_done = all_done and node_done
8535         if node_percent is not None:
8536           min_percent = min(min_percent, node_percent)
8537       if not all_done:
8538         if min_percent < 100:
8539           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8540         time.sleep(2)
8541
8542   def _EnsureSecondary(self, node):
8543     """Demote a node to secondary.
8544
8545     """
8546     self.feedback_fn("* switching node %s to secondary mode" % node)
8547
8548     for dev in self.instance.disks:
8549       self.cfg.SetDiskID(dev, node)
8550
8551     result = self.rpc.call_blockdev_close(node, self.instance.name,
8552                                           self.instance.disks)
8553     result.Raise("Cannot change disk to secondary on node %s" % node)
8554
8555   def _GoStandalone(self):
8556     """Disconnect from the network.
8557
8558     """
8559     self.feedback_fn("* changing into standalone mode")
8560     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8561                                                self.instance.disks)
8562     for node, nres in result.items():
8563       nres.Raise("Cannot disconnect disks node %s" % node)
8564
8565   def _GoReconnect(self, multimaster):
8566     """Reconnect to the network.
8567
8568     """
8569     if multimaster:
8570       msg = "dual-master"
8571     else:
8572       msg = "single-master"
8573     self.feedback_fn("* changing disks into %s mode" % msg)
8574     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8575                                            (self.instance.disks, self.instance),
8576                                            self.instance.name, multimaster)
8577     for node, nres in result.items():
8578       nres.Raise("Cannot change disks config on node %s" % node)
8579
8580   def _ExecCleanup(self):
8581     """Try to cleanup after a failed migration.
8582
8583     The cleanup is done by:
8584       - check that the instance is running only on one node
8585         (and update the config if needed)
8586       - change disks on its secondary node to secondary
8587       - wait until disks are fully synchronized
8588       - disconnect from the network
8589       - change disks into single-master mode
8590       - wait again until disks are fully synchronized
8591
8592     """
8593     instance = self.instance
8594     target_node = self.target_node
8595     source_node = self.source_node
8596
8597     # check running on only one node
8598     self.feedback_fn("* checking where the instance actually runs"
8599                      " (if this hangs, the hypervisor might be in"
8600                      " a bad state)")
8601     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8602     for node, result in ins_l.items():
8603       result.Raise("Can't contact node %s" % node)
8604
8605     runningon_source = instance.name in ins_l[source_node].payload
8606     runningon_target = instance.name in ins_l[target_node].payload
8607
8608     if runningon_source and runningon_target:
8609       raise errors.OpExecError("Instance seems to be running on two nodes,"
8610                                " or the hypervisor is confused; you will have"
8611                                " to ensure manually that it runs only on one"
8612                                " and restart this operation")
8613
8614     if not (runningon_source or runningon_target):
8615       raise errors.OpExecError("Instance does not seem to be running at all;"
8616                                " in this case it's safer to repair by"
8617                                " running 'gnt-instance stop' to ensure disk"
8618                                " shutdown, and then restarting it")
8619
8620     if runningon_target:
8621       # the migration has actually succeeded, we need to update the config
8622       self.feedback_fn("* instance running on secondary node (%s),"
8623                        " updating config" % target_node)
8624       instance.primary_node = target_node
8625       self.cfg.Update(instance, self.feedback_fn)
8626       demoted_node = source_node
8627     else:
8628       self.feedback_fn("* instance confirmed to be running on its"
8629                        " primary node (%s)" % source_node)
8630       demoted_node = target_node
8631
8632     if instance.disk_template in constants.DTS_INT_MIRROR:
8633       self._EnsureSecondary(demoted_node)
8634       try:
8635         self._WaitUntilSync()
8636       except errors.OpExecError:
8637         # we ignore here errors, since if the device is standalone, it
8638         # won't be able to sync
8639         pass
8640       self._GoStandalone()
8641       self._GoReconnect(False)
8642       self._WaitUntilSync()
8643
8644     self.feedback_fn("* done")
8645
8646   def _RevertDiskStatus(self):
8647     """Try to revert the disk status after a failed migration.
8648
8649     """
8650     target_node = self.target_node
8651     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8652       return
8653
8654     try:
8655       self._EnsureSecondary(target_node)
8656       self._GoStandalone()
8657       self._GoReconnect(False)
8658       self._WaitUntilSync()
8659     except errors.OpExecError, err:
8660       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8661                          " please try to recover the instance manually;"
8662                          " error '%s'" % str(err))
8663
8664   def _AbortMigration(self):
8665     """Call the hypervisor code to abort a started migration.
8666
8667     """
8668     instance = self.instance
8669     target_node = self.target_node
8670     source_node = self.source_node
8671     migration_info = self.migration_info
8672
8673     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8674                                                                  instance,
8675                                                                  migration_info,
8676                                                                  False)
8677     abort_msg = abort_result.fail_msg
8678     if abort_msg:
8679       logging.error("Aborting migration failed on target node %s: %s",
8680                     target_node, abort_msg)
8681       # Don't raise an exception here, as we stil have to try to revert the
8682       # disk status, even if this step failed.
8683
8684     abort_result = self.rpc.call_instance_finalize_migration_src(
8685       source_node, instance, False, self.live)
8686     abort_msg = abort_result.fail_msg
8687     if abort_msg:
8688       logging.error("Aborting migration failed on source node %s: %s",
8689                     source_node, abort_msg)
8690
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    The disk mode changes are skipped for disk templates listed in
    C{constants.DTS_EXT_MIRROR}. If preparing the target node, starting
    the migration or the memory transfer fails, the migration is aborted
    and the disk status is reverted before the error is raised.

    @raise errors.OpExecError: if a disk is not consistent on the target
        node, if the instance does not fit into the target node's free
        memory and runtime changes are not allowed, or if fetching the
        migration information, the migration itself or its finalization
        fails; the C{Raise} calls on RPC results may raise as well

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, [self.instance.hypervisor])
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    # Only the single hypervisor-specific entry of each payload is used
    (_, _, (src_info, )) = nodeinfo[source_node].payload
    (_, _, (dst_info, )) = nodeinfo[target_node].payload

    # The comparison is only possible if both nodes reported a version
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    # If the instance currently uses more memory than is free on the target
    # node, either refuse to migrate or shrink the instance to what fits
    if self.current_mem > self.tgt_free_mem:
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (instance.name, target_node,
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # Stored on the object as well, since _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    # Poll the source node until the migration status is no longer active,
    # reporting the transfer progress at most once per feedback interval
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      # msg is tested first, so ms is only inspected if the RPC succeeded
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        if not msg:
          # The RPC itself worked, the failure was reported via the status
          msg = "hypervisor returned failure"
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    # The transfer is done: finalize on the source node first; failures from
    # here on are raised without aborting or reverting the disk status
    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Demote the old primary (the source node) and go back to
      # single-master mode
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    # If the instance's disk template is `rbd' and there was a successful
    # migration, unmap the device from the source node.
    if self.instance.disk_template == constants.DT_RBD:
      disks = _ExpandCheckDisks(instance, instance.disks)
      self.feedback_fn("* unmapping instance's disks from %s" % source_node)
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
        msg = result.fail_msg
        if msg:
          # Only logged, not raised: the migration itself already succeeded
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name, source_node, msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name, source_node)

    self.feedback_fn("* done")
8874
8875   def _ExecFailover(self):
8876     """Failover an instance.
8877
8878     The failover is done by shutting it down on its present node and
8879     starting it on the secondary.
8880
8881     """
8882     instance = self.instance
8883     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8884
8885     source_node = instance.primary_node
8886     target_node = self.target_node
8887
8888     if instance.admin_state == constants.ADMINST_UP:
8889       self.feedback_fn("* checking disk consistency between source and target")
8890       for (idx, dev) in enumerate(instance.disks):
8891         # for drbd, these are drbd over lvm
8892         if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8893                                      False):
8894           if primary_node.offline:
8895             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8896                              " target node %s" %
8897                              (primary_node.name, idx, target_node))
8898           elif not self.ignore_consistency:
8899             raise errors.OpExecError("Disk %s is degraded on target node,"
8900                                      " aborting failover" % idx)
8901     else:
8902       self.feedback_fn("* not checking disk consistency as instance is not"
8903                        " running")
8904
8905     self.feedback_fn("* shutting down instance on source node")
8906     logging.info("Shutting down instance %s on node %s",
8907                  instance.name, source_node)
8908
8909     result = self.rpc.call_instance_shutdown(source_node, instance,
8910                                              self.shutdown_timeout)
8911     msg = result.fail_msg
8912     if msg:
8913       if self.ignore_consistency or primary_node.offline:
8914         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8915                            " proceeding anyway; please make sure node"
8916                            " %s is down; error details: %s",
8917                            instance.name, source_node, source_node, msg)
8918       else:
8919         raise errors.OpExecError("Could not shutdown instance %s on"
8920                                  " node %s: %s" %
8921                                  (instance.name, source_node, msg))
8922
8923     self.feedback_fn("* deactivating the instance's disks on source node")
8924     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8925       raise errors.OpExecError("Can't shut down the instance's disks")
8926
8927     instance.primary_node = target_node
8928     # distribute new instance config to the other nodes
8929     self.cfg.Update(instance, self.feedback_fn)
8930
8931     # Only start the instance if it's marked as up
8932     if instance.admin_state == constants.ADMINST_UP:
8933       self.feedback_fn("* activating the instance's disks on target node %s" %
8934                        target_node)
8935       logging.info("Starting instance %s on node %s",
8936                    instance.name, target_node)
8937
8938       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8939                                            ignore_secondaries=True)
8940       if not disks_ok:
8941         _ShutdownInstanceDisks(self.lu, instance)
8942         raise errors.OpExecError("Can't activate the instance's disks")
8943
8944       self.feedback_fn("* starting the instance on the target node %s" %
8945                        target_node)
8946       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8947                                             False)
8948       msg = result.fail_msg
8949       if msg:
8950         _ShutdownInstanceDisks(self.lu, instance)
8951         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8952                                  (instance.name, target_node, msg))
8953
8954   def Exec(self, feedback_fn):
8955     """Perform the migration.
8956
8957     """
8958     self.feedback_fn = feedback_fn
8959     self.source_node = self.instance.primary_node
8960
8961     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8962     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8963       self.target_node = self.instance.secondary_nodes[0]
8964       # Otherwise self.target_node has been populated either
8965       # directly, or through an iallocator.
8966
8967     self.all_nodes = [self.source_node, self.target_node]
8968     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8969                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8970
8971     if self.failover:
8972       feedback_fn("Failover instance %s" % self.instance.name)
8973       self._ExecFailover()
8974     else:
8975       feedback_fn("Migrating instance %s" % self.instance.name)
8976
8977       if self.cleanup:
8978         return self._ExecCleanup()
8979       else:
8980         return self._ExecMigration()
8981
8982
def _CreateBlockDev(lu, node, instance, device, force_create, info,
                    force_open):
  """Annotate a device and create it through L{_CreateBlockDevInner}.

  The root device is annotated with its disk parameters first; all
  arguments are then passed on unchanged, with the annotated device
  taking the place of the original one.

  """
  annotated = _AnnotateDiskParams(instance, [device], lu.cfg)
  # Exactly one annotated disk is expected back for the single device
  (anno_disk,) = annotated

  return _CreateBlockDevInner(lu, node, instance, anno_disk, force_create,
                              info, force_open)
8993
8994
def _CreateBlockDevInner(lu, node, instance, device, force_create,
                         info, force_open):
  """Create a tree of block devices on a given node.

  The children of the device are always visited first. The device itself
  is only created if creation is forced, either by the caller or because
  the device reports that it has to be created on secondaries; in the
  latter case the creation is forced for all of its children as well.

  @attention: The device has to be annotated already.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has the
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  must_create = force_create
  if device.CreateOnSecondary():
    must_create = True

  # Children are handled before the device itself
  for child in (device.children or []):
    _CreateBlockDevInner(lu, node, instance, child, must_create,
                         info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
9037
9038
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  Children of the device are not touched here, so they must have been
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  owner = instance.name

  lu.cfg.SetDiskID(device, node)
  create_result = lu.rpc.call_blockdev_create(node, device, device.size,
                                              owner, force_open, info)
  failure_text = ("Can't create block device %s on node %s for instance %s" %
                  (device, node, owner))
  create_result.Raise(failure_text)

  # Keep an already known physical ID, otherwise adopt the one returned by
  # the node
  if device.physical_id is None:
    device.physical_id = create_result.payload
9067
9068
def _GenerateUniqueNames(lu, exts):
  """Generate a list of unique names, one per given suffix.

  For every entry of C{exts} a new unique ID is requested from the
  configuration and the entry is appended to it.

  @param lu: the logical unit on whose behalf we execute
  @param exts: the suffixes to append to the generated IDs
  @rtype: list
  @return: the generated names, in the order of C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), suffix)
          for suffix in exts]
9080
9081
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  A port and a shared secret are requested from the configuration; the
  resulting DRBD8 disk has two LV children, one for the data and one for
  the metadata.

  @param vgnames: the volume groups of the data and the meta LV
  @param names: the LV names of the data and the meta LV
  @rtype: L{objects.Disk}
  @return: the DRBD8 disk object

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgnames[0], names[0]),
                         params={})
  meta_lv = objects.Disk(dev_type=constants.LD_LV,
                         size=constants.DRBD_META_SIZE,
                         logical_id=(vgnames[1], names[1]),
                         params={})

  drbd_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name, params={})
9105
9106
# Name prefix used by _GenerateDiskTemplate when generating unique disk
# names; for templates without an entry here no names are generated
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  }
9111
9112
# Logical device type used by _GenerateDiskTemplate for the disks of each
# disk template handled outside of its diskless and DRBD8 branches
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  }
9120
9121
def _GenerateDiskTemplate(
  lu, template_name, instance_name, primary_node, secondary_nodes,
  disk_info, file_storage_dir, file_driver, base_index,
  feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
  _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template to generate the disks for
  @param instance_name: the instance name, used for DRBD minor allocation
  @param primary_node: the primary node of the instance
  @param secondary_nodes: the secondary nodes; exactly one is required
      for DRBD8 and none is accepted for the other templates
  @param disk_info: the disk definitions (one dict per disk)
  @param file_storage_dir: directory used in file-based logical IDs
  @param file_driver: driver used in file-based logical IDs
  @param base_index: index of the first generated disk
  @param feedback_fn: function used to report the generated disks
  @param full_disk_params: disk parameters used to compute the DRBD
      parameters
  @rtype: list
  @return: the generated L{objects.Disk} objects (empty for diskless)
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # Two minors per disk, one on each of the two nodes
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
                                                       full_disk_params)
    drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]

    lv_prefixes = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                            for i in range(disk_count)])

    drbd_disks = []
    for (idx, disk) in enumerate(disk_info):
      lv_prefix = lv_prefixes[idx]
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      [lv_prefix + "_data",
                                       lv_prefix + "_meta"],
                                      "disk/%d" % (idx + base_index),
                                      minors[2 * idx], minors[2 * idx + 1])
      drbd_dev.mode = disk[constants.IDISK_MODE]
      drbd_disks.append(drbd_dev)

    return drbd_disks

  # All remaining templates consist of a single device per disk
  if secondary_nodes:
    raise errors.ProgrammerError("Wrong template configuration")

  if template_name == constants.DT_FILE:
    _req_file_storage()
  elif template_name == constants.DT_SHARED_FILE:
    _req_shr_file_storage()

  name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
  if name_prefix is not None:
    names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                      (name_prefix, base_index + i)
                                      for i in range(disk_count)])
  else:
    names = None

  # Builders for the logical ID; all of them take the position within
  # disk_info, the final disk index and the disk definition
  def _PlainId(idx, _, disk):
    return (disk.get(constants.IDISK_VG, vgname), names[idx])

  def _FileId(_, disk_index, disk):
    return (file_driver, "%s/disk%d" % (file_storage_dir, disk_index))

  def _BlockId(idx, disk_index, disk):
    return (constants.BLOCKDEV_DRIVER_MANUAL, disk[constants.IDISK_ADOPT])

  def _RbdId(idx, _, disk):
    return ("rbd", names[idx])

  if template_name == constants.DT_PLAIN:
    logical_id_fn = _PlainId
  elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
    logical_id_fn = _FileId
  elif template_name == constants.DT_BLOCK:
    logical_id_fn = _BlockId
  elif template_name == constants.DT_RBD:
    logical_id_fn = _RbdId
  else:
    raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

  dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

  generated = []
  for (idx, disk) in enumerate(disk_info):
    disk_index = base_index + idx
    size = disk[constants.IDISK_SIZE]
    feedback_fn("* disk %s, size %s" %
                (disk_index, utils.FormatUnit(size, "h")))
    new_disk = objects.Disk(dev_type=dev_type, size=size,
                            logical_id=logical_id_fn(idx, disk_index, disk),
                            iv_name="disk/%d" % disk_index,
                            mode=disk[constants.IDISK_MODE],
                            params={})
    generated.append(new_disk)

  return generated
9217
9218
def _GetInstanceInfoText(instance):
  """Build the text to be stored in the metadata of an instance's disks.

  @param instance: the instance owning the disks; only its name is used
  @rtype: string
  @return: the instance name prefixed with C{originstname+}

  """
  owner_name = instance.name
  info_text = "originstname+%s" % owner_name
  return info_text
9224
9225
def _CalcEta(time_taken, written, total_size):
  """Estimate the remaining time from the progress made so far.

  The average time needed per written unit is extrapolated to the amount
  which is still left to be written.

  @param time_taken: The time taken so far
  @param written: amount written so far (must not be zero)
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  remaining = total_size - written
  time_per_unit = time_taken / float(written)
  return remaining * time_per_unit
9237
9238
def _WipeDisks(lu, instance, disks=None):
  """Wipes instance disks.

  The synchronization of the disks is paused while they are wiped in
  chunks on the instance's primary node and is resumed afterwards, even
  if the wipe failed. Nothing is returned; pause and wipe failures are
  raised through the C{Raise} method of the RPC results, while failures
  to resume the synchronization are only logged as warnings.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @type disks: None or list of tuples
  @param disks: the disks to wipe, as C{(index, disk, start offset)}
      tuples; if None, all disks of the instance are wiped from offset 0

  """
  node = instance.primary_node

  if disks is None:
    disks = [(idx, disk, 0)
             for (idx, disk) in enumerate(instance.disks)]

  for (_, device, _) in disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pausing synchronization of disks of instance '%s'",
               instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (map(compat.snd, disks),
                                                   instance),
                                                  True)
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warning("Pausing synchronization of disk %s of instance '%s'"
                      " failed", idx, instance.name)

  try:
    for (idx, device, offset) in disks:
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
      # The truncation must not yield zero (small disks), as the offset
      # would then never advance and the ETA computation would divide by
      # zero.
      wipe_chunk_size = \
        max(1, int(min(constants.MAX_WIPE_CHUNK,
                       device.size / 100.0 *
                       constants.MIN_WIPE_CHUNK_PERCENT)))

      size = device.size
      # Zero forces a progress message right after the first chunk
      last_output = 0
      start_time = time.time()

      if offset == 0:
        info_text = ""
      else:
        info_text = (" (from %s to %s)" %
                     (utils.FormatUnit(offset, "h"),
                      utils.FormatUnit(size, "h")))

      lu.LogInfo("* Wiping disk %s%s", idx, info_text)

      logging.info("Wiping disk %d for instance %s on node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      while offset < size:
        # The last chunk may be smaller than the regular chunk size
        wipe_size = min(wipe_chunk_size, size - offset)

        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)

        result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
                                           wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))

        now = time.time()
        offset += wipe_size
        # Report the progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s",
                     offset / float(size) * 100, utils.FormatSeconds(eta))
          last_output = now
  finally:
    # The synchronization has to be resumed even if the wipe failed
    logging.info("Resuming synchronization of disks for instance '%s'",
                 instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                    (map(compat.snd, disks),
                                                     instance),
                                                    False)

    if result.fail_msg:
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                    node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
                        " failed", idx, instance.name)
9330
9331
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based disk
  templates the storage directory is created first. Nothing is returned;
  a failure to create the storage directory is raised through the
  C{Raise} method of the RPC result.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  """
  info = _GetInstanceInfoText(instance)

  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    # The directory is derived from the path of the first disk
    first_disk_path = instance.disks[0].logical_id[1]
    file_storage_dir = os.path.dirname(first_disk_path)
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (file_storage_dir, pnode))

  skipped = to_skip or []

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for (idx, device) in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    #HARDCODE
    for node in all_nodes:
      # Creation and opening are only forced on the (overridden) primary
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
9374
9375
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @type ignore_failures: boolean
  @param ignore_failures: whether the DRBD ports should be returned to the
      pool even if some of the removals failed
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      if result.fail_msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, result.fail_msg)
        # A failure on an offline node other than the primary one does not
        # count against the overall result
        if not (result.offline and node != instance.primary_node):
          all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  # Without any disk there is no path to derive the storage directory
  # from, so the directory removal is skipped instead of failing on the
  # access to the first disk
  if instance.disk_template in constants.DTS_FILEBASED and instance.disks:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # Report the node the removal was actually attempted on
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
9434
9435
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template to compute the requirements for
  @type disks: list of dicts
  @param disks: the disk definitions; every one of them needs the
      C{constants.IDISK_VG} and C{constants.IDISK_SIZE} keys, as the
      requirements are computed regardless of the template
  @rtype: dict
  @return: the required size per volume group name; empty for templates
      which don't use volume groups
  @raise errors.ProgrammerError: if the size requirements of the disk
      template are not known

  """
  def _compute(disks, payload):
    """Sum up the disk sizes plus a per-disk payload for each VG.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # Accumulate under the disk's own volume group, so that several disks
      # in the same volume group add up instead of overwriting each other
      vgs[vg_name] = \
        vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # the drbd metadata size is added for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
9466
9467
def _FilterVmNodes(lu, nodenames):
  """Drop the nodes which are not vm_capable from a list of node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf the filtering is done
  @type nodenames: list
  @param nodenames: the node names to be filtered
  @rtype: list
  @return: the entries of C{nodenames} which the configuration does not
      report as non-vm_capable, in their original order

  """
  excluded = frozenset(lu.cfg.GetNonVmCapableNodeList())

  kept = []
  for node_name in nodenames:
    if node_name in excluded:
      continue
    kept.append(node_name)

  return kept
9481
9482
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Validate hypervisor parameters on a set of nodes.

  This function abstracts the hypervisor parameter validation, to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check; nodes
      which are not vm_capable are left out
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)

  # the given parameters are layered on top of the cluster-level
  # parameters of this hypervisor before being sent for validation
  cluster_hvp = lu.cfg.GetClusterInfo().hvparams.get(hvname, {})
  full_hvp = objects.FillDict(cluster_hvp, hvparams)

  validation = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname,
                                                      full_hvp)
  for node_name in vm_nodes:
    node_result = validation[node_name]
    # results coming from offline nodes are not evaluated
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node_name)
9511
9512
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """Validate OS parameters on a set of nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check; nodes
      which are not vm_capable are left out
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  validation = lu.rpc.call_os_validate(vm_nodes, required, osname, checks,
                                       osparams)

  for node_name, node_result in validation.items():
    # offline nodes get no special treatment here, since this should be
    # run only against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" %
                      node_name)
    if node_result.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node_name)
9541
9542
def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
  """Build the instance allocation request for the iallocator.

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics; each one is converted via its
    C{ToDict} method
  @param beparams: The full filled beparams
  @param node_whitelist: List of nodes which should appear as online to the
    allocator (unless the node is already marked offline)

  @returns: A filled L{iallocator.IAReqInstanceAlloc}

  """
  spindle_use = beparams[constants.BE_SPINDLE_USE]

  request_args = {
    "name": op.instance_name,
    "disk_template": op.disk_template,
    "tags": op.tags,
    "os": op.os_type,
    "vcpus": beparams[constants.BE_VCPUS],
    "memory": beparams[constants.BE_MAXMEM],
    "spindle_use": spindle_use,
    "disks": disks,
    "nics": [nic.ToDict() for nic in nics],
    "hypervisor": op.hypervisor,
    "node_whitelist": node_whitelist,
    }

  # pylint: disable=W0142
  return iallocator.IAReqInstanceAlloc(**request_args)
9568
9569
def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
  """Build the NIC objects of an instance from the opcode NIC definitions.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The IP address used for NICs whose ip is set to auto
  @param cfg: An instance of the configuration object, used for reserving
    explicitly given MAC addresses
  @param ec_id: Execution context ID, passed along with MAC reservations

  @returns: The list of built L{objects.NIC} objects
  @raise errors.OpPrereqError: if a NIC definition is invalid or its MAC
    address can't be reserved

  """
  built_nics = []

  for spec in op.nics:
    requested_mode = spec.get(constants.INIC_MODE, None)
    if requested_mode is None or requested_mode == constants.VALUE_AUTO:
      mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
    else:
      mode = requested_mode

    network = spec.get(constants.INIC_NETWORK, None)
    link = spec.get(constants.NIC_LINK, None)
    ip = spec.get(constants.INIC_IP, None)

    if network is None or network.lower() == constants.VALUE_NONE:
      network = None
    elif requested_mode is not None or link is not None:
      raise errors.OpPrereqError("If network is given, no mode or link"
                                 " is allowed to be passed",
                                 errors.ECODE_INVAL)

    # ip validity checks
    if ip is None or ip.lower() == constants.VALUE_NONE:
      nic_ip = None
    elif ip.lower() == constants.VALUE_AUTO:
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
                                   errors.ECODE_INVAL)
      nic_ip = default_ip
    elif ip.lower() == constants.NIC_IP_POOL:
      # We defer pool operations until later, so that the iallocator has
      # filled in the instance's node(s); here the value is kept as given
      if network is None:
        raise errors.OpPrereqError("if ip=pool, parameter network"
                                   " must be passed too",
                                   errors.ECODE_INVAL)
      nic_ip = ip
    elif not netutils.IPAddress.IsValid(ip):
      raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                 errors.ECODE_INVAL)
    else:
      nic_ip = ip

    # TODO: check the ip address for uniqueness
    if mode == constants.NIC_MODE_ROUTED and not nic_ip:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                 errors.ECODE_INVAL)

    # MAC address verification; the auto/generate placeholders are passed
    # through unchanged
    mac = spec.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)

      try:
        # TODO: We need to factor this out
        cfg.ReserveMAC(mac, ec_id)
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address %s already in use"
                                   " in cluster" % mac,
                                   errors.ECODE_NOTUNIQUE)

    # Only explicitly requested values are stored in the NIC parameters
    nicparams = {}
    if requested_mode:
      nicparams[constants.NIC_MODE] = mode
    if link:
      nicparams[constants.NIC_LINK] = link

    # the syntax check is done on the parameters as filled by the cluster
    objects.NIC.CheckParameterSyntax(cluster.SimpleFillNIC(nicparams))

    new_nic = objects.NIC(mac=mac, ip=nic_ip,
                          network=network, nicparams=nicparams)
    built_nics.append(new_nic)

  return built_nics
9656
9657
def _ComputeDisks(op, default_vg):
  """Compute the normalized disk definitions of an instance.

  @param op: The instance opcode
  @param default_vg: The volume group used for disks which don't name one

  @return: The computed disks, as a list of dicts
  @raise errors.OpPrereqError: if a disk has an invalid access mode or a
    missing or non-integer size

  """
  computed = []

  for spec in op.disks:
    access = spec.get(constants.IDISK_MODE, constants.DISK_RDWR)
    if access not in constants.DISK_ACCESS_SET:
      raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                 access, errors.ECODE_INVAL)

    raw_size = spec.get(constants.IDISK_SIZE, None)
    if raw_size is None:
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
    try:
      size = int(raw_size)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid disk size '%s'" % raw_size,
                                 errors.ECODE_INVAL)

    entry = {
      constants.IDISK_SIZE: size,
      constants.IDISK_MODE: access,
      constants.IDISK_VG: spec.get(constants.IDISK_VG, default_vg),
      }
    # these keys are copied over only when the definition contains them
    for optional_key in (constants.IDISK_METAVG, constants.IDISK_ADOPT):
      if optional_key in spec:
        entry[optional_key] = spec[optional_key]

    computed.append(entry)

  return computed
9695
9696
def _ComputeFullBeParams(op, cluster):
  """Compute the fully filled backend parameters.

  The opcode's beparams are modified in place: values set to auto are
  replaced by the cluster default, then the dict is upgraded and
  type-checked.

  @param op: The instance opcode
  @param cluster: The cluster config object

  @return: The fully filled beparams

  """
  cluster_defaults = cluster.beparams[constants.PP_DEFAULT]

  auto_params = [name for (name, value) in op.beparams.items()
                 if value == constants.VALUE_AUTO]
  for name in auto_params:
    op.beparams[name] = cluster_defaults[name]

  objects.UpgradeBeParams(op.beparams)
  utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)

  return cluster.SimpleFillBE(op.beparams)
9713
9714
9715 class LUInstanceCreate(LogicalUnit):
9716   """Create an instance.
9717
9718   """
9719   HPATH = "instance-add"
9720   HTYPE = constants.HTYPE_INSTANCE
9721   REQ_BGL = False
9722
def CheckArguments(self):
  """Validate and normalize the opcode arguments.

  Besides rejecting invalid argument combinations, this normalizes
  C{self.op.instance_name}, may reset C{self.op.start} and
  C{self.op.snode}, forces C{self.op.force_variant} for imports and
  stores C{self.adopt_disks}, C{self.check_ip} and C{self._cds} on the
  logical unit. For remote imports, the verified source CA and the
  resolved source instance name are stored as well.

  @raise errors.OpPrereqError: if the arguments are invalid or
      inconsistent with each other

  """
  # do not require name_check to ease forward/backward compatibility
  # for tools
  # an instance which is not installed is not started either
  if self.op.no_install and self.op.start:
    self.LogInfo("No-installation mode selected, disabling startup")
    self.op.start = False
  # validate/normalize the instance name
  self.op.instance_name = \
    netutils.Hostname.GetNormalizedName(self.op.instance_name)

  if self.op.ip_check and not self.op.name_check:
    # TODO: make the ip check more flexible and not depend on the name check
    raise errors.OpPrereqError("Cannot do IP address check without a name"
                               " check", errors.ECODE_INVAL)

  # check nics' parameter names
  for nic in self.op.nics:
    utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

  # check disks. parameter names and consistent adopt/no-adopt strategy
  has_adopt = has_no_adopt = False
  for disk in self.op.disks:
    utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
    if constants.IDISK_ADOPT in disk:
      has_adopt = True
    else:
      has_no_adopt = True
  # mixing adopted and newly created disks is not supported
  if has_adopt and has_no_adopt:
    raise errors.OpPrereqError("Either all disks are adopted or none is",
                               errors.ECODE_INVAL)
  if has_adopt:
    # adoption is restricted to some disk templates and can be combined
    # neither with an iallocator nor with an import
    if self.op.disk_template not in constants.DTS_MAY_ADOPT:
      raise errors.OpPrereqError("Disk adoption is not supported for the"
                                 " '%s' disk template" %
                                 self.op.disk_template,
                                 errors.ECODE_INVAL)
    if self.op.iallocator is not None:
      raise errors.OpPrereqError("Disk adoption not allowed with an"
                                 " iallocator script", errors.ECODE_INVAL)
    if self.op.mode == constants.INSTANCE_IMPORT:
      raise errors.OpPrereqError("Disk adoption not allowed for"
                                 " instance import", errors.ECODE_INVAL)
  else:
    # some disk templates can only be used with adopted disks
    if self.op.disk_template in constants.DTS_MUST_ADOPT:
      raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                 " but no 'adopt' parameter given" %
                                 self.op.disk_template,
                                 errors.ECODE_INVAL)

  self.adopt_disks = has_adopt

  # instance name verification
  if self.op.name_check:
    self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
    # from here on the name returned by the check is used
    self.op.instance_name = self.hostname1.name
    # used in CheckPrereq for ip ping check
    self.check_ip = self.hostname1.ip
  else:
    self.check_ip = None

  # file storage checks
  if (self.op.file_driver and
      not self.op.file_driver in constants.FILE_DRIVER):
    raise errors.OpPrereqError("Invalid file driver name '%s'" %
                               self.op.file_driver, errors.ECODE_INVAL)

  if self.op.disk_template == constants.DT_FILE:
    opcodes.RequireFileStorage()
  elif self.op.disk_template == constants.DT_SHARED_FILE:
    opcodes.RequireSharedFileStorage()

  ### Node/iallocator related checks
  _CheckIAllocatorOrNode(self, "iallocator", "pnode")

  # the secondary node is only looked at when the primary node was given
  if self.op.pnode is not None:
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
    elif self.op.snode:
      self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                      " template")
      self.op.snode = None

  # the cluster domain secret is used below for checking the handshake
  # and the source CA of remote imports
  self._cds = _GetClusterDomainSecret()

  if self.op.mode == constants.INSTANCE_IMPORT:
    # On import force_variant must be True, because if we forced it at
    # initial install, our only chance when importing it back is that it
    # works again!
    self.op.force_variant = True

    if self.op.no_install:
      self.LogInfo("No-installation mode has no effect during import")

  elif self.op.mode == constants.INSTANCE_CREATE:
    # a plain creation needs an allowed OS and a disk template
    if self.op.os_type is None:
      raise errors.OpPrereqError("No guest OS specified",
                                 errors.ECODE_INVAL)
    if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
      raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                 " installation" % self.op.os_type,
                                 errors.ECODE_STATE)
    if self.op.disk_template is None:
      raise errors.OpPrereqError("No disk template specified",
                                 errors.ECODE_INVAL)

  elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
    # Check handshake to ensure both clusters have the same domain secret
    src_handshake = self.op.source_handshake
    if not src_handshake:
      raise errors.OpPrereqError("Missing source handshake",
                                 errors.ECODE_INVAL)

    errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                         src_handshake)
    if errmsg:
      raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                 errors.ECODE_INVAL)

    # Load and check source CA
    self.source_x509_ca_pem = self.op.source_x509_ca
    if not self.source_x509_ca_pem:
      raise errors.OpPrereqError("Missing source X509 CA",
                                 errors.ECODE_INVAL)

    try:
      # only the certificate itself is used from the returned pair
      (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                  self._cds)
    except OpenSSL.crypto.Error, err:
      raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                 (err, ), errors.ECODE_INVAL)

    # any error code other than None makes the certificate unusable
    (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
    if errcode is not None:
      raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                 errors.ECODE_INVAL)

    self.source_x509_ca = cert

    src_instance_name = self.op.source_instance_name
    if not src_instance_name:
      raise errors.OpPrereqError("Missing source instance name",
                                 errors.ECODE_INVAL)

    # store the name as returned by the hostname lookup
    self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

  else:
    raise errors.OpPrereqError("Invalid instance creation mode %r" %
                               self.op.mode, errors.ECODE_INVAL)
9877
def ExpandNames(self):
  """Compute the locks needed for creating the instance.

  The new instance name is registered as a lock to be added. Node locks
  cover either all nodes (when an iallocator is used, or when importing
  without a source node) or the given primary, secondary and source
  nodes. For imports, the source node and path of the opcode are
  expanded as well.

  @raise errors.OpPrereqError: if the instance already exists, or if an
      import from an absolute path lacks a source node

  """
  locks = {}
  self.needed_locks = locks

  new_name = self.op.instance_name
  # this is just a preventive check, but someone might still add this
  # instance in the meantime, and creation will fail at lock-add time
  if new_name in self.cfg.GetInstanceList():
    raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                               new_name, errors.ECODE_EXISTS)

  self.add_locks[locking.LEVEL_INSTANCE] = new_name

  if self.op.iallocator:
    # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
    # specifying a group on instance creation and then selecting nodes from
    # that group
    locks[locking.LEVEL_NODE] = locking.ALL_SET
    locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
  else:
    self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
    node_names = [self.op.pnode]
    if self.op.snode is not None:
      self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
      node_names.append(self.op.snode)
    locks[locking.LEVEL_NODE] = node_names

  # in case of import lock the source node too
  if self.op.mode == constants.INSTANCE_IMPORT:
    source_node = self.op.src_node
    export_path = self.op.src_path

    if export_path is None:
      # without an explicit path, the instance name is used
      export_path = self.op.instance_name
      self.op.src_path = export_path

    if source_node is not None:
      source_node = _ExpandNodeName(self.cfg, source_node)
      self.op.src_node = source_node
      # nothing to add if all nodes are locked already
      if locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        locks[locking.LEVEL_NODE].append(source_node)
      if not os.path.isabs(export_path):
        self.op.src_path = utils.PathJoin(pathutils.EXPORT_DIR, export_path)
    else:
      # the export has to be searched for, hence all nodes are locked
      locks[locking.LEVEL_NODE] = locking.ALL_SET
      locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
      self.op.src_node = None
      if os.path.isabs(export_path):
        raise errors.OpPrereqError("Importing an instance from a path"
                                   " requires a source node option",
                                   errors.ECODE_INVAL)

  # the node resource locks are a copy of the node locks
  locks[locking.LEVEL_NODE_RES] = _CopyLockList(locks[locking.LEVEL_NODE])
9935
def _RunAllocator(self):
  """Run the iallocator named in the opcode and store the chosen nodes.

  The first node of the result becomes C{self.op.pnode}; if the request
  requires two nodes, the second one becomes C{self.op.snode}.

  @raise errors.OpPrereqError: if the allocator run was not successful

  """
  #TODO Export network to iallocator so that it chooses a pnode
  #     in a nodegroup that has the desired network connected to
  request = _CreateInstanceAllocRequest(self.op, self.disks,
                                        self.nics, self.be_full,
                                        None)
  allocator = iallocator.IAllocator(self.cfg, self.rpc, request)

  allocator.Run(self.op.iallocator)

  if not allocator.success:
    raise errors.OpPrereqError("Can't compute nodes using"
                               " iallocator '%s': %s" %
                               (self.op.iallocator, allocator.info),
                               errors.ECODE_NORES)

  self.op.pnode = allocator.result[0]
  self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
               self.op.instance_name, self.op.iallocator,
               utils.CommaJoin(allocator.result))

  node_count = request.RequiredNodes()
  assert node_count in (1, 2), "Wrong node count from iallocator"

  if node_count == 2:
    self.op.snode = allocator.result[1]
9963
def BuildHooksEnv(self):
  """Build the hooks environment.

  This runs on master, primary and secondary nodes of the instance.

  @rtype: dict
  @return: the creation mode, the import source details (for imports
      only) and the entries computed by L{_BuildInstanceHookEnv}

  """
  env = {
    "ADD_MODE": self.op.mode,
    }

  if self.op.mode == constants.INSTANCE_IMPORT:
    env.update({
      "SRC_NODE": self.op.src_node,
      "SRC_PATH": self.op.src_path,
      "SRC_IMAGES": self.src_images,
      })

  # disks are passed on as (size, mode) pairs
  disk_info = [(disk[constants.IDISK_SIZE], disk[constants.IDISK_MODE])
               for disk in self.disks]

  instance_env = _BuildInstanceHookEnv(
    name=self.op.instance_name,
    primary_node=self.op.pnode,
    secondary_nodes=self.secondaries,
    status=self.op.start,
    os_type=self.op.os_type,
    minmem=self.be_full[constants.BE_MINMEM],
    maxmem=self.be_full[constants.BE_MAXMEM],
    vcpus=self.be_full[constants.BE_VCPUS],
    nics=_NICListToTuple(self, self.nics),
    disk_template=self.op.disk_template,
    disks=disk_info,
    bep=self.be_full,
    hvp=self.hv_full,
    hypervisor_name=self.op.hypervisor,
    tags=self.op.tags,
  )
  env.update(instance_env)

  return env
9998
def BuildHooksNodes(self):
  """Build the list of nodes for the hooks.

  @rtype: tuple
  @return: a two-element tuple holding the same list twice: the master
      node, the primary node and then the secondary nodes

  """
  leading_nodes = [self.cfg.GetMasterNode(), self.op.pnode]
  hook_nodes = leading_nodes + self.secondaries
  return (hook_nodes, hook_nodes)
10005
def _ReadExportInfo(self):
  """Read the export information from the source node.

  If no source node was given in the opcode, the export is searched for
  on the locked nodes, and the opcode source node and path are
  overridden with the location found.

  @return: the export information
  @raise errors.OpPrereqError: if no export is found for a relative path
      or the export has the wrong version
  @raise errors.ProgrammerError: if the export config lacks the export
      section

  """
  assert self.op.mode == constants.INSTANCE_IMPORT

  source_node = self.op.src_node
  export_path = self.op.src_path

  if source_node is None:
    candidates = self.owned_locks(locking.LEVEL_NODE)
    export_lists = self.rpc.call_export_list(candidates)
    for node_name in export_lists:
      node_result = export_lists[node_name]
      # nodes whose export list could not be retrieved are skipped
      if node_result.fail_msg:
        continue
      if export_path in node_result.payload:
        self.op.src_node = source_node = node_name
        export_path = utils.PathJoin(pathutils.EXPORT_DIR, export_path)
        self.op.src_path = export_path
        break
    else:
      # the loop ended without any node having the export
      raise errors.OpPrereqError("No export found for relative path %s" %
                                 export_path, errors.ECODE_INVAL)

  _CheckNodeOnline(self, source_node)
  info_result = self.rpc.call_export_info(source_node, export_path)
  info_result.Raise("No export or invalid export found in dir %s" %
                    export_path)

  serialized = str(info_result.payload)
  export_info = objects.SerializableConfigParser.Loads(serialized)
  if not export_info.has_section(constants.INISECT_EXP):
    raise errors.ProgrammerError("Corrupted export config",
                                 errors.ECODE_ENVIRON)

  found_version = export_info.get(constants.INISECT_EXP, "version")
  if int(found_version) != constants.EXPORT_VERSION:
    raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                               (found_version, constants.EXPORT_VERSION),
                               errors.ECODE_ENVIRON)

  return export_info
10052
def _ReadExportParams(self, einfo):
  """Use the export parameters as defaults for the opcode.

  Parameters which the opcode doesn't specify are taken from the export
  information, if that declares them; values given in the opcode are
  not overridden. The OS type is always taken from the export.

  @param einfo: the export information, as a config parser object
  @raise errors.OpPrereqError: if the disk template or the disk
      information is given neither in the opcode nor in the export, or
      if the exported disk template is not a known one

  """
  ins_section = constants.INISECT_INS

  self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

  if self.op.disk_template is None:
    if not einfo.has_option(ins_section, "disk_template"):
      raise errors.OpPrereqError("No disk template specified and the export"
                                 " is missing the disk_template information",
                                 errors.ECODE_INVAL)
    self.op.disk_template = einfo.get(ins_section, "disk_template")
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Disk template specified in configuration"
                                 " file is not one of the allowed values:"
                                 " %s" %
                                 " ".join(constants.DISK_TEMPLATES),
                                 errors.ECODE_INVAL)

  if not self.op.disks:
    imported_disks = []
    # TODO: import the disk iv_name too
    for idx in range(constants.MAX_DISKS):
      size_option = "disk%d_size" % idx
      # indices without a size option are skipped, the scan goes on
      if not einfo.has_option(ins_section, size_option):
        continue
      imported_disks.append({
        constants.IDISK_SIZE: einfo.getint(ins_section, size_option),
        })
    self.op.disks = imported_disks
    if not imported_disks and self.op.disk_template != constants.DT_DISKLESS:
      raise errors.OpPrereqError("No disk info specified and the export"
                                 " is missing the disk information",
                                 errors.ECODE_INVAL)

  if not self.op.nics:
    nic_fields = list(constants.NICS_PARAMETERS) + ["ip", "mac"]
    imported_nics = []
    for idx in range(constants.MAX_NICS):
      # the scan stops at the first index which has no MAC option
      if not einfo.has_option(ins_section, "nic%d_mac" % idx):
        break
      imported_nics.append(dict([
        (field, einfo.get(ins_section, "nic%d_%s" % (idx, field)))
        for field in nic_fields]))
    self.op.nics = imported_nics

  if not self.op.tags and einfo.has_option(ins_section, "tags"):
    self.op.tags = einfo.get(ins_section, "tags").split()

  if (self.op.hypervisor is None and
      einfo.has_option(ins_section, "hypervisor")):
    self.op.hypervisor = einfo.get(ins_section, "hypervisor")

  if einfo.has_section(constants.INISECT_HYP):
    # use the export parameters but do not override the ones
    # specified by the user
    for (name, value) in einfo.items(constants.INISECT_HYP):
      self.op.hvparams.setdefault(name, value)

  if einfo.has_section(constants.INISECT_BEP):
    # use the parameters, without overriding
    for (name, value) in einfo.items(constants.INISECT_BEP):
      self.op.beparams.setdefault(name, value)
      # Compatibility for the old "memory" be param
      if name == constants.BE_MEMORY:
        for mem_param in (constants.BE_MAXMEM, constants.BE_MINMEM):
          self.op.beparams.setdefault(mem_param, value)
  else:
    # try to read the parameters old style, from the main section
    for name in constants.BES_PARAMETERS:
      if name in self.op.beparams:
        continue
      if einfo.has_option(ins_section, name):
        self.op.beparams[name] = einfo.get(ins_section, name)

  if einfo.has_section(constants.INISECT_OSP):
    # use the parameters, without overriding
    for (name, value) in einfo.items(constants.INISECT_OSP):
      self.op.osparams.setdefault(name, value)
10141
def _RevertToDefaults(self, cluster):
  """Remove the instance parameters which equal the default values.

  The hypervisor, backend, NIC and OS parameters of the opcode are
  modified in place: every entry whose value is the same as the one
  filled in by the cluster is deleted.

  @param cluster: the cluster config object used for computing the
      default values

  """
  def _StripDefaults(params, defaults, names):
    """Delete the C{names} from C{params} whose value is the default.

    """
    for name in names:
      if (name in params and name in defaults and
          params[name] == defaults[name]):
        del params[name]

  # hvparams
  hv_defaults = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
  # the keys are snapshotted, as entries are deleted while going over them
  _StripDefaults(self.op.hvparams, hv_defaults,
                 list(self.op.hvparams.keys()))

  # beparams
  be_defaults = cluster.SimpleFillBE({})
  _StripDefaults(self.op.beparams, be_defaults,
                 list(self.op.beparams.keys()))

  # nic params
  nic_defaults = cluster.SimpleFillNIC({})
  for nic in self.op.nics:
    _StripDefaults(nic, nic_defaults, constants.NICS_PARAMETERS)

  # osparams
  os_defaults = cluster.SimpleFillOS(self.op.os_type, {})
  _StripDefaults(self.op.osparams, os_defaults,
                 list(self.op.osparams.keys()))
10167
def _CalculateFileStorageDir(self):
  """Calculate the final file storage dir of the instance.

  The result is stored in C{self.instance_file_storage_dir}; it is
  C{None} for disk templates which are not file-based. Otherwise it is
  the cluster's (shared) file storage dir, joined with the optional
  directory given in the opcode and with the instance name.

  @raise errors.OpPrereqError: if the cluster has no file storage dir
      defined for the disk template

  """
  self.instance_file_storage_dir = None

  if self.op.disk_template not in constants.DTS_FILEBASED:
    return

  # the base directory depends on the kind of file storage
  if self.op.disk_template == constants.DT_SHARED_FILE:
    base_dir = self.cfg.GetSharedFileStorageDir()
  else:
    base_dir = self.cfg.GetFileStorageDir()

  if not base_dir:
    raise errors.OpPrereqError("Cluster file storage dir not defined",
                               errors.ECODE_STATE)

  components = [base_dir]
  if self.op.file_storage_dir is not None:
    components.append(self.op.file_storage_dir)
  components.append(self.op.instance_name)

  # pylint: disable=W0142
  self.instance_file_storage_dir = utils.PathJoin(*components)
10196
  def CheckPrereq(self): # pylint: disable=R0914
    """Check prerequisites.

    Validates the opcode against the cluster configuration and the chosen
    nodes, and prepares the state that L{Exec} reads afterwards. The
    following attributes are set here:
      - C{instance_file_storage_dir} (via L{_CalculateFileStorageDir})
      - C{_old_instance_name} (name stored in the export, or C{None})
      - C{hv_full}, C{be_full}, C{os_full} (filled parameter dicts)
      - C{nics}, C{disks}
      - C{src_images} (import mode only)
      - C{pnode}, C{secondaries}
      - C{dry_run_result} (list of the node names involved)

    Besides the pure checks, this method also reserves resources in the
    configuration (MAC addresses, IP addresses, adopted LV names), may run
    the instance allocator, and releases the node locks that turn out not
    to be needed.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # let the export data fill in opcode parameters and remember the
      # name the instance had when it was exported
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)
      self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
    else:
      self._old_instance_name = None

    # without a volume group only the non-LVM disk templates are usable
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    # fall back to the hypervisor type reported by the configuration
    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" %
                                 (self.op.hypervisor, ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    self.be_full = _ComputeFullBeParams(self.op, cluster)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
                             self.proc.GetECId())

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = _ComputeDisks(self.op, default_vg)

    if self.op.mode == constants.INSTANCE_IMPORT:
      # one entry per disk: the path of its dump below src_path, or False
      # when the export has no dump for that disk (Exec skips such entries)
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      # when importing under the exported name, NICs left on "auto" take
      # over the MAC address recorded in the export
      if self.op.instance_name == self._old_instance_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      # a successful TCP ping means something already answers on that IP
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    # (filter(None, ...) drops the node names that are unset/empty)
    keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
    _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
    _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Node locks differ from node resource locks"

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # Fill in any IPs from IP pools. This must happen here, because we need to
    # know the nic's primary node, as specified by the iallocator
    for idx, nic in enumerate(self.nics):
      net = nic.network
      if net is not None:
        netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
        if netparams is None:
          raise errors.OpPrereqError("No netparams found for network"
                                     " %s. Propably not connected to"
                                     " node's %s nodegroup" %
                                     (net, self.pnode.name),
                                     errors.ECODE_INVAL)
        self.LogInfo("NIC/%d inherits netparams %s" %
                     (idx, netparams.values()))
        # the NIC gets its own copy of the network's parameters
        nic.nicparams = dict(netparams)
        if nic.ip is not None:
          if nic.ip.lower() == constants.NIC_IP_POOL:
            # "pool" requested: have the configuration pick an address
            try:
              nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
                                         " from the address pool" % idx,
                                         errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from network %s", nic.ip, net)
          else:
            # explicit address: reserve it in the network
            try:
              self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP address %s already in use"
                                         " or does not belong to network %s" %
                                         (nic.ip, net),
                                         errors.ECODE_NOTUNIQUE)
      else:
        # net is None, ip None or given
        if self.op.conflicts_check:
          _CheckForConflictingIp(self, nic.ip, self.pnode.name)

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    nodenames = [pnode.name] + self.secondaries

    # Verify instance specs
    # NOTE(review): the same instance policy verification is repeated
    # further down, after the adoption branches have refreshed the disk
    # sizes; this first pass sees the sizes as produced by _ComputeDisks.
    # Confirm whether both passes are intended.
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    # violations are only fatal unless the opcode asks to ignore the policy
    if not self.op.ignore_ipolicy and res:
      msg = ("Instance allocation to group %s (%s) violates policy: %s" %
             (pnode.group, group_info.name, utils.CommaJoin(res)))
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()
      else:
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      # a set of "vg/lv" names; a smaller set than the disk list means the
      # same volume was named more than once
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      # every volume to adopt must exist on the primary node
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      # NOTE(review): element 2 of each LV entry is treated as the "online"
      # flag, judging by the error message below - confirm against the
      # lv_list RPC
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      # (element 0 of the LV entry is used as the size)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      # only devices below the adoptable root may be taken over
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (utils.CommaJoin(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      # take the disk sizes from what the node reports
      # NOTE(review): the lookup uses the adopt path as given, while the
      # keys requested above were normalized with abspath - confirm that
      # non-normalized paths cannot reach this point
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    # Verify instance specs
    # (second pass, now with the disk sizes possibly updated by adoption)
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
                                  for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      raise errors.OpPrereqError(("Instance allocation to group %s violates"
                                  " policy: %s") % (pnode.group,
                                                    utils.CommaJoin(res)),
                                  errors.ECODE_INVAL)

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    # (only needed when the instance is going to be started)
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)
10528
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    Generates the disk objects, builds the instance object, creates (or, for
    adoption, renames) the disks, adds the instance to the configuration,
    optionally wipes and syncs the disks, then runs the OS creation or
    import step and finally starts the instance if requested.

    @param feedback_fn: callable invoked with progress messages
    @return: list with the names of all nodes of the new instance
    @raise errors.OpExecError: if disk creation fails or the disks end up
        degraded; other failures are raised through the C{Raise} method of
        the RPC results
    @raise errors.ProgrammerError: on an unknown creation mode

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"
    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

    # only some hypervisors need a network port allocated
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    # This is ugly but we got a chicken-egg problem here
    # We can only take the group disk parameters, as the instance
    # has no disks yet (we are generating them right here).
    node = self.cfg.GetNodeInfo(pnode_name)
    nodegroup = self.cfg.GetNodeGroup(node.group)
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn,
                                  self.cfg.GetGroupDiskParams(nodegroup))

    # the instance is created in the "down" state; it is only marked "up"
    # at the very end, if the opcode asks for it to be started
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          # remember the generated name, then point the copy at the
          # existing (to be adopted) volume
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        # the DRBD minors are released whether or not the disk removal
        # works, and an exception is propagated in either case
        # NOTE(review): a bare raise inside "finally" re-raises whichever
        # exception is current at that point - confirm this is the
        # creation error when _RemoveDisks itself fails
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      # (the source node is still needed for the disk transfer below)
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # undo everything done so far: disks, configuration entry and lock
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      # we need to set the disks ID to the primary node, since the
      # preceding code might or might have not done it, depending on
      # disk template and other options
      for disk in iobj.disks:
        self.cfg.SetDiskID(disk, pnode_name)
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          # the mirror sync is only paused when nobody waited for it above
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), True)
            # NOTE(review): the payload is iterated without checking
            # whether the RPC itself failed - confirm it is always iterable
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          # the sync is resumed before the OS result is evaluated, so a
          # failed installation does not leave the sync paused
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      else:
        if self.op.mode == constants.INSTANCE_IMPORT:
          feedback_fn("* running the instance OS import scripts...")

          transfers = []

          # one transfer per disk that has a dump in the export
          for idx, image in enumerate(self.src_images):
            if not image:
              continue

            # FIXME: pass debug option from opcode to backend
            dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                               constants.IEIO_FILE, (image, ),
                                               constants.IEIO_SCRIPT,
                                               (iobj.disks[idx], idx),
                                               None)
            transfers.append(dt)

          import_result = \
            masterd.instance.TransferInstanceData(self, feedback_fn,
                                                  self.op.src_node, pnode_name,
                                                  self.pnode.secondary_ip,
                                                  iobj, transfers)
          # a failed transfer only produces a warning
          if not compat.all(import_result):
            self.LogWarning("Some disks for instance %s on node %s were not"
                            " imported successfully" % (instance, pnode_name))

          rename_from = self._old_instance_name

        elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
          feedback_fn("* preparing remote import...")
          # The source cluster will stop the instance before attempting to make
          # a connection. In some cases stopping an instance can take a long
          # time, hence the shutdown timeout is added to the connection
          # timeout.
          connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                             self.op.source_shutdown_timeout)
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          assert iobj.primary_node == self.pnode.name
          disk_results = \
            masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                          self.source_x509_ca,
                                          self._cds, timeouts)
          if not compat.all(disk_results):
            # TODO: Should the instance still be started, even if some disks
            # failed to import (valid for local imports, too)?
            self.LogWarning("Some disks for instance %s on node %s were not"
                            " imported successfully" % (instance, pnode_name))

          rename_from = self.source_instance_name

        else:
          # also checked in the prereq part
          raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                       % self.op.mode)

        # Run rename script on newly imported instance
        # (a failure here is only logged as a warning)
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   rename_from,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      # record the new admin state in the configuration before starting
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
10769
10770
10771 class LUInstanceMultiAlloc(NoHooksLU):
10772   """Allocates multiple instances at the same time.
10773
10774   """
10775   REQ_BGL = False
10776
10777   def CheckArguments(self):
10778     """Check arguments.
10779
10780     """
10781     nodes = []
10782     for inst in self.op.instances:
10783       if inst.iallocator is not None:
10784         raise errors.OpPrereqError("iallocator are not allowed to be set on"
10785                                    " instance objects", errors.ECODE_INVAL)
10786       nodes.append(bool(inst.pnode))
10787       if inst.disk_template in constants.DTS_INT_MIRROR:
10788         nodes.append(bool(inst.snode))
10789
10790     has_nodes = compat.any(nodes)
10791     if compat.all(nodes) ^ has_nodes:
10792       raise errors.OpPrereqError("There are instance objects providing"
10793                                  " pnode/snode while others do not",
10794                                  errors.ECODE_INVAL)
10795
10796     if self.op.iallocator is None:
10797       default_iallocator = self.cfg.GetDefaultIAllocator()
10798       if default_iallocator and has_nodes:
10799         self.op.iallocator = default_iallocator
10800       else:
10801         raise errors.OpPrereqError("No iallocator or nodes on the instances"
10802                                    " given and no cluster-wide default"
10803                                    " iallocator found; please specify either"
10804                                    " an iallocator or nodes on the instances"
10805                                    " or set a cluster-wide default iallocator",
10806                                    errors.ECODE_INVAL)
10807
10808     dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10809     if dups:
10810       raise errors.OpPrereqError("There are duplicate instance names: %s" %
10811                                  utils.CommaJoin(dups), errors.ECODE_INVAL)
10812
10813   def ExpandNames(self):
10814     """Calculate the locks.
10815
10816     """
10817     self.share_locks = _ShareAll()
10818     self.needed_locks = {
10819       # iallocator will select nodes and even if no iallocator is used,
10820       # collisions with LUInstanceCreate should be avoided
10821       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
10822       }
10823
10824     if self.op.iallocator:
10825       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10826       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10827     else:
10828       nodeslist = []
10829       for inst in self.op.instances:
10830         inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10831         nodeslist.append(inst.pnode)
10832         if inst.snode is not None:
10833           inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10834           nodeslist.append(inst.snode)
10835
10836       self.needed_locks[locking.LEVEL_NODE] = nodeslist
10837       # Lock resources of instance's primary and secondary nodes (copy to
10838       # prevent accidential modification)
10839       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10840
10841   def CheckPrereq(self):
10842     """Check prerequisite.
10843
10844     """
10845     cluster = self.cfg.GetClusterInfo()
10846     default_vg = self.cfg.GetVGName()
10847     ec_id = self.proc.GetECId()
10848
10849     insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10850                                          _ComputeNics(op, cluster, None,
10851                                                       self.cfg, ec_id),
10852                                          _ComputeFullBeParams(op, cluster),
10853                                          None)
10854              for op in self.op.instances]
10855
10856     req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10857     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10858
10859     ial.Run(self.op.iallocator)
10860
10861     if not ial.success:
10862       raise errors.OpPrereqError("Can't compute nodes using"
10863                                  " iallocator '%s': %s" %
10864                                  (self.op.iallocator, ial.info),
10865                                  errors.ECODE_NORES)
10866
10867     self.ia_result = ial.result
10868
10869     if self.op.dry_run:
10870       self.dry_run_rsult = objects.FillDict(self._ConstructPartialResult(), {
10871         constants.JOB_IDS_KEY: [],
10872         })
10873
10874   def _ConstructPartialResult(self):
10875     """Contructs the partial result.
10876
10877     """
10878     (allocatable, failed) = self.ia_result
10879     return {
10880       opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10881         map(compat.fst, allocatable),
10882       opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10883       }
10884
10885   def Exec(self, feedback_fn):
10886     """Executes the opcode.
10887
10888     """
10889     op2inst = dict((op.instance_name, op) for op in self.op.instances)
10890     (allocatable, failed) = self.ia_result
10891
10892     jobs = []
10893     for (name, nodes) in allocatable:
10894       op = op2inst.pop(name)
10895
10896       if len(nodes) > 1:
10897         (op.pnode, op.snode) = nodes
10898       else:
10899         (op.pnode,) = nodes
10900
10901       jobs.append([op])
10902
10903     missing = set(op2inst.keys()) - set(failed)
10904     assert not missing, \
10905       "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10906
10907     return ResultWithJobs(jobs, **self._ConstructPartialResult())
10908
10909
10910 def _CheckRADOSFreeSpace():
10911   """Compute disk size requirements inside the RADOS cluster.
10912
10913   """
10914   # For the RADOS cluster we assume there is always enough space.
10915   pass
10916
10917
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Configure lock sharing and lock the instance.

    """
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the (locked) instance in the configuration and verifies
    that its primary node is online.

    """
    inst_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    @return: the console information computed by L{_GetInstanceConsole}
    @raise errors.OpExecError: if the instance is not listed as running on
        its primary node

    """
    instance = self.instance
    pnode = instance.primary_node

    all_results = self.rpc.call_instance_list([pnode], [instance.hypervisor])
    running = all_results[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if instance.name in running.payload:
      logging.debug("Connecting to console of %s on %s", instance.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)

    # Instance is not running; derive the state to report from the
    # configured admin state
    if instance.admin_state == constants.ADMINST_UP:
      state = constants.INSTST_ERRORDOWN
    elif instance.admin_state == constants.ADMINST_DOWN:
      state = constants.INSTST_ADMINDOWN
    else:
      state = constants.INSTST_ADMINOFFLINE

    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (instance.name, state))
10967
10968
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  The console object is requested from the instance's hypervisor and
  returned in dictionary form.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hv_impl = hypervisor.GetHypervisor(instance.hypervisor)

  # The filled hvparams and beparams are passed as separate arguments, to
  # avoid editing the instance and then saving the defaults in the instance
  # itself.
  console = hv_impl.GetInstanceConsole(instance,
                                       cluster.FillHV(instance),
                                       cluster.FillBE(instance))

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
10988
10989
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  This logical unit validates the opcode arguments, declares the locks and
  provides the hooks data; the replacement itself is delegated to a
  L{TLReplaceDisks} tasklet created in L{ExpandNames}.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    A new node or an iallocator is required when changing the secondary
    node and refused in all other modes.

    @raise errors.OpPrereqError: if the options do not fit the mode

    """
    target_given = (self.op.remote_node is not None or
                    self.op.iallocator is not None)

    if self.op.mode != constants.REPLACE_DISK_CHG:
      if target_given:
        # Not replacing the secondary
        raise errors.OpPrereqError("The iallocator and new node options can"
                                   " only be used when changing the"
                                   " secondary node", errors.ECODE_INVAL)
      return

    if not target_given:
      raise errors.OpPrereqError("When changing the secondary either an"
                                 " iallocator script must be used or the"
                                 " new node given", errors.ECODE_INVAL)

    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Lock the instance, prepare the node-level locks, create the tasklet.

    """
    self._ExpandAndLockInstance()

    for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES,
                  locking.LEVEL_NODEGROUP):
      assert level not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is None:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    op = self.op
    self.replacer = TLReplaceDisks(self, op.instance_name, op.mode,
                                   op.iallocator, op.remote_node,
                                   op.disks, op.early_release,
                                   op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Fill in the node group, node and node resource locks per level.

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      self.needed_locks[locking.LEVEL_NODEGROUP] = instance_groups

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is None:
        assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

        self._LockInstancesNodes()
      else:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)

        # Lock member nodes of all locked groups
        member_nodes = []
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          member_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)
        self.needed_locks[locking.LEVEL_NODE] = member_nodes

    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = self.op.remote_node
    env["OLD_SECONDARY"] = instance.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same node list twice; it contains the master node, the
        instance's primary node and, if given, the new secondary node

    """
    instance = self.replacer.instance
    hook_nodes = [self.cfg.GetMasterNode(), instance.primary_node]

    if self.op.remote_node is not None:
      hook_nodes.append(self.op.remote_node)

    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically acquired node group locks (if any) and
    then hands over to L{LogicalUnit.CheckPrereq}.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    group_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if group_locks:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, group_locks)

    return LogicalUnit.CheckPrereq(self)
11129
11130
11131 class TLReplaceDisks(Tasklet):
11132   """Replaces disks for an instance.
11133
11134   Note: Locking is not within the scope of this class.
11135
11136   """
11137   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11138                disks, early_release, ignore_ipolicy):
11139     """Initializes this class.
11140
11141     """
11142     Tasklet.__init__(self, lu)
11143
11144     # Parameters
11145     self.instance_name = instance_name
11146     self.mode = mode
11147     self.iallocator_name = iallocator_name
11148     self.remote_node = remote_node
11149     self.disks = disks
11150     self.early_release = early_release
11151     self.ignore_ipolicy = ignore_ipolicy
11152
11153     # Runtime data
11154     self.instance = None
11155     self.new_node = None
11156     self.target_node = None
11157     self.other_node = None
11158     self.remote_node_info = None
11159     self.node_secondary_ip = None
11160
11161   @staticmethod
11162   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11163     """Compute a new secondary node using an IAllocator.
11164
11165     """
11166     req = iallocator.IAReqRelocate(name=instance_name,
11167                                    relocate_from=list(relocate_from))
11168     ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11169
11170     ial.Run(iallocator_name)
11171
11172     if not ial.success:
11173       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11174                                  " %s" % (iallocator_name, ial.info),
11175                                  errors.ECODE_NORES)
11176
11177     remote_node_name = ial.result[0]
11178
11179     lu.LogInfo("Selected new secondary for instance '%s': %s",
11180                instance_name, remote_node_name)
11181
11182     return remote_node_name
11183
11184   def _FindFaultyDisks(self, node_name):
11185     """Wrapper for L{_FindFaultyInstanceDisks}.
11186
11187     """
11188     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11189                                     node_name, True)
11190
11191   def _CheckDisksActivated(self, instance):
11192     """Checks if the instance disks are activated.
11193
11194     @param instance: The instance to check disks
11195     @return: True if they are activated, False otherwise
11196
11197     """
11198     nodes = instance.all_nodes
11199
11200     for idx, dev in enumerate(instance.disks):
11201       for node in nodes:
11202         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11203         self.cfg.SetDiskID(dev, node)
11204
11205         result = _BlockdevFind(self, node, dev, instance)
11206
11207         if result.offline:
11208           continue
11209         elif result.fail_msg or not result.payload:
11210           return False
11211
11212     return True
11213
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    In detail, this method:
      - requires a DRBD8 instance with exactly one secondary node
      - determines the new secondary node, either from the parameters or by
        running the iallocator, and refuses the current primary/secondary
      - refuses explicit disk lists for the automatic and the
        change-secondary modes
      - computes C{target_node}, C{other_node}, C{new_node} and the disks to
        replace depending on the mode
      - verifies the instance policy for a new node and that all nodes to be
        checked are online
      - releases the locks which are no longer needed
      - validates the disk indices and collects the secondary IP addresses
        of the touched nodes in C{node_secondary_ip}

    @raise errors.OpPrereqError: if one of the checks fails
    @raise errors.ProgrammerError: for an unknown replace mode

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either the node given as parameter (possibly
    # None) or the one selected by the iallocator
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Automatic mode: the disks to replace are the faulty ones, which must
      # all be on the same node
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty; Exec returns early on an empty disk list
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # The old secondary (target_node) is not in the list of nodes whose
        # online status is checked below
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between internal
    # submitted opcode and external one. We should fix that.
    if self.remote_node_info:
      # We change the node, lets verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      cluster = self.cfg.GetClusterInfo()
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              new_group_info)
      # NOTE(review): the tasklet itself is passed here, while the other
      # check helpers in this method receive self.lu -- confirm the helper
      # only needs attributes which the tasklet provides
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Only the nodes taking part in the operation stay locked
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    # Release any owned node group
    _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
11366
11367   def Exec(self, feedback_fn):
11368     """Execute disk replacement.
11369
11370     This dispatches the disk replacement to the appropriate handler.
11371
11372     """
11373     if __debug__:
11374       # Verify owned locks before starting operation
11375       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11376       assert set(owned_nodes) == set(self.node_secondary_ip), \
11377           ("Incorrect node locks, owning %s, expected %s" %
11378            (owned_nodes, self.node_secondary_ip.keys()))
11379       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11380               self.lu.owned_locks(locking.LEVEL_NODE_RES))
11381       assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11382
11383       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11384       assert list(owned_instances) == [self.instance_name], \
11385           "Instance '%s' not locked" % self.instance_name
11386
11387       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11388           "Should not own any node group lock at this point"
11389
11390     if not self.disks:
11391       feedback_fn("No disks need replacement for instance '%s'" %
11392                   self.instance.name)
11393       return
11394
11395     feedback_fn("Replacing disk(s) %s for instance '%s'" %
11396                 (utils.CommaJoin(self.disks), self.instance.name))
11397     feedback_fn("Current primary node: %s", self.instance.primary_node)
11398     feedback_fn("Current seconary node: %s",
11399                 utils.CommaJoin(self.instance.secondary_nodes))
11400
11401     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11402
11403     # Activate the instance disks if we're replacing them on a down instance
11404     if activate_disks:
11405       _StartInstanceDisks(self.lu, self.instance, True)
11406
11407     try:
11408       # Should we replace the secondary node?
11409       if self.new_node is not None:
11410         fn = self._ExecDrbd8Secondary
11411       else:
11412         fn = self._ExecDrbd8DiskOnly
11413
11414       result = fn(feedback_fn)
11415     finally:
11416       # Deactivate the instance disks if we're replacing them on a
11417       # down instance
11418       if activate_disks:
11419         _SafeShutdownInstanceDisks(self.lu, self.instance)
11420
11421     assert not self.lu.owned_locks(locking.LEVEL_NODE)
11422
11423     if __debug__:
11424       # Verify owned locks
11425       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11426       nodes = frozenset(self.node_secondary_ip)
11427       assert ((self.early_release and not owned_nodes) or
11428               (not self.early_release and not (set(owned_nodes) - nodes))), \
11429         ("Not owning the correct locks, early_release=%s, owned=%r,"
11430          " nodes=%r" % (self.early_release, owned_nodes, nodes))
11431
11432     return result
11433
11434   def _CheckVolumeGroup(self, nodes):
11435     self.lu.LogInfo("Checking volume groups")
11436
11437     vgname = self.cfg.GetVGName()
11438
11439     # Make sure volume group exists on all involved nodes
11440     results = self.rpc.call_vg_list(nodes)
11441     if not results:
11442       raise errors.OpExecError("Can't list volume groups on the nodes")
11443
11444     for node in nodes:
11445       res = results[node]
11446       res.Raise("Error checking node %s" % node)
11447       if vgname not in res.payload:
11448         raise errors.OpExecError("Volume group '%s' not found on node %s" %
11449                                  (vgname, node))
11450
11451   def _CheckDisksExistence(self, nodes):
11452     # Check disk existence
11453     for idx, dev in enumerate(self.instance.disks):
11454       if idx not in self.disks:
11455         continue
11456
11457       for node in nodes:
11458         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11459         self.cfg.SetDiskID(dev, node)
11460
11461         result = _BlockdevFind(self, node, dev, self.instance)
11462
11463         msg = result.fail_msg
11464         if msg or not result.payload:
11465           if not msg:
11466             msg = "disk not found"
11467           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11468                                    (idx, node, msg))
11469
11470   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11471     for idx, dev in enumerate(self.instance.disks):
11472       if idx not in self.disks:
11473         continue
11474
11475       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11476                       (idx, node_name))
11477
11478       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11479                                    on_primary, ldisk=ldisk):
11480         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11481                                  " replace disks for instance %s" %
11482                                  (node_name, self.instance.name))
11483
11484   def _CreateNewStorage(self, node_name):
11485     """Create new storage on the primary or secondary node.
11486
11487     This is only used for same-node replaces, not for changing the
11488     secondary node, hence we don't want to modify the existing disk.
11489
11490     """
11491     iv_names = {}
11492
11493     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11494     for idx, dev in enumerate(disks):
11495       if idx not in self.disks:
11496         continue
11497
11498       self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11499
11500       self.cfg.SetDiskID(dev, node_name)
11501
11502       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11503       names = _GenerateUniqueNames(self.lu, lv_names)
11504
11505       (data_disk, meta_disk) = dev.children
11506       vg_data = data_disk.logical_id[0]
11507       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11508                              logical_id=(vg_data, names[0]),
11509                              params=data_disk.params)
11510       vg_meta = meta_disk.logical_id[0]
11511       lv_meta = objects.Disk(dev_type=constants.LD_LV,
11512                              size=constants.DRBD_META_SIZE,
11513                              logical_id=(vg_meta, names[1]),
11514                              params=meta_disk.params)
11515
11516       new_lvs = [lv_data, lv_meta]
11517       old_lvs = [child.Copy() for child in dev.children]
11518       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11519
11520       # we pass force_create=True to force the LVM creation
11521       for new_lv in new_lvs:
11522         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11523                              _GetInstanceInfoText(self.instance), False)
11524
11525     return iv_names
11526
11527   def _CheckDevices(self, node_name, iv_names):
11528     for name, (dev, _, _) in iv_names.iteritems():
11529       self.cfg.SetDiskID(dev, node_name)
11530
11531       result = _BlockdevFind(self, node_name, dev, self.instance)
11532
11533       msg = result.fail_msg
11534       if msg or not result.payload:
11535         if not msg:
11536           msg = "disk not found"
11537         raise errors.OpExecError("Can't find DRBD device %s: %s" %
11538                                  (name, msg))
11539
11540       if result.payload.is_degraded:
11541         raise errors.OpExecError("DRBD device %s is degraded!" % name)
11542
11543   def _RemoveOldStorage(self, node_name, iv_names):
11544     for name, (_, old_lvs, _) in iv_names.iteritems():
11545       self.lu.LogInfo("Remove logical volumes for %s", name)
11546
11547       for lv in old_lvs:
11548         self.cfg.SetDiskID(lv, node_name)
11549
11550         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11551         if msg:
11552           self.lu.LogWarning("Can't remove old LV: %s", msg,
11553                              hint="remove unused LVs manually")
11554
11555   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11556     """Replace a disk on the primary or secondary for DRBD 8.
11557
11558     The algorithm for replace is quite complicated:
11559
11560       1. for each disk to be replaced:
11561
11562         1. create new LVs on the target node with unique names
11563         1. detach old LVs from the drbd device
11564         1. rename old LVs to name_replaced.<time_t>
11565         1. rename new LVs to old LVs
11566         1. attach the new LVs (with the old names now) to the drbd device
11567
11568       1. wait for sync across all devices
11569
11570       1. for each modified disk:
11571
11572         1. remove old LVs (which have the name name_replaces.<time_t>)
11573
11574     Failures are not very well handled.
11575
11576     """
11577     steps_total = 6
11578
11579     # Step: check device activation
11580     self.lu.LogStep(1, steps_total, "Check device existence")
11581     self._CheckDisksExistence([self.other_node, self.target_node])
11582     self._CheckVolumeGroup([self.target_node, self.other_node])
11583
11584     # Step: check other node consistency
11585     self.lu.LogStep(2, steps_total, "Check peer consistency")
11586     self._CheckDisksConsistency(self.other_node,
11587                                 self.other_node == self.instance.primary_node,
11588                                 False)
11589
11590     # Step: create new storage
11591     self.lu.LogStep(3, steps_total, "Allocate new storage")
11592     iv_names = self._CreateNewStorage(self.target_node)
11593
11594     # Step: for each lv, detach+rename*2+attach
11595     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11596     for dev, old_lvs, new_lvs in iv_names.itervalues():
11597       self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11598
11599       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11600                                                      old_lvs)
11601       result.Raise("Can't detach drbd from local storage on node"
11602                    " %s for device %s" % (self.target_node, dev.iv_name))
11603       #dev.children = []
11604       #cfg.Update(instance)
11605
11606       # ok, we created the new LVs, so now we know we have the needed
11607       # storage; as such, we proceed on the target node to rename
11608       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11609       # using the assumption that logical_id == physical_id (which in
11610       # turn is the unique_id on that node)
11611
11612       # FIXME(iustin): use a better name for the replaced LVs
11613       temp_suffix = int(time.time())
11614       ren_fn = lambda d, suff: (d.physical_id[0],
11615                                 d.physical_id[1] + "_replaced-%s" % suff)
11616
11617       # Build the rename list based on what LVs exist on the node
11618       rename_old_to_new = []
11619       for to_ren in old_lvs:
11620         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11621         if not result.fail_msg and result.payload:
11622           # device exists
11623           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11624
11625       self.lu.LogInfo("Renaming the old LVs on the target node")
11626       result = self.rpc.call_blockdev_rename(self.target_node,
11627                                              rename_old_to_new)
11628       result.Raise("Can't rename old LVs on node %s" % self.target_node)
11629
11630       # Now we rename the new LVs to the old LVs
11631       self.lu.LogInfo("Renaming the new LVs on the target node")
11632       rename_new_to_old = [(new, old.physical_id)
11633                            for old, new in zip(old_lvs, new_lvs)]
11634       result = self.rpc.call_blockdev_rename(self.target_node,
11635                                              rename_new_to_old)
11636       result.Raise("Can't rename new LVs on node %s" % self.target_node)
11637
11638       # Intermediate steps of in memory modifications
11639       for old, new in zip(old_lvs, new_lvs):
11640         new.logical_id = old.logical_id
11641         self.cfg.SetDiskID(new, self.target_node)
11642
11643       # We need to modify old_lvs so that removal later removes the
11644       # right LVs, not the newly added ones; note that old_lvs is a
11645       # copy here
11646       for disk in old_lvs:
11647         disk.logical_id = ren_fn(disk, temp_suffix)
11648         self.cfg.SetDiskID(disk, self.target_node)
11649
11650       # Now that the new lvs have the old name, we can add them to the device
11651       self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11652       result = self.rpc.call_blockdev_addchildren(self.target_node,
11653                                                   (dev, self.instance), new_lvs)
11654       msg = result.fail_msg
11655       if msg:
11656         for new_lv in new_lvs:
11657           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11658                                                new_lv).fail_msg
11659           if msg2:
11660             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11661                                hint=("cleanup manually the unused logical"
11662                                      "volumes"))
11663         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11664
11665     cstep = itertools.count(5)
11666
11667     if self.early_release:
11668       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11669       self._RemoveOldStorage(self.target_node, iv_names)
11670       # TODO: Check if releasing locks early still makes sense
11671       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11672     else:
11673       # Release all resource locks except those used by the instance
11674       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11675                     keep=self.node_secondary_ip.keys())
11676
11677     # Release all node locks while waiting for sync
11678     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11679
11680     # TODO: Can the instance lock be downgraded here? Take the optional disk
11681     # shutdown in the caller into consideration.
11682
11683     # Wait for sync
11684     # This can fail as the old devices are degraded and _WaitForSync
11685     # does a combined result over all disks, so we don't check its return value
11686     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11687     _WaitForSync(self.lu, self.instance)
11688
11689     # Check all devices manually
11690     self._CheckDevices(self.instance.primary_node, iv_names)
11691
11692     # Step: remove old storage
11693     if not self.early_release:
11694       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11695       self._RemoveOldStorage(self.target_node, iv_names)
11696
11697   def _ExecDrbd8Secondary(self, feedback_fn):
11698     """Replace the secondary node for DRBD 8.
11699
11700     The algorithm for replace is quite complicated:
11701       - for all disks of the instance:
11702         - create new LVs on the new node with same names
11703         - shutdown the drbd device on the old secondary
11704         - disconnect the drbd network on the primary
11705         - create the drbd device on the new secondary
11706         - network attach the drbd on the primary, using an artifice:
11707           the drbd code for Attach() will connect to the network if it
11708           finds a device which is connected to the good local disks but
11709           not network enabled
11710       - wait for sync across all devices
11711       - remove all disks from the old secondary
11712
11713     Failures are not very well handled.
11714
11715     """
11716     steps_total = 6
11717
11718     pnode = self.instance.primary_node
11719
11720     # Step: check device activation
11721     self.lu.LogStep(1, steps_total, "Check device existence")
11722     self._CheckDisksExistence([self.instance.primary_node])
11723     self._CheckVolumeGroup([self.instance.primary_node])
11724
11725     # Step: check other node consistency
11726     self.lu.LogStep(2, steps_total, "Check peer consistency")
11727     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11728
11729     # Step: create new storage
11730     self.lu.LogStep(3, steps_total, "Allocate new storage")
11731     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11732     for idx, dev in enumerate(disks):
11733       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11734                       (self.new_node, idx))
11735       # we pass force_create=True to force LVM creation
11736       for new_lv in dev.children:
11737         _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11738                              True, _GetInstanceInfoText(self.instance), False)
11739
11740     # Step 4: dbrd minors and drbd setups changes
11741     # after this, we must manually remove the drbd minors on both the
11742     # error and the success paths
11743     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11744     minors = self.cfg.AllocateDRBDMinor([self.new_node
11745                                          for dev in self.instance.disks],
11746                                         self.instance.name)
11747     logging.debug("Allocated minors %r", minors)
11748
11749     iv_names = {}
11750     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11751       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11752                       (self.new_node, idx))
11753       # create new devices on new_node; note that we create two IDs:
11754       # one without port, so the drbd will be activated without
11755       # networking information on the new node at this stage, and one
11756       # with network, for the latter activation in step 4
11757       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11758       if self.instance.primary_node == o_node1:
11759         p_minor = o_minor1
11760       else:
11761         assert self.instance.primary_node == o_node2, "Three-node instance?"
11762         p_minor = o_minor2
11763
11764       new_alone_id = (self.instance.primary_node, self.new_node, None,
11765                       p_minor, new_minor, o_secret)
11766       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11767                     p_minor, new_minor, o_secret)
11768
11769       iv_names[idx] = (dev, dev.children, new_net_id)
11770       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11771                     new_net_id)
11772       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11773                               logical_id=new_alone_id,
11774                               children=dev.children,
11775                               size=dev.size,
11776                               params={})
11777       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11778                                              self.cfg)
11779       try:
11780         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11781                               anno_new_drbd,
11782                               _GetInstanceInfoText(self.instance), False)
11783       except errors.GenericError:
11784         self.cfg.ReleaseDRBDMinors(self.instance.name)
11785         raise
11786
11787     # We have new devices, shutdown the drbd on the old secondary
11788     for idx, dev in enumerate(self.instance.disks):
11789       self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
11790       self.cfg.SetDiskID(dev, self.target_node)
11791       msg = self.rpc.call_blockdev_shutdown(self.target_node,
11792                                             (dev, self.instance)).fail_msg
11793       if msg:
11794         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11795                            "node: %s" % (idx, msg),
11796                            hint=("Please cleanup this device manually as"
11797                                  " soon as possible"))
11798
11799     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11800     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11801                                                self.instance.disks)[pnode]
11802
11803     msg = result.fail_msg
11804     if msg:
11805       # detaches didn't succeed (unlikely)
11806       self.cfg.ReleaseDRBDMinors(self.instance.name)
11807       raise errors.OpExecError("Can't detach the disks from the network on"
11808                                " old node: %s" % (msg,))
11809
11810     # if we managed to detach at least one, we update all the disks of
11811     # the instance to point to the new secondary
11812     self.lu.LogInfo("Updating instance configuration")
11813     for dev, _, new_logical_id in iv_names.itervalues():
11814       dev.logical_id = new_logical_id
11815       self.cfg.SetDiskID(dev, self.instance.primary_node)
11816
11817     self.cfg.Update(self.instance, feedback_fn)
11818
11819     # Release all node locks (the configuration has been updated)
11820     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11821
11822     # and now perform the drbd attach
11823     self.lu.LogInfo("Attaching primary drbds to new secondary"
11824                     " (standalone => connected)")
11825     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11826                                             self.new_node],
11827                                            self.node_secondary_ip,
11828                                            (self.instance.disks, self.instance),
11829                                            self.instance.name,
11830                                            False)
11831     for to_node, to_result in result.items():
11832       msg = to_result.fail_msg
11833       if msg:
11834         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11835                            to_node, msg,
11836                            hint=("please do a gnt-instance info to see the"
11837                                  " status of disks"))
11838
11839     cstep = itertools.count(5)
11840
11841     if self.early_release:
11842       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11843       self._RemoveOldStorage(self.target_node, iv_names)
11844       # TODO: Check if releasing locks early still makes sense
11845       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11846     else:
11847       # Release all resource locks except those used by the instance
11848       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11849                     keep=self.node_secondary_ip.keys())
11850
11851     # TODO: Can the instance lock be downgraded here? Take the optional disk
11852     # shutdown in the caller into consideration.
11853
11854     # Wait for sync
11855     # This can fail as the old devices are degraded and _WaitForSync
11856     # does a combined result over all disks, so we don't check its return value
11857     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11858     _WaitForSync(self.lu, self.instance)
11859
11860     # Check all devices manually
11861     self._CheckDevices(self.instance.primary_node, iv_names)
11862
11863     # Step: remove old storage
11864     if not self.early_release:
11865       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11866       self._RemoveOldStorage(self.target_node, iv_names)
11867
11868
11869 class LURepairNodeStorage(NoHooksLU):
11870   """Repairs the volume group on a node.
11871
11872   """
11873   REQ_BGL = False
11874
11875   def CheckArguments(self):
11876     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11877
11878     storage_type = self.op.storage_type
11879
11880     if (constants.SO_FIX_CONSISTENCY not in
11881         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11882       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11883                                  " repaired" % storage_type,
11884                                  errors.ECODE_INVAL)
11885
11886   def ExpandNames(self):
11887     self.needed_locks = {
11888       locking.LEVEL_NODE: [self.op.node_name],
11889       }
11890
11891   def _CheckFaultyDisks(self, instance, node_name):
11892     """Ensure faulty disks abort the opcode or at least warn."""
11893     try:
11894       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11895                                   node_name, True):
11896         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11897                                    " node '%s'" % (instance.name, node_name),
11898                                    errors.ECODE_STATE)
11899     except errors.OpPrereqError, err:
11900       if self.op.ignore_consistency:
11901         self.LogWarning(str(err.args[0]))
11902       else:
11903         raise
11904
11905   def CheckPrereq(self):
11906     """Check prerequisites.
11907
11908     """
11909     # Check whether any instance on this node has faulty disks
11910     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11911       if inst.admin_state != constants.ADMINST_UP:
11912         continue
11913       check_nodes = set(inst.all_nodes)
11914       check_nodes.discard(self.op.node_name)
11915       for inst_node_name in check_nodes:
11916         self._CheckFaultyDisks(inst, inst_node_name)
11917
11918   def Exec(self, feedback_fn):
11919     feedback_fn("Repairing storage unit '%s' on %s ..." %
11920                 (self.op.name, self.op.node_name))
11921
11922     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11923     result = self.rpc.call_storage_execute(self.op.node_name,
11924                                            self.op.storage_type, st_args,
11925                                            self.op.name,
11926                                            constants.SO_FIX_CONSISTENCY)
11927     result.Raise("Failed to repair storage unit '%s' on %s" %
11928                  (self.op.name, self.op.node_name))
11929
11930
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  # Maps the opcode's evacuation mode to the iallocator's evacuation mode
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    """Checks the iallocator/remote node opcode parameters.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expands node names and declares the (shared) locks to acquire.

    @raise errors.OpPrereqError: if the remote node is the evacuated node
      or if a remote node is given for a mode other than secondary
      evacuation

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    @return: set containing the evacuated node plus either the members of
      its node group (no remote node given) or the remote node

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    @raise errors.OpPrereqError: if all instances are to be evacuated at
      once, which is currently not possible

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    """Fills in the locks needed at the given locking level.

    """
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    """Verifies the optimistically acquired locks and the remote node.

    @raise errors.OpPrereqError: if the needed nodes are not all locked or
      if the remote node is the primary node of an affected instance
    @raise errors.OpExecError: if node groups or instances changed since
      the locks were acquired

    """
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Computes the jobs evacuating the instances.

    @return: L{ResultWithJobs} wrapping one list of opcodes per job
    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    # Exactly one of iallocator and remote node must have been given
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      evac_mode = self._MODE2IALLOCATOR[self.op.mode]
      req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
                                     instances=list(self.instance_names))
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # One job per instance, each replacing the secondary node
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
12128
12129
12130 def _SetOpEarlyRelease(early_release, op):
12131   """Sets C{early_release} flag on opcodes if available.
12132
12133   """
12134   try:
12135     op.early_release = early_release
12136   except AttributeError:
12137     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12138
12139   return op
12140
12141
12142 def _NodeEvacDest(use_nodes, group, nodes):
12143   """Returns group or nodes depending on caller's choice.
12144
12145   """
12146   if use_nodes:
12147     return utils.CommaJoin(nodes)
12148   else:
12149     return group
12150
12151
12152 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12153   """Unpacks the result of change-group and node-evacuate iallocator requests.
12154
12155   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12156   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12157
12158   @type lu: L{LogicalUnit}
12159   @param lu: Logical unit instance
12160   @type alloc_result: tuple/list
12161   @param alloc_result: Result from iallocator
12162   @type early_release: bool
12163   @param early_release: Whether to release locks early if possible
12164   @type use_nodes: bool
12165   @param use_nodes: Whether to display node names instead of groups
12166
12167   """
12168   (moved, failed, jobs) = alloc_result
12169
12170   if failed:
12171     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12172                                  for (name, reason) in failed)
12173     lu.LogWarning("Unable to evacuate instances %s", failreason)
12174     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12175
12176   if moved:
12177     lu.LogInfo("Instances to be moved: %s",
12178                utils.CommaJoin("%s (to %s)" %
12179                                (name, _NodeEvacDest(use_nodes, group, nodes))
12180                                for (name, group, nodes) in moved))
12181
12182   return [map(compat.partial(_SetOpEarlyRelease, early_release),
12183               map(opcodes.OpCode.LoadOpCode, ops))
12184           for ops in jobs]
12185
12186
12187 def _DiskSizeInBytesToMebibytes(lu, size):
12188   """Converts a disk size in bytes to mebibytes.
12189
12190   Warns and rounds up if the size isn't an even multiple of 1 MiB.
12191
12192   """
12193   (mib, remainder) = divmod(size, 1024 * 1024)
12194
12195   if remainder != 0:
12196     lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12197                   " to not overwrite existing data (%s bytes will not be"
12198                   " wiped)", (1024 * 1024) - remainder)
12199     mib += 1
12200
12201   return mib
12202
12203
12204 class LUInstanceGrowDisk(LogicalUnit):
12205   """Grow a disk of an instance.
12206
12207   """
12208   HPATH = "disk-grow"
12209   HTYPE = constants.HTYPE_INSTANCE
12210   REQ_BGL = False
12211
12212   def ExpandNames(self):
12213     self._ExpandAndLockInstance()
12214     self.needed_locks[locking.LEVEL_NODE] = []
12215     self.needed_locks[locking.LEVEL_NODE_RES] = []
12216     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12217     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12218
12219   def DeclareLocks(self, level):
12220     if level == locking.LEVEL_NODE:
12221       self._LockInstancesNodes()
12222     elif level == locking.LEVEL_NODE_RES:
12223       # Copy node locks
12224       self.needed_locks[locking.LEVEL_NODE_RES] = \
12225         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12226
12227   def BuildHooksEnv(self):
12228     """Build hooks env.
12229
12230     This runs on the master, the primary and all the secondaries.
12231
12232     """
12233     env = {
12234       "DISK": self.op.disk,
12235       "AMOUNT": self.op.amount,
12236       "ABSOLUTE": self.op.absolute,
12237       }
12238     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12239     return env
12240
12241   def BuildHooksNodes(self):
12242     """Build hooks nodes.
12243
12244     """
12245     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12246     return (nl, nl)
12247
12248   def CheckPrereq(self):
12249     """Check prerequisites.
12250
12251     This checks that the instance is in the cluster.
12252
12253     """
12254     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12255     assert instance is not None, \
12256       "Cannot retrieve locked instance %s" % self.op.instance_name
12257     nodenames = list(instance.all_nodes)
12258     for node in nodenames:
12259       _CheckNodeOnline(self, node)
12260
12261     self.instance = instance
12262
12263     if instance.disk_template not in constants.DTS_GROWABLE:
12264       raise errors.OpPrereqError("Instance's disk layout does not support"
12265                                  " growing", errors.ECODE_INVAL)
12266
12267     self.disk = instance.FindDisk(self.op.disk)
12268
12269     if self.op.absolute:
12270       self.target = self.op.amount
12271       self.delta = self.target - self.disk.size
12272       if self.delta < 0:
12273         raise errors.OpPrereqError("Requested size (%s) is smaller than "
12274                                    "current disk size (%s)" %
12275                                    (utils.FormatUnit(self.target, "h"),
12276                                     utils.FormatUnit(self.disk.size, "h")),
12277                                    errors.ECODE_STATE)
12278     else:
12279       self.delta = self.op.amount
12280       self.target = self.disk.size + self.delta
12281       if self.delta < 0:
12282         raise errors.OpPrereqError("Requested increment (%s) is negative" %
12283                                    utils.FormatUnit(self.delta, "h"),
12284                                    errors.ECODE_INVAL)
12285
12286     if instance.disk_template not in (constants.DT_FILE,
12287                                       constants.DT_SHARED_FILE,
12288                                       constants.DT_RBD):
12289       # TODO: check the free disk space for file, when that feature will be
12290       # supported
12291       _CheckNodesFreeDiskPerVG(self, nodenames,
12292                                self.disk.ComputeGrowth(self.delta))
12293
  def Exec(self, feedback_fn):
    """Execute disk grow.

    The disk is activated, the grow request is first tried in dry-run mode on
    all nodes of the instance, then executed for real on all nodes and finally
    once more on the primary node only. Afterwards the new size is recorded in
    the configuration, the added space is wiped if the cluster has
    C{prealloc_wipe_disks} set and, if requested by the opcode, the disk sync
    is waited for.

    @param feedback_fn: Callable receiving progress messages; it is also
      passed on to the configuration update

    """
    instance = self.instance
    disk = self.disk

    # Sanity checks on the locks held when entering this function
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    # The block device has to be active before it can be grown
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    # NOTE(review): going by the comments in this function the two trailing
    # booleans are (dry-run, backing storage); confirm against the RPC
    # definition
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)

    if wipe_disks:
      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      # Exactly one disk was asked for, hence exactly one size is expected
      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      # Size before growing; passed to _WipeDisks further down
      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
    else:
      old_disk_size = None

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)

    # Record the new size in the disk object and write the configuration
    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    # The old size must have been retrieved if, and only if, wiping is enabled
    assert wipe_disks ^ (old_disk_size is None)

    if wipe_disks:
      assert instance.disks[self.op.disk] == disk

      # Wipe newly added disk space
      _WipeDisks(self, instance,
                 disks=[(self.op.disk, disk, old_disk_size)])

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.LogWarning("Disk syncing has not returned a good status; check"
                        " the instance")
      # The disk was activated above only for growing; deactivate it again
      # unless the instance is marked as up
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.LogWarning("Not shutting down the disk even if the instance is"
                      " not supposed to be running because no wait for"
                      " sync mode was requested")

    # Node resource locks are still held, only the node locks were released
    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12390
12391
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  For every requested instance a dictionary is returned which holds its
  configuration and, unless only static data was asked for, the state
  reported by its nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Determines the instances to query and the locks needed for them.

    """
    opts = self.op
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not opts.static and not opts.use_locking:
      self.LogWarning("Non-static data requested, locks need to be acquired")
      opts.use_locking = True

    if opts.use_locking and not opts.instances:
      # Will use acquired locks
      self.wanted_names = None
    else:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, opts.instances)

    if not opts.use_locking:
      return

    self.share_locks = _ShareAll()

    if self.wanted_names is None:
      instance_locks = locking.ALL_SET
    else:
      instance_locks = self.wanted_names

    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks
    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Computes node group and node locks from the owned instance locks.

    """
    if not self.op.use_locking:
      return

    if level == locking.LEVEL_NODEGROUP:
      # Lock all groups used by instances optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      group_uuids = set()
      for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
        group_uuids.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = frozenset(group_uuids)

    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the configuration of the wanted instances and, when locking is
    used, verifies the owned node group and node locks against them.

    """
    held_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    held_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    held_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      # No explicit list was given, fall back to whatever has been locked
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = held_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if not self.op.use_locking:
      assert not (held_instances or held_groups or held_nodes)
    else:
      _CheckInstancesNodeGroups(self.cfg, instances, held_groups, held_nodes,
                                None)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    C{None} is returned for static queries, when no node is given, when the
    node is offline and when the node reports no status for the device.

    """
    if not node or self.op.static:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance.name)

    bdev_status = find_result.payload
    if bdev_status is None:
      return None

    return (bdev_status.dev_path, bdev_status.major, bdev_status.minor,
            bdev_status.sync_percent, bdev_status.estimated_time,
            bdev_status.is_degraded, bdev_status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    The device is annotated with its disk parameters before the actual
    work is handed over to L{_ComputeDiskStatusInner}.

    """
    annotated = _AnnotateDiskParams(instance, [dev], self.cfg)
    (anno_dev,) = annotated

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in): it is
      # the node in the logical ID which is not the primary one
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance, dev)

    child_status = []
    if dev.children:
      child_status = [self._ComputeDiskStatusInner(instance, snode, child)
                      for child in dev.children]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": child_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()

    all_node_names = itertools.chain(*[inst.all_nodes
                                       for inst in self.wanted_instances])
    nodes = dict(self.cfg.GetMultiNodeInfo(all_node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    result = {}

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        run_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        info_result = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        info_result.Raise("Error checking node %s" % instance.primary_node)
        hv_info = info_result.payload
        if hv_info and "state" in hv_info:
          run_state = "up"
        elif instance.admin_state == constants.ADMINST_UP:
          run_state = "down"
        else:
          run_state = instance.admin_state

      disk_status = [self._ComputeDiskStatus(instance, None, disk)
                     for disk in instance.disks]

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": run_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": groups[pnode.group].name,
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": [groups[uuid].name
                               for uuid in snodes_group_uuids],
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disk_status,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
12605
12606
def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification; it is called once per modification, without arguments. If
    C{None}, the private data field is set to C{None}
  @rtype: list
  @return: List of tuples; (operation, index, parameters, private data)

  """
  prepared = []

  for (op, idx, params) in mods:
    if private_fn is None:
      private = None
    else:
      private = private_fn()

    prepared.append((op, idx, params, private))

  return prepared
12624
12625
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks; L{ApplyContainerMods} asserts it both on C{None} (no changes)
#: and on lists of two-element items such as C{("disk/0", "remove")}
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))
12633
12634
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  The container is modified in place, one modification after the other.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes; extended with the changes of every
    modification unless it is C{None}
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list; if C{None}, the parameters themselves are used as the new item
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}
  @raise IndexError: If a modification refers to an invalid index
  @raise errors.ProgrammerError: If an operation is not known

  """
  for (op, idx, params, private) in mods:
    count = len(container)

    if idx == -1:
      # Refers to the last item (or, when adding, to the end of the list)
      absidx = count - 1
    else:
      if idx < 0:
        raise IndexError("Not accepting negative indices other than -1")
      if idx > count:
        raise IndexError("Got %s index %s, but there are only %s" %
                         (kind, idx, count))
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      if create_fn is None:
        item = params
      else:
        # Calculate where item will be added
        if idx == -1:
          addidx = count
        else:
          addidx = idx

        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx >= 0
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # All other operations work on an existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      elif op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if chgdesc is not None and changes is not None:
      chgdesc.extend(changes)
12722
12723
12724 def _UpdateIvNames(base_index, disks):
12725   """Updates the C{iv_name} attribute of disks.
12726
12727   @type disks: list of L{objects.Disk}
12728
12729   """
12730   for (idx, disk) in enumerate(disks):
12731     disk.iv_name = "disk/%s" % (base_index + idx, )
12732
12733
12734 class _InstNicModPrivate:
12735   """Data structure for network interface modifications.
12736
12737   Used by L{LUInstanceSetParams}.
12738
12739   """
12740   def __init__(self):
12741     self.params = None
12742     self.filled = None
12743
12744
12745 class LUInstanceSetParams(LogicalUnit):
12746   """Modifies an instances's parameters.
12747
12748   """
12749   HPATH = "instance-modify"
12750   HTYPE = constants.HTYPE_INSTANCE
12751   REQ_BGL = False
12752
12753   @staticmethod
12754   def _UpgradeDiskNicMods(kind, mods, verify_fn):
12755     assert ht.TList(mods)
12756     assert not mods or len(mods[0]) in (2, 3)
12757
12758     if mods and len(mods[0]) == 2:
12759       result = []
12760
12761       addremove = 0
12762       for op, params in mods:
12763         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12764           result.append((op, -1, params))
12765           addremove += 1
12766
12767           if addremove > 1:
12768             raise errors.OpPrereqError("Only one %s add or remove operation is"
12769                                        " supported at a time" % kind,
12770                                        errors.ECODE_INVAL)
12771         else:
12772           result.append((constants.DDM_MODIFY, op, params))
12773
12774       assert verify_fn(result)
12775     else:
12776       result = mods
12777
12778     return result
12779
12780   @staticmethod
12781   def _CheckMods(kind, mods, key_types, item_fn):
12782     """Ensures requested disk/NIC modifications are valid.
12783
12784     """
12785     for (op, _, params) in mods:
12786       assert ht.TDict(params)
12787
12788       utils.ForceDictType(params, key_types)
12789
12790       if op == constants.DDM_REMOVE:
12791         if params:
12792           raise errors.OpPrereqError("No settings should be passed when"
12793                                      " removing a %s" % kind,
12794                                      errors.ECODE_INVAL)
12795       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12796         item_fn(op, params)
12797       else:
12798         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12799
12800   @staticmethod
12801   def _VerifyDiskModification(op, params):
12802     """Verifies a disk modification.
12803
12804     """
12805     if op == constants.DDM_ADD:
12806       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12807       if mode not in constants.DISK_ACCESS_SET:
12808         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12809                                    errors.ECODE_INVAL)
12810
12811       size = params.get(constants.IDISK_SIZE, None)
12812       if size is None:
12813         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12814                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12815
12816       try:
12817         size = int(size)
12818       except (TypeError, ValueError), err:
12819         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12820                                    errors.ECODE_INVAL)
12821
12822       params[constants.IDISK_SIZE] = size
12823
12824     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12825       raise errors.OpPrereqError("Disk size change not possible, use"
12826                                  " grow-disk", errors.ECODE_INVAL)
12827
12828   @staticmethod
12829   def _VerifyNicModification(op, params):
12830     """Verifies a network interface modification.
12831
12832     """
12833     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12834       ip = params.get(constants.INIC_IP, None)
12835       req_net = params.get(constants.INIC_NETWORK, None)
12836       link = params.get(constants.NIC_LINK, None)
12837       mode = params.get(constants.NIC_MODE, None)
12838       if req_net is not None:
12839         if req_net.lower() == constants.VALUE_NONE:
12840           params[constants.INIC_NETWORK] = None
12841           req_net = None
12842         elif link is not None or mode is not None:
12843           raise errors.OpPrereqError("If network is given"
12844                                      " mode or link should not",
12845                                      errors.ECODE_INVAL)
12846
12847       if op == constants.DDM_ADD:
12848         macaddr = params.get(constants.INIC_MAC, None)
12849         if macaddr is None:
12850           params[constants.INIC_MAC] = constants.VALUE_AUTO
12851
12852       if ip is not None:
12853         if ip.lower() == constants.VALUE_NONE:
12854           params[constants.INIC_IP] = None
12855         else:
12856           if ip.lower() == constants.NIC_IP_POOL:
12857             if op == constants.DDM_ADD and req_net is None:
12858               raise errors.OpPrereqError("If ip=pool, parameter network"
12859                                          " cannot be none",
12860                                          errors.ECODE_INVAL)
12861           else:
12862             if not netutils.IPAddress.IsValid(ip):
12863               raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12864                                          errors.ECODE_INVAL)
12865
12866       if constants.INIC_MAC in params:
12867         macaddr = params[constants.INIC_MAC]
12868         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12869           macaddr = utils.NormalizeAndValidateMac(macaddr)
12870
12871         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12872           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12873                                      " modifying an existing NIC",
12874                                      errors.ECODE_INVAL)
12875
12876   def CheckArguments(self):
12877     if not (self.op.nics or self.op.disks or self.op.disk_template or
12878             self.op.hvparams or self.op.beparams or self.op.os_name or
12879             self.op.offline is not None or self.op.runtime_mem):
12880       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12881
12882     if self.op.hvparams:
12883       _CheckGlobalHvParams(self.op.hvparams)
12884
12885     self.op.disks = self._UpgradeDiskNicMods(
12886       "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12887     self.op.nics = self._UpgradeDiskNicMods(
12888       "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12889
12890     # Check disk modifications
12891     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12892                     self._VerifyDiskModification)
12893
12894     if self.op.disks and self.op.disk_template is not None:
12895       raise errors.OpPrereqError("Disk template conversion and other disk"
12896                                  " changes not supported at the same time",
12897                                  errors.ECODE_INVAL)
12898
12899     if (self.op.disk_template and
12900         self.op.disk_template in constants.DTS_INT_MIRROR and
12901         self.op.remote_node is None):
12902       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12903                                  " one requires specifying a secondary node",
12904                                  errors.ECODE_INVAL)
12905
12906     # Check NIC modifications
12907     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12908                     self._VerifyNicModification)
12909
12910   def ExpandNames(self):
12911     self._ExpandAndLockInstance()
12912     self.needed_locks[locking.LEVEL_NODEGROUP] = []
12913     # Can't even acquire node locks in shared mode as upcoming changes in
12914     # Ganeti 2.6 will start to modify the node object on disk conversion
12915     self.needed_locks[locking.LEVEL_NODE] = []
12916     self.needed_locks[locking.LEVEL_NODE_RES] = []
12917     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12918     # Look node group to look up the ipolicy
12919     self.share_locks[locking.LEVEL_NODEGROUP] = 1
12920
12921   def DeclareLocks(self, level):
12922     if level == locking.LEVEL_NODEGROUP:
12923       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12924       # Acquire locks for the instance's nodegroups optimistically. Needs
12925       # to be verified in CheckPrereq
12926       self.needed_locks[locking.LEVEL_NODEGROUP] = \
12927         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12928     elif level == locking.LEVEL_NODE:
12929       self._LockInstancesNodes()
12930       if self.op.disk_template and self.op.remote_node:
12931         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12932         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12933     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12934       # Copy node locks
12935       self.needed_locks[locking.LEVEL_NODE_RES] = \
12936         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12937
12938   def BuildHooksEnv(self):
12939     """Build hooks env.
12940
12941     This runs on the master, primary and secondaries.
12942
12943     """
12944     args = {}
12945     if constants.BE_MINMEM in self.be_new:
12946       args["minmem"] = self.be_new[constants.BE_MINMEM]
12947     if constants.BE_MAXMEM in self.be_new:
12948       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12949     if constants.BE_VCPUS in self.be_new:
12950       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12951     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12952     # information at all.
12953
12954     if self._new_nics is not None:
12955       nics = []
12956
12957       for nic in self._new_nics:
12958         n = copy.deepcopy(nic)
12959         nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12960         n.nicparams = nicparams
12961         nics.append(_NICToTuple(self, n))
12962
12963       args["nics"] = nics
12964
12965     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12966     if self.op.disk_template:
12967       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12968     if self.op.runtime_mem:
12969       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12970
12971     return env
12972
12973   def BuildHooksNodes(self):
12974     """Build hooks nodes.
12975
12976     """
12977     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12978     return (nl, nl)
12979
  def _PrepareNicModification(self, params, private, old_ip, old_net,
                              old_params, cluster, pnode):
    """Checks a NIC modification and prepares the resulting NIC parameters.

    Computes the new NIC parameters, verifies them and reserves or generates
    the MAC and IP address where needed. The results are stored in
    C{private}; C{params} is updated in place with generated values.

    @type params: dict
    @param params: Requested NIC settings; a generated MAC address or an IP
      address chosen from the pool is written back into it
    @param private: Object receiving the new parameters in its C{params}
      attribute and their filled version in C{filled}
    @param old_ip: IP address used unless C{params} contains a new one
    @param old_net: Network used unless C{params} contains a new one
    @param old_params: NIC parameters the requested changes are applied to if
      the NIC is not connected to a network
    @param cluster: Object whose C{SimpleFillNIC} method is used for filling
      the NIC parameters
    @param pnode: Node used for looking up the network parameters and for
      the bridge and IP conflict checks
    @raise errors.OpPrereqError: If the modification is not acceptable

    """

    # Only the keys which are NIC parameters (as opposed to e.g. IP or MAC)
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    req_link = update_params_dict.get(constants.NIC_LINK, None)
    req_mode = update_params_dict.get(constants.NIC_MODE, None)

    new_net = params.get(constants.INIC_NETWORK, old_net)
    if new_net is not None:
      # With a network, its parameters are used instead of the ones requested
      netparams = self.cfg.GetGroupNetParams(new_net, pnode)
      if netparams is None:
        raise errors.OpPrereqError("No netparams found for the network"
                                   " %s, probably not connected" % new_net,
                                   errors.ECODE_INVAL)
      new_params = dict(netparams)
    else:
      new_params = _GetUpdatedParams(old_params, update_params_dict)

    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      # The bridge has to exist on the node; only a warning if forced
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      # A routed NIC can't be without an IP address
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)
    elif new_net != old_net:
      # No MAC address was requested, but the network changes; a new address
      # is generated if the MAC prefixes of the two networks differ

      def get_net_prefix(net):
        # Returns the MAC prefix of a network, None if it can't be found
        if net:
          uuid = self.cfg.LookupNetwork(net)
          if uuid:
            nobj = self.cfg.GetNetwork(uuid)
            return nobj.mac_prefix
        return None

      new_prefix = get_net_prefix(new_net)
      old_prefix = get_net_prefix(old_net)
      if old_prefix != new_prefix:
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())

    # if there is a change in the NIC's (IP, network) configuration
    new_ip = params.get(constants.INIC_IP, old_ip)
    if (new_ip, new_net) != (old_ip, old_net):
      if new_ip:
        if new_net:
          if new_ip.lower() == constants.NIC_IP_POOL:
            # Let the configuration pick an address from the network
            try:
              new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("Unable to get a free IP"
                                         " from the address pool",
                                         errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
            params[constants.INIC_IP] = new_ip
          elif new_ip != old_ip or new_net != old_net:
            # An explicit address has to be reserved in the network
            try:
              self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
              self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP %s not available in network %s" %
                                         (new_ip, new_net),
                                         errors.ECODE_NOTUNIQUE)
        elif new_ip.lower() == constants.NIC_IP_POOL:
          raise errors.OpPrereqError("ip=pool, but no network found",
                                     errors.ECODE_INVAL)
        else:
          # new net is None
          if self.op.conflicts_check:
            _CheckForConflictingIp(self, new_ip, pnode)

      # The address used so far is given back to its network
      if old_ip:
        if old_net:
          try:
            self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
          except errors.AddressPoolError:
            logging.warning("Release IP %s not contained in network %s",
                            old_ip, old_net)

    # there are no changes in (net, ip) tuple
    elif (old_net is not None and
          (req_link is not None or req_mode is not None)):
      raise errors.OpPrereqError("Not allowed to change link or mode of"
                                 " a NIC that is connected to a network",
                                 errors.ECODE_INVAL)

    # Hand the results over to the caller through the private data object
    private.params = new_params
    private.filled = new_filled_params
13103
  def CheckPrereq(self):
    """Check prerequisites.

    Validates every requested change against the current configuration
    and, where needed, against live data fetched from the nodes via RPC.
    In order, this covers: OS change, disk template conversion,
    hypervisor/backend/OS parameters, CPU mask versus vCPU count, memory
    availability, runtime memory ballooning, NIC and disk modifications,
    the offline flag and the instance policy of the primary node's group.

    Besides validating, the computed results are stored on the LU:
    C{instance}, C{cluster}, C{diskparams}, C{diskmod}, C{nicmod},
    C{hv_new}/C{hv_inst}/C{hv_proposed}, C{be_new}/C{be_inst}/C{be_proposed},
    C{os_inst}, C{warn}, C{_nic_chgdesc} and C{_new_nics}.

    @raise errors.OpPrereqError: if any of the requested changes is not
        acceptable

    """
    assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = instance.primary_node
    assert pnode in self.owned_locks(locking.LEVEL_NODE)
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
    assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
    group_info = self.cfg.GetNodeGroup(pnode_info.group)

    # dictionary with instance information after the modification; it is
    # filled in step by step and checked against the ipolicy at the end
    ispec = {}

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # OS change: unless forced, the new OS must be available on the primary
    # node; instance_os is the OS the osparams are validated against below
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    # Disk template conversion
    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      # only the (old, new) pairs listed in _DISK_CONVERSIONS are supported
      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      # internally mirrored templates need a usable secondary node
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        # the new secondary must have room, per volume group, for the disks
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

        # the instance must also fit the policy of the secondary's group
        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
        snode_group = self.cfg.GetNodeGroup(snode_info.group)
        ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                                snode_group)
        _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                                ignore=self.op.ignore_ipolicy)
        if pnode_info.group != snode_info.group:
          self.LogWarning("The primary and secondary nodes are in two"
                          " different node groups; the disk parameters"
                          " from the first disk's node group will be"
                          " used")

    # hvparams processing
    # hv_new/hv_inst describe the requested change (empty if none), while
    # hv_proposed always holds the fully filled parameters after the change
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing (same new/inst/proposed scheme as for hvparams)
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    # backend parameters before the change, used below to detect a
    # maximum memory increase
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    # non-fatal problems collected here are reported at the start of Exec
    self.warn = []

    # Memory check: only done when the maximum memory grows and the
    # operation is not forced
    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor])
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        # the payload is unpacked as a 3-tuple whose last element holds
        # exactly one entry (a single hypervisor was queried above)
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0
          # memory still missing once the instance's current memory and
          # the node's free memory are both counted as available
          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)

      # with auto-balancing, every secondary node must be able to hold the
      # new maximum memory; unlike for the primary, failures here are fatal
      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # Runtime memory (ballooning): the instance must be running and, unless
    # forced, the new value must lie within the proposed min/max memory
    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" %
                                   instance.name, errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
           (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
            self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      # only growing the instance needs additional free memory on the node
      delta = self.op.runtime_mem - current_memory
      if delta > 0:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name, delta, instance.hypervisor)

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    # Callbacks handed to ApplyContainerMods for the NIC verification pass
    def _PrepareNicCreate(_, params, private):
      # a new NIC has no previous IP, network or parameters
      self._PrepareNicModification(params, private, None, None,
                                   {}, cluster, pnode)
      return (None, None)

    def _PrepareNicMod(_, nic, params, private):
      self._PrepareNicModification(params, private, nic.ip, nic.network,
                                   nic.nicparams, cluster, pnode)
      return None

    def _PrepareNicRemove(_, params, __):
      # NOTE(review): "params" is accessed through .ip/.network attributes,
      # so it is presumably the NIC object being removed -- confirm against
      # ApplyContainerMods
      ip = params.ip
      net = params.network
      if net is not None and ip is not None:
        self.cfg.ReleaseIp(net, ip, self.proc.GetECId())

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    # Verify disk changes (operating on a copy)
    disks = instance.disks[:]
    ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)
    # sizes of the existing disks plus those of the disks being added
    # NOTE(review): the literal "size" key is used here while other code in
    # this method uses constants.IDISK_SIZE -- presumably the same key,
    # confirm
    disk_sizes = [disk.size for disk in instance.disks]
    disk_sizes.extend(params["size"] for (op, idx, params, private) in
                      self.diskmod if op == constants.DDM_ADD)
    ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
    ispec[constants.ISPEC_DISK_SIZE] = disk_sizes

    if self.op.offline is not None:
      if self.op.offline:
        msg = "can't change to offline"
      else:
        msg = "can't change to online"
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
      ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
    else:
      self._new_nics = None
      ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)

    # Instance policy check against the primary node's group
    if not self.op.ignore_ipolicy:
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)

      # Fill ispec with backend parameters; only values being changed are
      # taken into account (be_new is empty when no beparams were given)
      ispec[constants.ISPEC_SPINDLE_USE] = \
        self.be_new.get(constants.BE_SPINDLE_USE, None)
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
                                                         None)

      # Copy ispec to verify parameters with min/max values separately
      ispec_max = ispec.copy()
      ispec_max[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MAXMEM, None)
      res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
      ispec_min = ispec.copy()
      ispec_min[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MINMEM, None)
      res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)

      if (res_max or res_min):
        # FIXME: Improve error message by including information about whether
        # the upper or lower limit of the parameter fails the ipolicy.
        # NOTE(review): the first "%s" receives the group object itself
        # rather than an identifier (possibly its UUID was meant) -- confirm
        msg = ("Instance allocation to group %s (%s) violates policy: %s" %
               (group_info, group_info.name,
                utils.CommaJoin(set(res_max + res_min))))
        raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13430
13431   def _ConvertPlainToDrbd(self, feedback_fn):
13432     """Converts an instance from plain to drbd.
13433
13434     """
13435     feedback_fn("Converting template to drbd")
13436     instance = self.instance
13437     pnode = instance.primary_node
13438     snode = self.op.remote_node
13439
13440     assert instance.disk_template == constants.DT_PLAIN
13441
13442     # create a fake disk info for _GenerateDiskTemplate
13443     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13444                   constants.IDISK_VG: d.logical_id[0]}
13445                  for d in instance.disks]
13446     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13447                                       instance.name, pnode, [snode],
13448                                       disk_info, None, None, 0, feedback_fn,
13449                                       self.diskparams)
13450     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13451                                         self.diskparams)
13452     info = _GetInstanceInfoText(instance)
13453     feedback_fn("Creating additional volumes...")
13454     # first, create the missing data and meta devices
13455     for disk in anno_disks:
13456       # unfortunately this is... not too nice
13457       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13458                             info, True)
13459       for child in disk.children:
13460         _CreateSingleBlockDev(self, snode, instance, child, info, True)
13461     # at this stage, all new LVs have been created, we can rename the
13462     # old ones
13463     feedback_fn("Renaming original volumes...")
13464     rename_list = [(o, n.children[0].logical_id)
13465                    for (o, n) in zip(instance.disks, new_disks)]
13466     result = self.rpc.call_blockdev_rename(pnode, rename_list)
13467     result.Raise("Failed to rename original LVs")
13468
13469     feedback_fn("Initializing DRBD devices...")
13470     # all child devices are in place, we can now create the DRBD devices
13471     for disk in anno_disks:
13472       for node in [pnode, snode]:
13473         f_create = node == pnode
13474         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13475
13476     # at this point, the instance has been modified
13477     instance.disk_template = constants.DT_DRBD8
13478     instance.disks = new_disks
13479     self.cfg.Update(instance, feedback_fn)
13480
13481     # Release node locks while waiting for sync
13482     _ReleaseLocks(self, locking.LEVEL_NODE)
13483
13484     # disks are created, waiting for sync
13485     disk_abort = not _WaitForSync(self, instance,
13486                                   oneshot=not self.op.wait_for_sync)
13487     if disk_abort:
13488       raise errors.OpExecError("There are some degraded disks for"
13489                                " this instance, please cleanup manually")
13490
13491     # Node resource locks will be released by caller
13492
13493   def _ConvertDrbdToPlain(self, feedback_fn):
13494     """Converts an instance from drbd to plain.
13495
13496     """
13497     instance = self.instance
13498
13499     assert len(instance.secondary_nodes) == 1
13500     assert instance.disk_template == constants.DT_DRBD8
13501
13502     pnode = instance.primary_node
13503     snode = instance.secondary_nodes[0]
13504     feedback_fn("Converting template to plain")
13505
13506     old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13507     new_disks = [d.children[0] for d in instance.disks]
13508
13509     # copy over size and mode
13510     for parent, child in zip(old_disks, new_disks):
13511       child.size = parent.size
13512       child.mode = parent.mode
13513
13514     # this is a DRBD disk, return its port to the pool
13515     # NOTE: this must be done right before the call to cfg.Update!
13516     for disk in old_disks:
13517       tcp_port = disk.logical_id[2]
13518       self.cfg.AddTcpUdpPort(tcp_port)
13519
13520     # update instance structure
13521     instance.disks = new_disks
13522     instance.disk_template = constants.DT_PLAIN
13523     self.cfg.Update(instance, feedback_fn)
13524
13525     # Release locks in case removing disks takes a while
13526     _ReleaseLocks(self, locking.LEVEL_NODE)
13527
13528     feedback_fn("Removing volumes on the secondary node...")
13529     for disk in old_disks:
13530       self.cfg.SetDiskID(disk, snode)
13531       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13532       if msg:
13533         self.LogWarning("Could not remove block device %s on node %s,"
13534                         " continuing anyway: %s", disk.iv_name, snode, msg)
13535
13536     feedback_fn("Removing unneeded volumes on the primary node...")
13537     for idx, disk in enumerate(old_disks):
13538       meta = disk.children[1]
13539       self.cfg.SetDiskID(meta, pnode)
13540       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13541       if msg:
13542         self.LogWarning("Could not remove metadata for disk %d on node %s,"
13543                         " continuing anyway: %s", idx, pnode, msg)
13544
13545   def _CreateNewDisk(self, idx, params, _):
13546     """Creates a new disk.
13547
13548     """
13549     instance = self.instance
13550
13551     # add a new disk
13552     if instance.disk_template in constants.DTS_FILEBASED:
13553       (file_driver, file_path) = instance.disks[0].logical_id
13554       file_path = os.path.dirname(file_path)
13555     else:
13556       file_driver = file_path = None
13557
13558     disk = \
13559       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13560                             instance.primary_node, instance.secondary_nodes,
13561                             [params], file_path, file_driver, idx,
13562                             self.Log, self.diskparams)[0]
13563
13564     info = _GetInstanceInfoText(instance)
13565
13566     logging.info("Creating volume %s for instance %s",
13567                  disk.iv_name, instance.name)
13568     # Note: this needs to be kept in sync with _CreateDisks
13569     #HARDCODE
13570     for node in instance.all_nodes:
13571       f_create = (node == instance.primary_node)
13572       try:
13573         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13574       except errors.OpExecError, err:
13575         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13576                         disk.iv_name, disk, node, err)
13577
13578     return (disk, [
13579       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13580       ])
13581
13582   @staticmethod
13583   def _ModifyDisk(idx, disk, params, _):
13584     """Modifies a disk.
13585
13586     """
13587     disk.mode = params[constants.IDISK_MODE]
13588
13589     return [
13590       ("disk.mode/%d" % idx, disk.mode),
13591       ]
13592
13593   def _RemoveDisk(self, idx, root, _):
13594     """Removes a disk.
13595
13596     """
13597     (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13598     for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13599       self.cfg.SetDiskID(disk, node)
13600       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13601       if msg:
13602         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13603                         " continuing anyway", idx, node, msg)
13604
13605     # if this is a DRBD disk, return its port to the pool
13606     if root.dev_type in constants.LDS_DRBD:
13607       self.cfg.AddTcpUdpPort(root.logical_id[2])
13608
13609   @staticmethod
13610   def _CreateNewNic(idx, params, private):
13611     """Creates data structure for a new network interface.
13612
13613     """
13614     mac = params[constants.INIC_MAC]
13615     ip = params.get(constants.INIC_IP, None)
13616     net = params.get(constants.INIC_NETWORK, None)
13617     #TODO: not private.filled?? can a nic have no nicparams??
13618     nicparams = private.filled
13619
13620     return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13621       ("nic.%d" % idx,
13622        "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13623        (mac, ip, private.filled[constants.NIC_MODE],
13624        private.filled[constants.NIC_LINK],
13625        net)),
13626       ])
13627
13628   @staticmethod
13629   def _ApplyNicMods(idx, nic, params, private):
13630     """Modifies a network interface.
13631
13632     """
13633     changes = []
13634
13635     for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13636       if key in params:
13637         changes.append(("nic.%s/%d" % (key, idx), params[key]))
13638         setattr(nic, key, params[key])
13639
13640     if private.filled:
13641       nic.nicparams = private.filled
13642
13643       for (key, val) in nic.nicparams.items():
13644         changes.append(("nic.%s/%d" % (key, idx), val))
13645
13646     return changes
13647
  def Exec(self, feedback_fn):
    """Modifies an instance.

    Applies, in this order: the runtime memory change (through an RPC
    call to the primary node), disk additions/modifications/removals, the
    disk template conversion, and then the NIC, hypervisor, backend, OS
    and OS parameter changes together with the offline/online state.
    Parameter changes are only recorded in the configuration here.

    @param feedback_fn: function used to report progress and warnings
    @rtype: list
    @return: list of C{(name, new value)} tuples describing the changes
    @raise errors.OpExecError: if the instance disks cannot be shut down
        before a disk template conversion

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    # node resource locks must be held exactly when a disk template
    # conversion was requested
    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes (this operates on the instance's real disk list,
    # unlike the verification pass in CheckPrereq)
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      # sanity check, skipped when running without assertions: locks on
      # all involved nodes must be held at both node lock levels
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        # the table holds plain functions, hence the explicit "self"
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # whatever went wrong, call ReleaseDRBDMinors for this instance
        # and re-raise the original exception unchanged
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes (the new NIC list and its change descriptions were
    # pre-computed in CheckPrereq)
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes: the instance stores the dict without defaults,
    # while the result reports the values exactly as requested
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    # write all accumulated changes of the instance object to the config
    self.cfg.Update(instance, feedback_fn, self.proc.GetECId())

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
13758
  # Dispatch table for disk template conversions. The key is a
  # (current disk template, requested disk template) tuple and the value is
  # the function implementing that conversion; it is invoked as
  # handler(self, feedback_fn). Only the two pairs listed here are present.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
13763
13764
class LUInstanceChangeGroup(LogicalUnit):
  """Logical unit for moving an instance to another node group.

  The placement itself is computed by an iallocator; this LU takes the
  required locks, verifies the candidate target groups and returns the jobs
  built from the iallocator's answer.

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Set up the initial lock declarations and resolve target groups.

    """
    self.share_locks = _ShareAll()

    # Node group and node locks start out empty and are filled in by
    # DeclareLocks once the previous levels have been acquired
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self._ExpandAndLockInstance()

    requested = self.op.target_groups
    if not requested:
      self.req_target_uuids = None
    else:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup, requested)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    """Declare the locks needed at the node group and node levels.

    """
    if level == locking.LEVEL_NODEGROUP:
      self._DeclareGroupLocks()
    elif level == locking.LEVEL_NODE:
      self._DeclareNodeLocks()

  def _DeclareGroupLocks(self):
    """Compute the node group locks.

    """
    assert not self.needed_locks[locking.LEVEL_NODEGROUP]

    if not self.req_target_uuids:
      # Without explicit targets every group is a candidate
      wanted_groups = locking.ALL_SET
    else:
      wanted_groups = set(self.req_target_uuids)

      # The instance's own groups are locked optimistically: they are looked
      # up via nodes which are not locked yet, hence they need to be verified
      # again later on
      current_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      wanted_groups.update(current_groups)

    self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups

  def _DeclareNodeLocks(self):
    """Compute the node locks.

    """
    if not self.req_target_uuids:
      # All groups are potential targets, hence all nodes are needed
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      return

    # First the nodes used by the instance itself ...
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    self._LockInstancesNodes()

    # ... then all members of the groups the instance could be moved to
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    current_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    target_nodes = []
    for group_uuid in owned_groups - current_groups:
      target_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)

    self.needed_locks[locking.LEVEL_NODE].extend(target_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically acquired locks and computes the final set of
    target groups, which must be non-empty and must not contain any group
    already used by the instance.

    """
    locked_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    locked_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            locked_groups.issuperset(self.req_target_uuids))
    assert locked_instances == set([self.op.instance_name])

    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # The instance's nodes must still be covered by the locks we hold
    assert locked_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    current_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                              locked_groups)

    if self.req_target_uuids:
      # Targets were given explicitly
      targets = frozenset(self.req_target_uuids)
    else:
      # Everything not already used by the instance is a candidate
      targets = locked_groups - current_groups
    self.target_uuids = targets

    clashes = targets & current_groups
    if clashes:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(clashes),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not targets:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    The target group UUIDs are exported space-separated in TARGET_GROUPS, in
    addition to the generic per-instance variables.

    """
    assert self.target_uuids

    hook_env = {}
    hook_env["TARGET_GROUPS"] = " ".join(self.target_uuids)
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Both hook phases run on the master node only.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Ask the iallocator for a solution and return the resulting jobs.

    """
    locked_instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert locked_instances == [self.op.instance_name], "Instance not locked"

    request = iallocator.IAReqGroupChange(instances=locked_instances,
                                          target_groups=list(self.target_uuids))
    ial = iallocator.IAllocator(self.cfg, self.rpc, request)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info), errors.ECODE_NORES)

    evac_jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release,
                                    False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(evac_jobs), self.op.instance_name)

    return ResultWithJobs(evac_jobs)
13906
13907
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  All the work is delegated to an L{_ExportQuery} object; this class only
  converts its result into a per-node dictionary.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object restricted to the requested nodes.

    """
    node_filter = qlang.MakeSimpleFilter("node", self.op.nodes)
    self.expq = _ExportQuery(node_filter, ["node", "export"],
                             self.op.use_locking)

  def ExpandNames(self):
    """Forward lock expansion to the query object.

    """
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forward lock declaration to the query object.

    """
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Compute the exports found on each node.

    @return: dictionary keyed by node name; the value is a list of export
        names, or C{False} for a node which was reported without an export
        name

    """
    exports_by_node = {}

    for (node_name, export_name) in self.expq.OldStyleQuery(self):
      if export_name is not None:
        exports_by_node.setdefault(node_name, []).append(export_name)
      else:
        exports_by_node[node_name] = False

    return exports_by_node
13934
13935
class _ExportQuery(_QueryBase):
  """Query implementation listing the exports present on nodes.

  """
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    """Determine the wanted nodes and, if locking is used, the locks.

    """
    lu.needed_locks = {}

    # "wanted" and "do_locking" are consumed by _QueryBase._GetNames
    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = self.use_locking

    if not self.do_locking:
      return

    lu.share_locks = _ShareAll()

    node_locks = {
      locking.LEVEL_NODE: self.wanted,
      }
    if not self.names:
      node_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    lu.needed_locks = node_locks

  def DeclareLocks(self, lu, level):
    """Nothing to declare at any level.

    """
    pass

  def _GetQueryData(self, lu):
    """Collects the exports found on the selected nodes.

    @return: list of C{(node name, export name)} tuples; a node whose export
        list could not be retrieved contributes C{(node name, None)}

    """
    # Locking is not used
    # TODO
    assert (not compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) and
            not self.do_locking and not self.use_locking)

    node_names = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    rows = []

    for (node_name, node_res) in lu.rpc.call_export_list(node_names).items():
      if not node_res.fail_msg:
        rows.extend([(node_name, export) for export in node_res.payload])
      else:
        rows.append((node_name, None))

    return rows
13987
13988
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance which is going to be exported.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance's primary node must be online; the cluster domain secret is
    fetched here for later use in L{Exec}.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @return: C{None} unless the export mode is remote; for remote exports a
        dictionary with the keys C{handshake}, C{x509_key_name} and
        C{x509_ca}

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    pnode = self.instance.primary_node
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % pnode)
    result = self.rpc.call_x509_cert_create(pnode,
                                            constants.RIE_CERT_VALIDITY)
    result.Raise("Can't create X509 key and certificate on %s" % result.node)

    (key_name, cert_pem) = result.payload

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # Everything handed back is authenticated using the cluster domain secret
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
14038
14039
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  Two modes are handled: a local export to another node of this cluster
  (C{constants.EXPORT_MODE_LOCAL}) and a remote export
  (C{constants.EXPORT_MODE_REMOTE}), for which the opcode has to carry an
  X509 key name, a destination CA and per-disk destination information.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    For remote exports both the X509 key name and the destination X509 CA
    must have been passed in the opcode.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Lock the instance and, for local exports, all nodes.

    """
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

      # Allocations should be stopped while this LU runs with node locks, but
      # it doesn't have to be exclusive
      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The hooks run on the master and the instance's primary node; the target
    node is added for local exports only.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    For local exports the target node must be online and not drained. For
    remote exports, the HMAC of the X509 key name, the signed destination CA
    and the destination information of every disk are verified against the
    cluster domain secret. Instances with file-based disks are refused in
    both modes.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    # Removing an instance which is marked up requires shutting it down as
    # part of the export
    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      # Attributes only meaningful for remote exports
      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      # In remote mode "target_node" holds one entry per instance disk
      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    Failures to list or remove exports on a node are not fatal; removal
    failures are reported as warnings.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        # Nodes which could not be queried are silently skipped
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @return: tuple of C{(fin_resu, dresults)} as returned by the export
        helper; C{dresults} contains one boolean per instance disk
    @raise errors.OpExecError: if the instance could not be restarted after
        the snapshots were taken, or if the finalization or any disk export
        failed

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    # Outer try/finally: disks activated above are always deactivated again
    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      # Inner try/finally: the helper is always cleaned up once the snapshots
      # have been created
      try:
        # Restart the instance right after snapshotting if it was shut down
        # for the export and is going to be kept
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # Collect everything that failed into a single error message
    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    # Older exports on other nodes are only dropped for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
14341
14342
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare locks on all nodes and on node allocations.

    """
    # All nodes have to be locked for the export removal to work. The instance
    # itself is not locked: nothing happens to it, and exports of an already
    # removed instance can be deleted as well. Removing backups is quick, so
    # blocking allocations is justified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    # Allocations should be stopped while this LU runs with node locks, but it
    # doesn't have to be exclusive
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1

  def Exec(self, feedback_fn):
    """Remove any export.

    Every locked node is asked for its exports and a matching one is removed
    wherever it is found. Nodes failing the query are skipped with a warning;
    a failed removal is only logged.

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)

    # An unknown instance is looked up under the name exactly as given, which
    # can only match if that name was an FQDN
    name_unresolved = not expanded_name
    if name_unresolved:
      instance_name = self.op.instance_name
    else:
      instance_name = expanded_name

    node_exports = self.rpc.call_export_list(self.owned_locks(locking.LEVEL_NODE))

    found = False
    for (node_name, node_res) in node_exports.items():
      query_err = node_res.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s", node_name,
                        query_err)
        continue

      if instance_name not in node_res.payload:
        continue

      found = True
      remove_err = self.rpc.call_export_remove(node_name,
                                               instance_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node_name, remove_err)

    if name_unresolved and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
14396
14397
14398 class LUGroupAdd(LogicalUnit):
14399   """Logical unit for creating node groups.
14400
14401   """
14402   HPATH = "group-add"
14403   HTYPE = constants.HTYPE_GROUP
14404   REQ_BGL = False
14405
14406   def ExpandNames(self):
14407     # We need the new group's UUID here so that we can create and acquire the
14408     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14409     # that it should not check whether the UUID exists in the configuration.
14410     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14411     self.needed_locks = {}
14412     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14413
14414   def CheckPrereq(self):
14415     """Check prerequisites.
14416
14417     This checks that the given group name is not an existing node group
14418     already.
14419
14420     """
14421     try:
14422       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14423     except errors.OpPrereqError:
14424       pass
14425     else:
14426       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14427                                  " node group (UUID: %s)" %
14428                                  (self.op.group_name, existing_uuid),
14429                                  errors.ECODE_EXISTS)
14430
14431     if self.op.ndparams:
14432       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14433
14434     if self.op.hv_state:
14435       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14436     else:
14437       self.new_hv_state = None
14438
14439     if self.op.disk_state:
14440       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14441     else:
14442       self.new_disk_state = None
14443
14444     if self.op.diskparams:
14445       for templ in constants.DISK_TEMPLATES:
14446         if templ in self.op.diskparams:
14447           utils.ForceDictType(self.op.diskparams[templ],
14448                               constants.DISK_DT_TYPES)
14449       self.new_diskparams = self.op.diskparams
14450       try:
14451         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14452       except errors.OpPrereqError, err:
14453         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14454                                    errors.ECODE_INVAL)
14455     else:
14456       self.new_diskparams = {}
14457
14458     if self.op.ipolicy:
14459       cluster = self.cfg.GetClusterInfo()
14460       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14461       try:
14462         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14463       except errors.ConfigurationError, err:
14464         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14465                                    errors.ECODE_INVAL)
14466
14467   def BuildHooksEnv(self):
14468     """Build hooks env.
14469
14470     """
14471     return {
14472       "GROUP_NAME": self.op.group_name,
14473       }
14474
14475   def BuildHooksNodes(self):
14476     """Build hooks nodes.
14477
14478     """
14479     mn = self.cfg.GetMasterNode()
14480     return ([mn], [mn])
14481
14482   def Exec(self, feedback_fn):
14483     """Add the node group to the cluster.
14484
14485     """
14486     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14487                                   uuid=self.group_uuid,
14488                                   alloc_policy=self.op.alloc_policy,
14489                                   ndparams=self.op.ndparams,
14490                                   diskparams=self.new_diskparams,
14491                                   ipolicy=self.op.ipolicy,
14492                                   hv_state_static=self.new_hv_state,
14493                                   disk_state_static=self.new_disk_state)
14494
14495     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14496     del self.remove_locks[locking.LEVEL_NODEGROUP]
14497
14498
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit moving nodes into a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the target group and the nodes, then declare the locks.

    """
    # Both lookups raise errors.OpPrereqError by themselves
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # Every affected node and group has to be locked. At this point only the
    # node list and the *destination* group are known; the "source" groups
    # are added in DeclareLocks, where node information is looked up.
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.nodes,
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      }

  def DeclareLocks(self, level):
    """Add the current groups of the nodes to the node group locks.

    @param level: the locking level being declared; only
      L{locking.LEVEL_NODEGROUP} is acted upon

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    group_locks = self.needed_locks[locking.LEVEL_NODEGROUP]
    assert len(group_locks) == 1

    # The groups are looked up while neither the group nor the node locks
    # are held, hence the result is verified again in CheckPrereq.
    source_groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
    group_locks.update(source_groups)

  def CheckPrereq(self):
    """Verify the held locks and look for instances that would be split.

    Raises L{errors.OpExecError} if the groups of the nodes changed since the
    locks were declared, if the target group can not be retrieved or if
    instances would be newly split and the opcode's C{force} is not set.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    current_groups = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    locked_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if locked_groups != current_groups:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(current_groups),
                                utils.CommaJoin(locked_groups)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node, self.group_uuid) for node in self.op.nodes]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            instance_data)

    if not new_splits:
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    if previous_splits:
      fmt_previous_splits = utils.CommaJoin(utils.NiceSort(previous_splits))
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      fmt_previous_splits)

  def Exec(self, feedback_fn):
    """Assign all requested nodes to the target group.

    @param feedback_fn: not used by this method

    """
    self.cfg.AssignGroupNodes([(name, self.group_uuid)
                               for name in self.op.nodes])

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Compute which instances are split after a set of node assignments.

    All assignments are treated as one atomic operation. An instance counts
    as split when its primary and secondary nodes belong to more than one
    group. Instances whose disk template is not listed in
    constants.DTS_INT_MIRROR are ignored.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that are not split now but would be after the
      change, and a list of instances that are split now and would remain so

    """
    # Only assignments actually changing the group of a node matter
    moved_nodes = {}
    for (node, group) in changes:
      if node_data[node].group != group:
        moved_nodes[node] = group

    split_after = set()
    split_before = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      inst_nodes = [inst.primary_node] + list(inst.secondary_nodes)

      groups_before = set(node_data[node].group for node in inst_nodes)
      if len(groups_before) > 1:
        split_before.add(inst.name)

      groups_after = set(moved_nodes.get(node, node_data[node].group)
                         for node in inst_nodes)
      if len(groups_after) > 1:
        split_after.add(inst.name)

    newly_split = split_after - split_before
    still_split = split_before & split_after

    return (list(newly_split), list(still_split))
14630
14631
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Compute the UUIDs of the wanted groups; no locks are requested.

    Without requested names all groups are wanted, in the order given by
    utils.NiceSort on their names. Otherwise every requested entry may be
    either a group UUID or a group name.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if a requested entry is neither a known UUID
      nor a known name

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    uuid_by_name = dict((group.name, group.uuid)
                        for group in self._all_groups.values())

    if not self.names:
      sorted_names = utils.NiceSort(uuid_by_name.keys())
      self.wanted = [uuid_by_name[name] for name in sorted_names]
      return

    # Requested entries are accepted both as UUIDs and as names
    unknown = []
    self.wanted = []
    known_uuids = frozenset(self._all_groups.keys())

    for name in self.names:
      if name in known_uuids:
        uuid = name
      elif name in uuid_by_name:
        uuid = uuid_by_name[name]
      else:
        unknown.append(name)
        continue

      self.wanted.append(uuid)

    if unknown:
      raise errors.OpPrereqError("Some groups do not exist: %s" %
                                 utils.CommaJoin(unknown),
                                 errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Does nothing.

    """
    pass

  def _GetQueryData(self, lu):
    """Collect the wanted node groups and the data requested for them.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.GroupQueryData} object

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    nodes_by_group = None
    instances_by_group = None

    # GQ_NODE needs the group->[nodes] mapping, GQ_INST the group->[instances]
    # one. Instances are attributed to a group through their primary node,
    # therefore the nodes have to be processed in both cases.
    if want_nodes or want_instances:
      nodes_by_group = dict((uuid, []) for uuid in self.wanted)
      group_by_node = {}

      for node in lu.cfg.GetAllNodesInfo().values():
        if node.group in nodes_by_group:
          nodes_by_group[node.group].append(node.name)
          group_by_node[node.name] = node.group

      if want_instances:
        instances_by_group = dict((uuid, []) for uuid in self.wanted)

        for instance in lu.cfg.GetAllInstancesInfo().values():
          pnode = instance.primary_node
          if pnode in group_by_node:
            instances_by_group[group_by_node[pnode]].append(instance.name)

        if not want_nodes:
          # Node data was only needed to compute the instance mapping
          nodes_by_group = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]
    want_diskparams = query.GQ_DISKPARAMS in self.requested_data

    return query.GroupQueryData(self._cluster, wanted_groups,
                                nodes_by_group, instances_by_group,
                                want_diskparams)
14710
14711
class LUGroupQuery(NoHooksLU):
  """Logical unit answering node group queries.

  All the work is delegated to a L{_GroupQuery} object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Build the query object from the opcode's names and output fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare locks for the given level.

    """
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    @param feedback_fn: not used by this method

    """
    return self.gq.OldStyleQuery(self)
14730
14731
14732 class LUGroupSetParams(LogicalUnit):
14733   """Modifies the parameters of a node group.
14734
14735   """
14736   HPATH = "group-modify"
14737   HTYPE = constants.HTYPE_GROUP
14738   REQ_BGL = False
14739
14740   def CheckArguments(self):
14741     all_changes = [
14742       self.op.ndparams,
14743       self.op.diskparams,
14744       self.op.alloc_policy,
14745       self.op.hv_state,
14746       self.op.disk_state,
14747       self.op.ipolicy,
14748       ]
14749
14750     if all_changes.count(None) == len(all_changes):
14751       raise errors.OpPrereqError("Please pass at least one modification",
14752                                  errors.ECODE_INVAL)
14753
14754   def ExpandNames(self):
14755     # This raises errors.OpPrereqError on its own:
14756     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14757
14758     self.needed_locks = {
14759       locking.LEVEL_INSTANCE: [],
14760       locking.LEVEL_NODEGROUP: [self.group_uuid],
14761       }
14762
14763     self.share_locks[locking.LEVEL_INSTANCE] = 1
14764
14765   def DeclareLocks(self, level):
14766     if level == locking.LEVEL_INSTANCE:
14767       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14768
14769       # Lock instances optimistically, needs verification once group lock has
14770       # been acquired
14771       self.needed_locks[locking.LEVEL_INSTANCE] = \
14772           self.cfg.GetNodeGroupInstances(self.group_uuid)
14773
14774   @staticmethod
14775   def _UpdateAndVerifyDiskParams(old, new):
14776     """Updates and verifies disk parameters.
14777
14778     """
14779     new_params = _GetUpdatedParams(old, new)
14780     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14781     return new_params
14782
14783   def CheckPrereq(self):
14784     """Check prerequisites.
14785
14786     """
14787     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14788
14789     # Check if locked instances are still correct
14790     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14791
14792     self.group = self.cfg.GetNodeGroup(self.group_uuid)
14793     cluster = self.cfg.GetClusterInfo()
14794
14795     if self.group is None:
14796       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14797                                (self.op.group_name, self.group_uuid))
14798
14799     if self.op.ndparams:
14800       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14801       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14802       self.new_ndparams = new_ndparams
14803
14804     if self.op.diskparams:
14805       diskparams = self.group.diskparams
14806       uavdp = self._UpdateAndVerifyDiskParams
14807       # For each disktemplate subdict update and verify the values
14808       new_diskparams = dict((dt,
14809                              uavdp(diskparams.get(dt, {}),
14810                                    self.op.diskparams[dt]))
14811                             for dt in constants.DISK_TEMPLATES
14812                             if dt in self.op.diskparams)
14813       # As we've all subdicts of diskparams ready, lets merge the actual
14814       # dict with all updated subdicts
14815       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14816       try:
14817         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14818       except errors.OpPrereqError, err:
14819         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14820                                    errors.ECODE_INVAL)
14821
14822     if self.op.hv_state:
14823       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14824                                                  self.group.hv_state_static)
14825
14826     if self.op.disk_state:
14827       self.new_disk_state = \
14828         _MergeAndVerifyDiskState(self.op.disk_state,
14829                                  self.group.disk_state_static)
14830
14831     if self.op.ipolicy:
14832       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14833                                             self.op.ipolicy,
14834                                             group_policy=True)
14835
14836       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14837       inst_filter = lambda inst: inst.name in owned_instances
14838       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14839       gmi = ganeti.masterd.instance
14840       violations = \
14841           _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14842                                                                   self.group),
14843                                         new_ipolicy, instances)
14844
14845       if violations:
14846         self.LogWarning("After the ipolicy change the following instances"
14847                         " violate them: %s",
14848                         utils.CommaJoin(violations))
14849
14850   def BuildHooksEnv(self):
14851     """Build hooks env.
14852
14853     """
14854     return {
14855       "GROUP_NAME": self.op.group_name,
14856       "NEW_ALLOC_POLICY": self.op.alloc_policy,
14857       }
14858
14859   def BuildHooksNodes(self):
14860     """Build hooks nodes.
14861
14862     """
14863     mn = self.cfg.GetMasterNode()
14864     return ([mn], [mn])
14865
14866   def Exec(self, feedback_fn):
14867     """Modifies the node group.
14868
14869     """
14870     result = []
14871
14872     if self.op.ndparams:
14873       self.group.ndparams = self.new_ndparams
14874       result.append(("ndparams", str(self.group.ndparams)))
14875
14876     if self.op.diskparams:
14877       self.group.diskparams = self.new_diskparams
14878       result.append(("diskparams", str(self.group.diskparams)))
14879
14880     if self.op.alloc_policy:
14881       self.group.alloc_policy = self.op.alloc_policy
14882
14883     if self.op.hv_state:
14884       self.group.hv_state_static = self.new_hv_state
14885
14886     if self.op.disk_state:
14887       self.group.disk_state_static = self.new_disk_state
14888
14889     if self.op.ipolicy:
14890       self.group.ipolicy = self.new_ipolicy
14891
14892     self.cfg.Update(self.group, feedback_fn)
14893     return result
14894
14895
class LUGroupRemove(LogicalUnit):
  """Logical unit removing a node group.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group and declare its lock.

    """
    # The lookup raises errors.OpPrereqError by itself
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    The group must not contain any node and must not be the only group of
    the cluster.

    @raise errors.OpPrereqError: if one of the conditions is not met

    """
    # The group has to be empty
    member_names = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        member_names.append(node.name)

    if member_names:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(member_names))),
                                 errors.ECODE_STATE)

    # At least one group has to remain in the cluster
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
                                 " removed" % self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the name of the group

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair of single-element lists holding the master node

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the group from the configuration.

    @param feedback_fn: not used by this method
    @raise errors.OpExecError: if the configuration reports an error while
      removing the group

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    # Schedule the lock of the removed group for removal as well
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14960
14961
class LUGroupRename(LogicalUnit):
  """Logical unit renaming a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group and declare its lock.

    """
    # The lookup raises errors.OpPrereqError by itself
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    The new name must not resolve to an existing node group.

    @raise errors.OpPrereqError: if the new name is already in use

    """
    try:
      clashing_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The lookup failed, hence the name is free
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, clashing_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the old and the new name of the group

    """
    env = {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list twice: the master node followed by the other
      nodes belonging to the group

    """
    master = self.cfg.GetMasterNode()

    # The master goes first, hence it is dropped from the nodes to scan
    other_nodes = self.cfg.GetAllNodesInfo()
    other_nodes.pop(master, None)

    run_nodes = [master]
    for node in other_nodes.values():
      if node.group == self.group_uuid:
        run_nodes.append(node.name)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Store the new name of the group in the configuration.

    @param feedback_fn: passed on to the configuration update
    @return: the new name of the group
    @raise errors.OpExecError: if the group can not be retrieved

    """
    new_name = self.op.new_name
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = new_name
    self.cfg.Update(group, feedback_fn)

    return new_name
15029
15030
class LUGroupEvacuate(LogicalUnit):
  """Logical unit evacuating the instances of a node group.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group names and declare (initially empty) locks.

    @raise errors.OpPrereqError: if the group to be evacuated is also listed
      as a target group

    """
    # The lookup raises errors.OpPrereqError by itself
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = [self.cfg.LookupNodeGroup(name)
                               for name in self.op.target_groups]
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    """Fill in the locks for the instance, node group and node levels.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Optimistic locking of the instances; verified in CheckPrereq once
      # the node and group locks are held
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        wanted_groups = set([self.group_uuid] + self.req_target_uuids)

        # Also lock, optimistically, all groups used by the instances. This
        # goes via nodes which are not locked yet, hence it is verified
        # later on.
        for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
          wanted_groups.update(self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # Without requested targets every group has to be locked
        wanted_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups

    elif level == locking.LEVEL_NODE:
      # Only the nodes of the group to be evacuated which contain actual
      # instances are locked by this
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Additionally lock all members of the owned groups, that is the group
      # to be evacuated and the target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups

      member_nodes = []
      for group in owned_groups:
        member_nodes.extend(self.cfg.GetNodeGroup(group).members)

      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    """Verify the held locks and determine the target groups.

    @raise errors.OpPrereqError: if no target groups were requested and no
      other group is available

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # The instance locks were declared optimistically, verify them
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Fetch the instance objects
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # The node groups of the locked instances must still be correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # Specific target groups were requested
      self.target_uuids = self.req_target_uuids
      return

    # Any group but the one to be evacuated is a potential target
    self.target_uuids = []
    for group_uuid in owned_groups:
      if group_uuid != self.group_uuid:
        self.target_uuids.append(group_uuid)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the group name and the space-separated target group UUIDs

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list twice: the master node followed by the members
      of the group to be evacuated

    """
    master = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    group = self.cfg.GetNodeGroup(self.group_uuid)
    run_nodes = [master] + group.members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Ask the iallocator for an evacuation plan and return it as jobs.

    @param feedback_fn: not used by this method
    @return: a L{ResultWithJobs} object wrapping the computed jobs
    @raise errors.OpPrereqError: if the iallocator run is not successful

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    request = iallocator.IAReqGroupChange(instances=instances,
                                          target_groups=self.target_uuids)
    ial = iallocator.IAllocator(self.cfg, self.rpc, request)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    evac_jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release,
                                    False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(evac_jobs), self.op.group_name)

    return ResultWithJobs(evac_jobs)
15178
15179
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Common base for the tags LUs.

  This class is abstract; it provides the name expansion and the target
  lookup shared by the tags LUs deriving from it.

  """
  def ExpandNames(self):
    """Resolve the name of the tagged object and declare its lock.

    No lock is declared for kinds not handled here, nor when the opcode has
    a false C{use_locking} attribute.

    """
    self.group_uuid = None
    self.needed_locks = {}

    kind = self.op.kind
    lock_level = None
    lock_name = None

    if kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_NODE
      lock_name = self.op.name
    elif kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_INSTANCE
      lock_name = self.op.name
    elif kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid
    elif kind == constants.TAG_NETWORK:
      self.network_uuid = self.cfg.LookupNetwork(self.op.name)
      lock_level = locking.LEVEL_NETWORK
      lock_name = self.network_uuid

    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Stores the object the tags belong to in C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is not known

    """
    kind = self.op.kind

    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      target = self.cfg.GetNodeGroup(self.group_uuid)
    elif kind == constants.TAG_NETWORK:
      target = self.cfg.GetNetwork(self.network_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)

    self.target = target
15233
15234
class LUTagsGet(TagsLU):
  """Logical unit returning the tags of an object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names as in L{TagsLU}, then mark the locks as shared.

    """
    TagsLU.ExpandNames(self)

    # Reading the tags does not modify anything, so shared locks are enough
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Return the tags of the target object.

    @param feedback_fn: not used by this method
    @rtype: list
    @return: the tags of C{self.target}

    """
    tags = self.target.GetTags()
    return list(tags)
15252
15253
15254 class LUTagsSearch(NoHooksLU):
15255   """Searches the tags for a given pattern.
15256
15257   """
15258   REQ_BGL = False
15259
15260   def ExpandNames(self):
15261     self.needed_locks = {}
15262
15263   def CheckPrereq(self):
15264     """Check prerequisites.
15265
15266     This checks the pattern passed for validity by compiling it.
15267
15268     """
15269     try:
15270       self.re = re.compile(self.op.pattern)
15271     except re.error, err:
15272       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15273                                  (self.op.pattern, err), errors.ECODE_INVAL)
15274
15275   def Exec(self, feedback_fn):
15276     """Returns the tag list.
15277
15278     """
15279     cfg = self.cfg
15280     tgts = [("/cluster", cfg.GetClusterInfo())]
15281     ilist = cfg.GetAllInstancesInfo().values()
15282     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15283     nlist = cfg.GetAllNodesInfo().values()
15284     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15285     tgts.extend(("/nodegroup/%s" % n.name, n)
15286                 for n in cfg.GetAllNodeGroupsInfo().values())
15287     results = []
15288     for path, target in tgts:
15289       for tag in target.GetTags():
15290         if self.re.search(tag):
15291           results.append((path, tag))
15292     return results
15293
15294
15295 class LUTagsSet(TagsLU):
15296   """Sets a tag on a given object.
15297
15298   """
15299   REQ_BGL = False
15300
15301   def CheckPrereq(self):
15302     """Check prerequisites.
15303
15304     This checks the type and length of the tag name and value.
15305
15306     """
15307     TagsLU.CheckPrereq(self)
15308     for tag in self.op.tags:
15309       objects.TaggableObject.ValidateTag(tag)
15310
15311   def Exec(self, feedback_fn):
15312     """Sets the tag.
15313
15314     """
15315     try:
15316       for tag in self.op.tags:
15317         self.target.AddTag(tag)
15318     except errors.TagError, err:
15319       raise errors.OpExecError("Error while setting tag: %s" % str(err))
15320     self.cfg.Update(self.target, feedback_fn)
15321
15322
class LUTagsDel(TagsLU):
  """Logical unit removing a list of tags from an object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Every tag is validated and has to be present on the target.

    @raise errors.OpPrereqError: if some of the tags are not set on the target

    """
    TagsLU.CheckPrereq(self)

    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    missing_tags = frozenset(self.op.tags) - self.target.GetTags()
    if not missing_tags:
      return

    quoted = ["'%s'" % tag for tag in sorted(missing_tags)]
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove all tags from the target and save it in the configuration.

    @param feedback_fn: passed on to the configuration update

    """
    target = self.target

    for tag in self.op.tags:
      target.RemoveTag(tag)

    self.cfg.Update(target, feedback_fn)
15355
15356
class LUTestDelay(NoHooksLU):
  """Logical unit sleeping for a given amount of time.

  Depending on the opcode the sleep happens on the master, on a list of
  nodes, or on both.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    If nodes were given, their names are expanded and they are locked.

    """
    self.needed_locks = {}

    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Run the delay once, on the master and/or the requested nodes.

    @raise errors.OpExecError: if the delay on the master reports a failure

    """
    duration = self.op.duration

    if self.op.on_master:
      if not utils.TestDelay(duration):
        raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      node_results = self.rpc.call_test_delay(self.op.on_nodes, duration)
      for (node, nres) in node_results.items():
        nres.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Run the delay once, or as many times as requested.

    A repeat count of zero runs the delay exactly once, without logging
    the iterations.

    @param feedback_fn: not used by this method

    """
    repeat = self.op.repeat

    if repeat == 0:
      self._TestDelay()
      return

    last_iteration = repeat - 1
    for iteration in range(repeat):
      self.LogInfo("Test delay iteration %d/%d", iteration, last_iteration)
      self._TestDelay()
15403
15404
class LURestrictedCommand(NoHooksLU):
  """Logical unit for executing restricted commands.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node names and declare the node locks.

    The node locks are requested in shared mode unless the opcode asks for
    locking (C{use_locking}).

    """
    if self.op.nodes:
      self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    level = locking.LEVEL_NODE
    self.needed_locks = {level: self.op.nodes}
    self.share_locks = {level: not self.op.use_locking}

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Execute restricted command and return output.

    @rtype: list of tuples
    @return: one C{(success, data)} pair per node, in the order of the
        opcode's node list; C{data} is the RPC payload on success and an
        error message otherwise

    """
    node_names = self.op.nodes
    held_locks = frozenset(self.owned_locks(locking.LEVEL_NODE))

    # Check if correct locks are held
    assert set(node_names).issubset(held_locks)

    rpcres = self.rpc.call_restricted_command(node_names, self.op.command)

    def _NodeOutcome(name):
      """Converts the RPC result of one node into a result pair.

      """
      nres = rpcres[name]
      if not nres.fail_msg:
        return (True, nres.payload)
      return (False, ("Command '%s' on node '%s' failed: %s" %
                      (self.op.command, name, nres.fail_msg)))

    return [_NodeOutcome(name) for name in node_names]
15450
15451
15452 class LUTestJqueue(NoHooksLU):
15453   """Utility LU to test some aspects of the job queue.
15454
15455   """
15456   REQ_BGL = False
15457
15458   # Must be lower than default timeout for WaitForJobChange to see whether it
15459   # notices changed jobs
15460   _CLIENT_CONNECT_TIMEOUT = 20.0
15461   _CLIENT_CONFIRM_TIMEOUT = 60.0
15462
15463   @classmethod
15464   def _NotifyUsingSocket(cls, cb, errcls):
15465     """Opens a Unix socket and waits for another program to connect.
15466
15467     @type cb: callable
15468     @param cb: Callback to send socket name to client
15469     @type errcls: class
15470     @param errcls: Exception class to use for errors
15471
15472     """
15473     # Using a temporary directory as there's no easy way to create temporary
15474     # sockets without writing a custom loop around tempfile.mktemp and
15475     # socket.bind
15476     tmpdir = tempfile.mkdtemp()
15477     try:
15478       tmpsock = utils.PathJoin(tmpdir, "sock")
15479
15480       logging.debug("Creating temporary socket at %s", tmpsock)
15481       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15482       try:
15483         sock.bind(tmpsock)
15484         sock.listen(1)
15485
15486         # Send details to client
15487         cb(tmpsock)
15488
15489         # Wait for client to connect before continuing
15490         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15491         try:
15492           (conn, _) = sock.accept()
15493         except socket.error, err:
15494           raise errcls("Client didn't connect in time (%s)" % err)
15495       finally:
15496         sock.close()
15497     finally:
15498       # Remove as soon as client is connected
15499       shutil.rmtree(tmpdir)
15500
15501     # Wait for client to close
15502     try:
15503       try:
15504         # pylint: disable=E1101
15505         # Instance of '_socketobject' has no ... member
15506         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15507         conn.recv(1)
15508       except socket.error, err:
15509         raise errcls("Client failed to confirm notification (%s)" % err)
15510     finally:
15511       conn.close()
15512
15513   def _SendNotification(self, test, arg, sockname):
15514     """Sends a notification to the client.
15515
15516     @type test: string
15517     @param test: Test name
15518     @param arg: Test argument (depends on test)
15519     @type sockname: string
15520     @param sockname: Socket path
15521
15522     """
15523     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15524
15525   def _Notify(self, prereq, test, arg):
15526     """Notifies the client of a test.
15527
15528     @type prereq: bool
15529     @param prereq: Whether this is a prereq-phase test
15530     @type test: string
15531     @param test: Test name
15532     @param arg: Test argument (depends on test)
15533
15534     """
15535     if prereq:
15536       errcls = errors.OpPrereqError
15537     else:
15538       errcls = errors.OpExecError
15539
15540     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15541                                                   test, arg),
15542                                    errcls)
15543
15544   def CheckArguments(self):
15545     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15546     self.expandnames_calls = 0
15547
15548   def ExpandNames(self):
15549     checkargs_calls = getattr(self, "checkargs_calls", 0)
15550     if checkargs_calls < 1:
15551       raise errors.ProgrammerError("CheckArguments was not called")
15552
15553     self.expandnames_calls += 1
15554
15555     if self.op.notify_waitlock:
15556       self._Notify(True, constants.JQT_EXPANDNAMES, None)
15557
15558     self.LogInfo("Expanding names")
15559
15560     # Get lock on master node (just to get a lock, not for a particular reason)
15561     self.needed_locks = {
15562       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15563       }
15564
15565   def Exec(self, feedback_fn):
15566     if self.expandnames_calls < 1:
15567       raise errors.ProgrammerError("ExpandNames was not called")
15568
15569     if self.op.notify_exec:
15570       self._Notify(False, constants.JQT_EXEC, None)
15571
15572     self.LogInfo("Executing")
15573
15574     if self.op.log_messages:
15575       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15576       for idx, msg in enumerate(self.op.log_messages):
15577         self.LogInfo("Sending log message %s", idx + 1)
15578         feedback_fn(constants.JQT_MSGPREFIX + msg)
15579         # Report how many test messages have been sent
15580         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15581
15582     if self.op.fail:
15583       raise errors.OpExecError("Opcode failure was requested")
15584
15585     return True
15586
15587
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU builds an iallocator request from the opcode parameters and
  returns either the generated input text or, after running the named
  allocator, its output text.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a parameter is missing or invalid for
        the selected mode, or if the mode or direction is unknown

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # The name used for the allocation test must not belong to an
      # existing instance
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # Every disk must be a dict with an integer size and a valid mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.iallocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def _BuildInstanceAllocRequest(self, name):
    """Builds an instance allocation request from the opcode parameters.

    @type name: string
    @param name: Instance name to use in the request
    @return: the C{iallocator.IAReqInstanceAlloc} request object

    """
    return iallocator.IAReqInstanceAlloc(name=name,
                                         memory=self.op.memory,
                                         disks=self.op.disks,
                                         disk_template=self.op.disk_template,
                                         os=self.op.os,
                                         tags=self.op.tags,
                                         nics=self.op.nics,
                                         vcpus=self.op.vcpus,
                                         spindle_use=self.op.spindle_use,
                                         hypervisor=self.op.hypervisor)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @raise errors.ProgrammerError: if the mode is not handled here
    @return: the allocator input text for direction "in", otherwise the
        output text of the allocator run

    """
    mode = self.op.mode
    if mode == constants.IALLOCATOR_MODE_ALLOC:
      req = self._BuildInstanceAllocRequest(self.op.name)
    elif mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      # One request per instance, named by appending the index to the name
      insts = [self._BuildInstanceAllocRequest("%s%s" % (self.op.name, idx))
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      # The mode has to be interpolated into the message; passing it as a
      # separate exception argument would leave the "%s" unformatted
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % mode)

    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.iallocator, validate=False)
      result = ial.out_text
    return result
15698
15699
15700 class LUNetworkAdd(LogicalUnit):
15701   """Logical unit for creating networks.
15702
15703   """
15704   HPATH = "network-add"
15705   HTYPE = constants.HTYPE_NETWORK
15706   REQ_BGL = False
15707
15708   def BuildHooksNodes(self):
15709     """Build hooks nodes.
15710
15711     """
15712     mn = self.cfg.GetMasterNode()
15713     return ([mn], [mn])
15714
15715   def CheckArguments(self):
15716     if self.op.mac_prefix:
15717       self.op.mac_prefix = \
15718         utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15719
15720   def ExpandNames(self):
15721     self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15722
15723     if self.op.conflicts_check:
15724       self.share_locks[locking.LEVEL_NODE] = 1
15725       self.needed_locks = {
15726         locking.LEVEL_NODE: locking.ALL_SET,
15727         }
15728     else:
15729       self.needed_locks = {}
15730
15731     self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15732
15733   def CheckPrereq(self):
15734     if self.op.network is None:
15735       raise errors.OpPrereqError("Network must be given",
15736                                  errors.ECODE_INVAL)
15737
15738     uuid = self.cfg.LookupNetwork(self.op.network_name)
15739
15740     if uuid:
15741       raise errors.OpPrereqError("Network '%s' already defined" %
15742                                  self.op.network, errors.ECODE_EXISTS)
15743
15744     # Check tag validity
15745     for tag in self.op.tags:
15746       objects.TaggableObject.ValidateTag(tag)
15747
15748   def BuildHooksEnv(self):
15749     """Build hooks env.
15750
15751     """
15752     args = {
15753       "name": self.op.network_name,
15754       "subnet": self.op.network,
15755       "gateway": self.op.gateway,
15756       "network6": self.op.network6,
15757       "gateway6": self.op.gateway6,
15758       "mac_prefix": self.op.mac_prefix,
15759       "network_type": self.op.network_type,
15760       "tags": self.op.tags,
15761       }
15762     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15763
15764   def Exec(self, feedback_fn):
15765     """Add the ip pool to the cluster.
15766
15767     """
15768     nobj = objects.Network(name=self.op.network_name,
15769                            network=self.op.network,
15770                            gateway=self.op.gateway,
15771                            network6=self.op.network6,
15772                            gateway6=self.op.gateway6,
15773                            mac_prefix=self.op.mac_prefix,
15774                            network_type=self.op.network_type,
15775                            uuid=self.network_uuid,
15776                            family=constants.IP4_VERSION)
15777     # Initialize the associated address pool
15778     try:
15779       pool = network.AddressPool.InitializeNetwork(nobj)
15780     except errors.AddressPoolError, e:
15781       raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15782
15783     # Check if we need to reserve the nodes and the cluster master IP
15784     # These may not be allocated to any instances in routed mode, as
15785     # they wouldn't function anyway.
15786     if self.op.conflicts_check:
15787       for node in self.cfg.GetAllNodesInfo().values():
15788         for ip in [node.primary_ip, node.secondary_ip]:
15789           try:
15790             if pool.Contains(ip):
15791               pool.Reserve(ip)
15792               self.LogInfo("Reserved IP address of node '%s' (%s)",
15793                            node.name, ip)
15794           except errors.AddressPoolError:
15795             self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
15796                             node.name, ip)
15797
15798       master_ip = self.cfg.GetClusterInfo().master_ip
15799       try:
15800         if pool.Contains(master_ip):
15801           pool.Reserve(master_ip)
15802           self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
15803       except errors.AddressPoolError:
15804         self.LogWarning("Cannot reserve cluster master IP address (%s)",
15805                         master_ip)
15806
15807     if self.op.add_reserved_ips:
15808       for ip in self.op.add_reserved_ips:
15809         try:
15810           pool.Reserve(ip, external=True)
15811         except errors.AddressPoolError, e:
15812           raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15813
15814     if self.op.tags:
15815       for tag in self.op.tags:
15816         nobj.AddTag(tag)
15817
15818     self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15819     del self.remove_locks[locking.LEVEL_NETWORK]
15820
15821
class LUNetworkRemove(LogicalUnit):
  """Logical unit for removing networks.

  """
  HPATH = "network-remove"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the network name and declares the needed locks.

    @raise errors.OpPrereqError: if the network cannot be found

    """
    net_uuid = self.cfg.LookupNetwork(self.op.network_name)
    self.network_uuid = net_uuid

    if not net_uuid:
      raise errors.OpPrereqError(("Network '%s' not found" %
                                  self.op.network_name),
                                 errors.ECODE_INVAL)

    self.share_locks[locking.LEVEL_NODEGROUP] = 1
    self.needed_locks = {
      locking.LEVEL_NETWORK: [net_uuid],
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the network is not listed in the networks of any node
    group.

    @raise errors.OpPrereqError: if at least one node group still has the
        network connected

    """
    # Collect the names of all node groups the network is connected to
    connected = []
    for group in self.cfg.GetAllNodeGroupsInfo().values():
      if self.network_uuid in group.networks:
        connected.append(group.name)

    if not connected:
      return

    group_names = utils.CommaJoin(utils.NiceSort(connected))
    self.LogWarning("Network '%s' is connected to the following"
                    " node groups: %s" %
                    (self.op.network_name, group_names))
    raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["NETWORK_NAME"] = self.op.network_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the network.

    @raise errors.OpExecError: if the configuration reports an error while
        removing the network

    """
    net_uuid = self.network_uuid
    try:
      self.cfg.RemoveNetwork(net_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
                               (self.op.network_name, net_uuid))
15885
15886
15887 class LUNetworkSetParams(LogicalUnit):
15888   """Modifies the parameters of a network.
15889
15890   """
15891   HPATH = "network-modify"
15892   HTYPE = constants.HTYPE_NETWORK
15893   REQ_BGL = False
15894
15895   def CheckArguments(self):
15896     if (self.op.gateway and
15897         (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15898       raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15899                                  " at once", errors.ECODE_INVAL)
15900
15901   def ExpandNames(self):
15902     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15903     if self.network_uuid is None:
15904       raise errors.OpPrereqError(("Network '%s' not found" %
15905                                   self.op.network_name),
15906                                  errors.ECODE_INVAL)
15907
15908     self.needed_locks = {
15909       locking.LEVEL_NETWORK: [self.network_uuid],
15910       }
15911
15912   def CheckPrereq(self):
15913     """Check prerequisites.
15914
15915     """
15916     self.network = self.cfg.GetNetwork(self.network_uuid)
15917     self.gateway = self.network.gateway
15918     self.network_type = self.network.network_type
15919     self.mac_prefix = self.network.mac_prefix
15920     self.network6 = self.network.network6
15921     self.gateway6 = self.network.gateway6
15922     self.tags = self.network.tags
15923
15924     self.pool = network.AddressPool(self.network)
15925
15926     if self.op.gateway:
15927       if self.op.gateway == constants.VALUE_NONE:
15928         self.gateway = None
15929       else:
15930         self.gateway = self.op.gateway
15931         if self.pool.IsReserved(self.gateway):
15932           raise errors.OpPrereqError("%s is already reserved" %
15933                                      self.gateway, errors.ECODE_INVAL)
15934
15935     if self.op.network_type:
15936       if self.op.network_type == constants.VALUE_NONE:
15937         self.network_type = None
15938       else:
15939         self.network_type = self.op.network_type
15940
15941     if self.op.mac_prefix:
15942       if self.op.mac_prefix == constants.VALUE_NONE:
15943         self.mac_prefix = None
15944       else:
15945         self.mac_prefix = \
15946           utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15947
15948     if self.op.gateway6:
15949       if self.op.gateway6 == constants.VALUE_NONE:
15950         self.gateway6 = None
15951       else:
15952         self.gateway6 = self.op.gateway6
15953
15954     if self.op.network6:
15955       if self.op.network6 == constants.VALUE_NONE:
15956         self.network6 = None
15957       else:
15958         self.network6 = self.op.network6
15959
15960   def BuildHooksEnv(self):
15961     """Build hooks env.
15962
15963     """
15964     args = {
15965       "name": self.op.network_name,
15966       "subnet": self.network.network,
15967       "gateway": self.gateway,
15968       "network6": self.network6,
15969       "gateway6": self.gateway6,
15970       "mac_prefix": self.mac_prefix,
15971       "network_type": self.network_type,
15972       "tags": self.tags,
15973       }
15974     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15975
15976   def BuildHooksNodes(self):
15977     """Build hooks nodes.
15978
15979     """
15980     mn = self.cfg.GetMasterNode()
15981     return ([mn], [mn])
15982
15983   def Exec(self, feedback_fn):
15984     """Modifies the network.
15985
15986     """
15987     #TODO: reserve/release via temporary reservation manager
15988     #      extend cfg.ReserveIp/ReleaseIp with the external flag
15989     if self.op.gateway:
15990       if self.gateway == self.network.gateway:
15991         self.LogWarning("Gateway is already %s", self.gateway)
15992       else:
15993         if self.gateway:
15994           self.pool.Reserve(self.gateway, external=True)
15995         if self.network.gateway:
15996           self.pool.Release(self.network.gateway, external=True)
15997         self.network.gateway = self.gateway
15998
15999     if self.op.add_reserved_ips:
16000       for ip in self.op.add_reserved_ips:
16001         try:
16002           if self.pool.IsReserved(ip):
16003             self.LogWarning("IP address %s is already reserved", ip)
16004           else:
16005             self.pool.Reserve(ip, external=True)
16006         except errors.AddressPoolError, err:
16007           self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16008
16009     if self.op.remove_reserved_ips:
16010       for ip in self.op.remove_reserved_ips:
16011         if ip == self.network.gateway:
16012           self.LogWarning("Cannot unreserve Gateway's IP")
16013           continue
16014         try:
16015           if not self.pool.IsReserved(ip):
16016             self.LogWarning("IP address %s is already unreserved", ip)
16017           else:
16018             self.pool.Release(ip, external=True)
16019         except errors.AddressPoolError, err:
16020           self.LogWarning("Cannot release IP address %s: %s", ip, err)
16021
16022     if self.op.mac_prefix:
16023       self.network.mac_prefix = self.mac_prefix
16024
16025     if self.op.network6:
16026       self.network.network6 = self.network6
16027
16028     if self.op.gateway6:
16029       self.network.gateway6 = self.gateway6
16030
16031     if self.op.network_type:
16032       self.network.network_type = self.network_type
16033
16034     self.pool.Validate()
16035
16036     self.cfg.Update(self.network, feedback_fn)
16037
16038
class _NetworkQuery(_QueryBase):
  """Query implementation for networks.

  """
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    """Computes the UUIDs of the wanted networks; no locks are requested.

    Without requested names all networks are selected, ordered by name via
    C{utils.NiceSort}. Otherwise each requested entry may be a UUID or a
    name and the request order is kept.

    @raise errors.OpPrereqError: if a requested network does not exist

    """
    lu.needed_locks = {}

    self._all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_networks.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Does nothing.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    The group and instance mappings and the pool statistics are only
    computed when the corresponding data was requested; otherwise C{None}
    is passed for them.

    """
    do_instances = query.NETQ_INST in self.requested_data
    do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
    do_stats = query.NETQ_STATS in self.requested_data

    network_to_groups = None
    network_to_instances = None
    stats = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in self.wanted)

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        all_nodes = lu.cfg.GetAllNodesInfo()
        network_to_instances = dict((uuid, []) for uuid in self.wanted)

      for group in all_groups.values():
        if do_instances:
          # A set keeps the membership test below cheap for every instance,
          # instead of scanning a list of node names each time
          group_nodes = frozenset(node.name for node in all_nodes.values()
                                  if node.group == group.uuid)
          group_instances = [instance for instance in all_instances.values()
                             if instance.primary_node in group_nodes]

        for (net_uuid, netparams) in group.networks.items():
          if net_uuid not in network_to_groups:
            continue

          mode = netparams[constants.NIC_MODE]
          link = netparams[constants.NIC_LINK]
          info = group.name + "(" + mode + ", " + link + ")"
          network_to_groups[net_uuid].append(info)

          if do_instances:
            net_name = self._all_networks[net_uuid].name
            for instance in group_instances:
              # An instance is listed once, no matter how many of its NICs
              # use the network
              for nic in instance.nics:
                if nic.network == net_name:
                  network_to_instances[net_uuid].append(instance.name)
                  break

    if do_stats:
      stats = {}
      for uuid, net in self._all_networks.items():
        if uuid in self.wanted:
          pool = network.AddressPool(net)
          stats[uuid] = {
            "free_count": pool.GetFreeCount(),
            "reserved_count": pool.GetReservedCount(),
            "map": pool.GetMap(),
            "external_reservations":
              utils.CommaJoin(pool.GetExternalReservations()),
            }

    return query.NetworkQueryData([self._all_networks[uuid]
                                   for uuid in self.wanted],
                                   network_to_groups,
                                   network_to_instances,
                                   stats)
16134
16135
class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the network query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    fields = self.op.output_fields
    self.nq = _NetworkQuery(name_filter, fields, False)

  def ExpandNames(self):
    """Delegates the name expansion to the query object.

    """
    query_obj = self.nq
    query_obj.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    query_obj = self.nq
    return query_obj.OldStyleQuery(self)
16151
16152
class LUNetworkConnect(LogicalUnit):
  """Logical unit connecting a network to a node group.

  The connection is stored as an entry in the group's C{networks} mapping,
  keyed by the network UUID and holding the NIC mode/link parameters.

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves network and group names to UUIDs and declares the locks.

    @raise errors.OpPrereqError: if either the network or the node group
      lookup returns C{None}

    """
    opcode = self.op
    self.network_name = opcode.network_name
    self.group_name = opcode.group_name
    self.network_mode = opcode.network_mode
    self.network_link = opcode.network_link

    net_uuid = self.cfg.LookupNetwork(self.network_name)
    self.network_uuid = net_uuid
    if net_uuid is None:
      raise errors.OpPrereqError("Network %s does not exist" %
                                 self.network_name, errors.ECODE_INVAL)

    grp_uuid = self.cfg.LookupNodeGroup(self.group_name)
    self.group_uuid = grp_uuid
    if grp_uuid is None:
      raise errors.OpPrereqError("Group %s does not exist" %
                                 self.group_name, errors.ECODE_INVAL)

    # The instance level starts out empty; DeclareLocks fills it in
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [grp_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    # The network itself is only locked when its pool will be inspected
    if opcode.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [net_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    """Fills in the instance locks when conflict checking is requested.

    @param level: locking level currently being declared

    """
    if level != locking.LEVEL_INSTANCE:
      return

    assert not self.needed_locks[locking.LEVEL_INSTANCE]

    # Lock instances optimistically, needs verification once group lock has
    # been acquired
    if self.op.conflicts_check:
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

  def BuildHooksEnv(self):
    """Builds the hooks environment.

    @return: dict with the group name and the requested network mode/link

    """
    return {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }

  def BuildHooksNodes(self):
    """Builds the hooks node lists.

    @return: tuple containing the group's member list twice

    """
    members = self.cfg.GetNodeGroup(self.group_uuid).members
    return (members, members)

  def CheckPrereq(self):
    """Validates the NIC parameters and optionally looks for IP conflicts.

    If the network is already present in the group's C{networks} mapping,
    only a warning is logged and C{self.connected} is set, which turns
    L{Exec} into a no-op.

    @raise errors.OpPrereqError: if the conflict check is requested and a
      NIC of one of the locked instances has an IP contained in the
      network's address pool

    """
    group_locks = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    assert self.group_uuid in group_locks

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    # A bridge existence check for bridged mode is currently disabled:
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)

    # "connected" here means: was already connected before this operation
    self.connected = False
    if self.network_uuid in self.group.networks:
      already_msg = ("Network '%s' is already mapped to group '%s'" %
                     (self.network_name, self.group.name))
      self.LogWarning(already_msg)
      self.connected = True
      return

    if not self.op.conflicts_check:
      return

    # Check if locked instances are still correct
    inst_locks = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, inst_locks)

    net_obj = self.cfg.GetNetwork(self.network_uuid)
    addr_pool = network.AddressPool(net_obj)

    # One (instance name, NIC index, IP) entry per NIC inside the pool
    conflicts = [(inst.name, nic_idx, nic.ip)
                 for (_, inst) in self.cfg.GetMultiInstanceInfo(inst_locks)
                 for (nic_idx, nic) in enumerate(inst.nics)
                 if addr_pool.Contains(nic.ip)]

    if not conflicts:
      return

    conflict_text = utils.CommaJoin("%s: %s/%s" % (name, idx, ip)
                                    for (name, idx, ip) in conflicts)
    self.LogWarning("Following occurences use IPs from network %s"
                    " that is about to connect to nodegroup %s: %s" %
                    (self.network_name, self.group.name, conflict_text))
    raise errors.OpPrereqError("Conflicting IPs found."
                               " Please remove/modify"
                               " corresponding NICs",
                               errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Records the network in the group and updates the configuration.

    Does nothing if L{CheckPrereq} found the network already connected.

    @param feedback_fn: feedback function, passed on to the config update

    """
    if not self.connected:
      self.group.networks[self.network_uuid] = self.netparams
      self.cfg.Update(self.group, feedback_fn)
16263
16264
class LUNetworkDisconnect(LogicalUnit):
  """Logical unit disconnecting a network from a node group.

  The connection is removed by deleting the network's UUID from the
  group's C{networks} mapping.

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves network and group names to UUIDs and declares the locks.

    @raise errors.OpPrereqError: if either the network or the node group
      lookup returns C{None}

    """
    opcode = self.op
    self.network_name = opcode.network_name
    self.group_name = opcode.group_name

    net_uuid = self.cfg.LookupNetwork(self.network_name)
    self.network_uuid = net_uuid
    if net_uuid is None:
      raise errors.OpPrereqError("Network %s does not exist" %
                                 self.network_name, errors.ECODE_INVAL)

    grp_uuid = self.cfg.LookupNodeGroup(self.group_name)
    self.group_uuid = grp_uuid
    if grp_uuid is None:
      raise errors.OpPrereqError("Group %s does not exist" %
                                 self.group_name, errors.ECODE_INVAL)

    # The instance level starts out empty; DeclareLocks fills it in
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [grp_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    """Fills in the instance locks when conflict checking is requested.

    @param level: locking level currently being declared

    """
    if level != locking.LEVEL_INSTANCE:
      return

    assert not self.needed_locks[locking.LEVEL_INSTANCE]

    # Lock instances optimistically, needs verification once group lock has
    # been acquired
    if self.op.conflicts_check:
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

  def BuildHooksEnv(self):
    """Builds the hooks environment.

    @return: dict containing only the group name

    """
    return {
      "GROUP_NAME": self.group_name,
      }

  def BuildHooksNodes(self):
    """Builds the hooks node lists.

    @return: tuple containing the group's member list twice

    """
    members = self.cfg.GetNodeGroup(self.group_uuid).members
    return (members, members)

  def CheckPrereq(self):
    """Checks that the network is connected and optionally still unused.

    If the network is not present in the group's C{networks} mapping, only
    a warning is logged and C{self.connected} is cleared, which turns
    L{Exec} into a no-op.

    @raise errors.OpPrereqError: if the conflict check is requested and a
      NIC of one of the locked instances references the network by name

    """
    group_locks = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    assert self.group_uuid in group_locks

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False
      return

    if not self.op.conflicts_check:
      return

    # Check if locked instances are still correct
    inst_locks = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, inst_locks)

    # One (instance name, NIC index, IP) entry per NIC using this network
    in_use = [(inst.name, nic_idx, nic.ip)
              for (_, inst) in self.cfg.GetMultiInstanceInfo(inst_locks)
              for (nic_idx, nic) in enumerate(inst.nics)
              if nic.network == self.network_name]

    if not in_use:
      return

    in_use_text = utils.CommaJoin("%s: %s/%s" % (name, idx, ip)
                                  for (name, idx, ip) in in_use)
    self.LogWarning("Following occurences use IPs from network %s"
                    " that is about to disconnected from the nodegroup"
                    " %s: %s" %
                    (self.network_name, self.group.name, in_use_text))
    raise errors.OpPrereqError("Conflicting IPs."
                               " Please remove/modify"
                               " corresponding NICS",
                               errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Removes the network from the group and updates the configuration.

    Does nothing if L{CheckPrereq} found the network not connected.

    @param feedback_fn: feedback function, passed on to the config update

    """
    if self.connected:
      del self.group.networks[self.network_uuid]
      self.cfg.Update(self.group, feedback_fn)
16358
16359
#: Maps each query resource constant to the class implementing the query
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

# The registered resources must match constants.QR_VIA_OP exactly
assert frozenset(_QUERY_IMPL) == constants.QR_VIA_OP
16372
16373
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the entry registered for C{name} in L{_QUERY_IMPL}
  @raise errors.OpPrereqError: if C{name} is not a key of L{_QUERY_IMPL}

  """
  if name not in _QUERY_IMPL:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return _QUERY_IMPL[name]
16385
16386
def _CheckForConflictingIp(lu, ip, node):
  """Raises an error if the configuration reports a conflict for the IP.

  The check itself is done by C{lu.cfg.CheckIPInNodeGroup}; only the first
  element of its result is inspected.

  @param lu: object whose C{cfg} attribute provides C{CheckIPInNodeGroup}
  @type ip: string
  @param ip: ip address
  @type node: string
  @param node: node name
  @return: C{(None, None)} when no conflicting network is reported
  @raise errors.OpPrereqError: if a conflicting network is reported

  """
  (conflicting_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conflicting_net is None:
    return (None, None)

  raise errors.OpPrereqError("Conflicting IP found:"
                             " %s <> %s." % (ip, conflicting_net),
                             errors.ECODE_INVAL)