code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56 from ganeti import query
  57 from ganeti import qlang
  58 from ganeti import opcodes
  59 from ganeti import ht
  60 from ganeti import rpc
  61 from ganeti import runtime
  62 from ganeti import pathutils
  63 from ganeti import vcluster
  64 from ganeti import network
  65 from ganeti.masterd import iallocator
  66
  67 import ganeti.masterd.instance # pylint: disable=W0611
  68
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141
 142     # Dictionaries used to declare locking needs to mcpu
 143     self.needed_locks = None
 144     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 145     self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
 146
 147     self.add_locks = {}
 148     self.remove_locks = {}
 149
 150     # Used to force good behavior when calling helper functions
 151     self.recalculate_locks = {}
 152
 153     # logging
 154     self.Log = processor.Log # pylint: disable=C0103
 155     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 156     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 157     self.LogStep = processor.LogStep # pylint: disable=C0103
 158     # support for dry-run
 159     self.dry_run_result = None
 160     # support for generic debug attribute
 161     if (not hasattr(self.op, "debug_level") or
 162         not isinstance(self.op.debug_level, int)):
 163       self.op.debug_level = 0
 164
 165     # Tasklets
 166     self.tasklets = None
 167
 168     # Validate opcode parameters and set defaults
 169     self.op.Validate(True)
 170
 171     self.CheckArguments()
 172
 173   def CheckArguments(self):
 174     """Check syntactic validity for the opcode arguments.
 175
 176     This method is for doing a simple syntactic check and ensure
 177     validity of opcode parameters, without any cluster-related
 178     checks. While the same can be accomplished in ExpandNames and/or
 179     CheckPrereq, doing these separate is better because:
 180
 181       - ExpandNames is left as as purely a lock-related function
 182       - CheckPrereq is run after we have acquired locks (and possible
 183         waited for them)
 184
 185     The function is allowed to change the self.op attribute so that
 186     later methods can no longer worry about missing parameters.
 187
 188     """
 189     pass
 190
 191   def ExpandNames(self):
 192     """Expand names for this LU.
 193
 194     This method is called before starting to execute the opcode, and it should
 195     update all the parameters of the opcode to their canonical form (e.g. a
 196     short node name must be fully expanded after this method has successfully
 197     completed). This way locking, hooks, logging, etc. can work correctly.
 198
 199     LUs which implement this method must also populate the self.needed_locks
 200     member, as a dict with lock levels as keys, and a list of needed lock names
 201     as values. Rules:
 202
 203       - use an empty dict if you don't need any lock
 204       - if you don't need any lock at a particular level omit that
 205         level (note that in this case C{DeclareLocks} won't be called
 206         at all for that level)
 207       - if you need locks at a level, but you can't calculate it in
 208         this function, initialise that level with an empty list and do
 209         further processing in L{LogicalUnit.DeclareLocks} (see that
 210         function's docstring)
 211       - don't put anything for the BGL level
 212       - if you want all locks at a level use L{locking.ALL_SET} as a value
 213
 214     If you need to share locks (rather than acquire them exclusively) at one
 215     level you can modify self.share_locks, setting a true value (usually 1) for
 216     that level. By default locks are not shared.
 217
 218     This function can also define a list of tasklets, which then will be
 219     executed in order instead of the usual LU-level CheckPrereq and Exec
 220     functions, if those are not defined by the LU.
 221
 222     Examples::
 223
 224       # Acquire all nodes and one instance
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: locking.ALL_SET,
 227         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 228       }
 229       # Acquire just two nodes
 230       self.needed_locks = {
 231         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 232       }
 233       # Acquire no locks
 234       self.needed_locks = {} # No, you can't leave it to the default value None
 235
 236     """
 237     # The implementation of this method is mandatory only if the new LU is
 238     # concurrent, so that old LUs don't need to be changed all at the same
 239     # time.
 240     if self.REQ_BGL:
 241       self.needed_locks = {} # Exclusive LUs don't need locks.
 242     else:
 243       raise NotImplementedError
 244
 245   def DeclareLocks(self, level):
 246     """Declare LU locking needs for a level
 247
 248     While most LUs can just declare their locking needs at ExpandNames time,
 249     sometimes there's the need to calculate some locks after having acquired
 250     the ones before. This function is called just before acquiring locks at a
 251     particular level, but after acquiring the ones at lower levels, and permits
 252     such calculations. It can be used to modify self.needed_locks, and by
 253     default it does nothing.
 254
 255     This function is only called if you have something already set in
 256     self.needed_locks for the level.
 257
 258     @param level: Locking level which is going to be locked
 259     @type level: member of L{ganeti.locking.LEVELS}
 260
 261     """
 262
 263   def CheckPrereq(self):
 264     """Check prerequisites for this LU.
 265
 266     This method should check that the prerequisites for the execution
 267     of this LU are fulfilled. It can do internode communication, but
 268     it should be idempotent - no cluster or system changes are
 269     allowed.
 270
 271     The method should raise errors.OpPrereqError in case something is
 272     not fulfilled. Its return value is ignored.
 273
 274     This method should also update all the parameters of the opcode to
 275     their canonical form if it hasn't been done by ExpandNames before.
 276
 277     """
 278     if self.tasklets is not None:
 279       for (idx, tl) in enumerate(self.tasklets):
 280         logging.debug("Checking prerequisites for tasklet %s/%s",
 281                       idx + 1, len(self.tasklets))
 282         tl.CheckPrereq()
 283     else:
 284       pass
 285
 286   def Exec(self, feedback_fn):
 287     """Execute the LU.
 288
 289     This method should implement the actual work. It should raise
 290     errors.OpExecError for failures that are somewhat dealt with in
 291     code, or expected.
 292
 293     """
 294     if self.tasklets is not None:
 295       for (idx, tl) in enumerate(self.tasklets):
 296         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 297         tl.Exec(feedback_fn)
 298     else:
 299       raise NotImplementedError
 300
 301   def BuildHooksEnv(self):
 302     """Build hooks environment for this LU.
 303
 304     @rtype: dict
 305     @return: Dictionary containing the environment that will be used for
 306       running the hooks for this LU. The keys of the dict must not be prefixed
 307       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 308       will extend the environment with additional variables. If no environment
 309       should be defined, an empty dictionary should be returned (not C{None}).
 310     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 311       will not be called.
 312
 313     """
 314     raise NotImplementedError
 315
 316   def BuildHooksNodes(self):
 317     """Build list of nodes to run LU's hooks.
 318
 319     @rtype: tuple; (list, list)
 320     @return: Tuple containing a list of node names on which the hook
 321       should run before the execution and a list of node names on which the
 322       hook should run after the execution. No nodes should be returned as an
 323       empty list (and not None).
 324     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 325       will not be called.
 326
 327     """
 328     raise NotImplementedError
 329
 330   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 331     """Notify the LU about the results of its hooks.
 332
 333     This method is called every time a hooks phase is executed, and notifies
 334     the Logical Unit about the hooks' result. The LU can then use it to alter
 335     its result based on the hooks.  By default the method does nothing and the
 336     previous result is passed back unchanged but any LU can define it if it
 337     wants to use the local cluster hook-scripts somehow.
 338
 339     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 340         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 341     @param hook_results: the results of the multi-node hooks rpc call
 342     @param feedback_fn: function used send feedback back to the caller
 343     @param lu_result: the previous Exec result this LU had, or None
 344         in the PRE phase
 345     @return: the new Exec result, based on the previous result
 346         and hook results
 347
 348     """
 349     # API must be kept, thus we ignore the unused argument and could
 350     # be a function warnings
 351     # pylint: disable=W0613,R0201
 352     return lu_result
 353
 354   def _ExpandAndLockInstance(self):
 355     """Helper function to expand and lock an instance.
 356
 357     Many LUs that work on an instance take its name in self.op.instance_name
 358     and need to expand it and then declare the expanded name for locking. This
 359     function does it, and then updates self.op.instance_name to the expanded
 360     name. It also initializes needed_locks as a dict, if this hasn't been done
 361     before.
 362
 363     """
 364     if self.needed_locks is None:
 365       self.needed_locks = {}
 366     else:
 367       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 368         "_ExpandAndLockInstance called with instance-level locks set"
 369     self.op.instance_name = _ExpandInstanceName(self.cfg,
 370                                                 self.op.instance_name)
 371     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 372
 373   def _LockInstancesNodes(self, primary_only=False,
 374                           level=locking.LEVEL_NODE):
 375     """Helper function to declare instances' nodes for locking.
 376
 377     This function should be called after locking one or more instances to lock
 378     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 379     with all primary or secondary nodes for instances already locked and
 380     present in self.needed_locks[locking.LEVEL_INSTANCE].
 381
 382     It should be called from DeclareLocks, and for safety only works if
 383     self.recalculate_locks[locking.LEVEL_NODE] is set.
 384
 385     In the future it may grow parameters to just lock some instance's nodes, or
 386     to just lock primaries or secondary nodes, if needed.
 387
 388     If should be called in DeclareLocks in a way similar to::
 389
 390       if level == locking.LEVEL_NODE:
 391         self._LockInstancesNodes()
 392
 393     @type primary_only: boolean
 394     @param primary_only: only lock primary nodes of locked instances
 395     @param level: Which lock level to use for locking nodes
 396
 397     """
 398     assert level in self.recalculate_locks, \
 399       "_LockInstancesNodes helper function called with no nodes to recalculate"
 400
 401     # TODO: check if we're really been called with the instance locks held
 402
 403     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 404     # future we might want to have different behaviors depending on the value
 405     # of self.recalculate_locks[locking.LEVEL_NODE]
 406     wanted_nodes = []
 407     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 408     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 409       wanted_nodes.append(instance.primary_node)
 410       if not primary_only:
 411         wanted_nodes.extend(instance.secondary_nodes)
 412
 413     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 414       self.needed_locks[level] = wanted_nodes
 415     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 416       self.needed_locks[level].extend(wanted_nodes)
 417     else:
 418       raise errors.ProgrammerError("Unknown recalculation mode")
 419
 420     del self.recalculate_locks[level]
 421
 422
 423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 424   """Simple LU which runs no hooks.
 425
 426   This LU is intended as a parent for other LogicalUnits which will
 427   run no hooks, in order to reduce duplicate code.
 428
 429   """
 430   HPATH = None
 431   HTYPE = None
 432
 433   def BuildHooksEnv(self):
 434     """Empty BuildHooksEnv for NoHooksLu.
 435
 436     This just raises an error.
 437
 438     """
 439     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 440
 441   def BuildHooksNodes(self):
 442     """Empty BuildHooksNodes for NoHooksLU.
 443
 444     """
 445     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 446
 447
 448 class Tasklet:
 449   """Tasklet base class.
 450
 451   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 452   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 453   tasklets know nothing about locks.
 454
 455   Subclasses must follow these rules:
 456     - Implement CheckPrereq
 457     - Implement Exec
 458
 459   """
 460   def __init__(self, lu):
 461     self.lu = lu
 462
 463     # Shortcuts
 464     self.cfg = lu.cfg
 465     self.rpc = lu.rpc
 466
 467   def CheckPrereq(self):
 468     """Check prerequisites for this tasklets.
 469
 470     This method should check whether the prerequisites for the execution of
 471     this tasklet are fulfilled. It can do internode communication, but it
 472     should be idempotent - no cluster or system changes are allowed.
 473
 474     The method should raise errors.OpPrereqError in case something is not
 475     fulfilled. Its return value is ignored.
 476
 477     This method should also update all parameters to their canonical form if it
 478     hasn't been done before.
 479
 480     """
 481     pass
 482
 483   def Exec(self, feedback_fn):
 484     """Execute the tasklet.
 485
 486     This method should implement the actual work. It should raise
 487     errors.OpExecError for failures that are somewhat dealt with in code, or
 488     expected.
 489
 490     """
 491     raise NotImplementedError
 492
 493
 494 class _QueryBase:
 495   """Base for query utility classes.
 496
 497   """
 498   #: Attribute holding field definitions
 499   FIELDS = None
 500
 501   #: Field to sort by
 502   SORT_FIELD = "name"
 503
 504   def __init__(self, qfilter, fields, use_locking):
 505     """Initializes this class.
 506
 507     """
 508     self.use_locking = use_locking
 509
 510     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 511                              namefield=self.SORT_FIELD)
 512     self.requested_data = self.query.RequestedData()
 513     self.names = self.query.RequestedNames()
 514
 515     # Sort only if no names were requested
 516     self.sort_by_name = not self.names
 517
 518     self.do_locking = None
 519     self.wanted = None
 520
 521   def _GetNames(self, lu, all_names, lock_level):
 522     """Helper function to determine names asked for in the query.
 523
 524     """
 525     if self.do_locking:
 526       names = lu.owned_locks(lock_level)
 527     else:
 528       names = all_names
 529
 530     if self.wanted == locking.ALL_SET:
 531       assert not self.names
 532       # caller didn't specify names, so ordering is not important
 533       return utils.NiceSort(names)
 534
 535     # caller specified names and we must keep the same order
 536     assert self.names
 537     assert not self.do_locking or lu.glm.is_owned(lock_level)
 538
 539     missing = set(self.wanted).difference(names)
 540     if missing:
 541       raise errors.OpExecError("Some items were removed before retrieving"
 542                                " their data: %s" % missing)
 543
 544     # Return expanded names
 545     return self.wanted
 546
 547   def ExpandNames(self, lu):
 548     """Expand names for this query.
 549
 550     See L{LogicalUnit.ExpandNames}.
 551
 552     """
 553     raise NotImplementedError()
 554
 555   def DeclareLocks(self, lu, level):
 556     """Declare locks for this query.
 557
 558     See L{LogicalUnit.DeclareLocks}.
 559
 560     """
 561     raise NotImplementedError()
 562
 563   def _GetQueryData(self, lu):
 564     """Collects all data for this query.
 565
 566     @return: Query data object
 567
 568     """
 569     raise NotImplementedError()
 570
 571   def NewStyleQuery(self, lu):
 572     """Collect data and execute query.
 573
 574     """
 575     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 576                                   sort_by_name=self.sort_by_name)
 577
 578   def OldStyleQuery(self, lu):
 579     """Collect data and execute query.
 580
 581     """
 582     return self.query.OldStyleQuery(self._GetQueryData(lu),
 583                                     sort_by_name=self.sort_by_name)
 584
 585
 586 def _ShareAll():
 587   """Returns a dict declaring all lock levels shared.
 588
 589   """
 590   return dict.fromkeys(locking.LEVELS, 1)
 591
 592
 593 def _AnnotateDiskParams(instance, devs, cfg):
 594   """Little helper wrapper to the rpc annotation method.
 595
 596   @param instance: The instance object
 597   @type devs: List of L{objects.Disk}
 598   @param devs: The root devices (not any of its children!)
 599   @param cfg: The config object
 600   @returns The annotated disk copies
 601   @see L{rpc.AnnotateDiskParams}
 602
 603   """
 604   return rpc.AnnotateDiskParams(instance.disk_template, devs,
 605                                 cfg.GetInstanceDiskParams(instance))
 606
 607
 608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 609                               cur_group_uuid):
 610   """Checks if node groups for locked instances are still correct.
 611
 612   @type cfg: L{config.ConfigWriter}
 613   @param cfg: Cluster configuration
 614   @type instances: dict; string as key, L{objects.Instance} as value
 615   @param instances: Dictionary, instance name as key, instance object as value
 616   @type owned_groups: iterable of string
 617   @param owned_groups: List of owned groups
 618   @type owned_nodes: iterable of string
 619   @param owned_nodes: List of owned nodes
 620   @type cur_group_uuid: string or None
 621   @param cur_group_uuid: Optional group UUID to check against instance's groups
 622
 623   """
 624   for (name, inst) in instances.items():
 625     assert owned_nodes.issuperset(inst.all_nodes), \
 626       "Instance %s's nodes changed while we kept the lock" % name
 627
 628     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 629
 630     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 631       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 632
 633
 634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
 635                              primary_only=False):
 636   """Checks if the owned node groups are still correct for an instance.
 637
 638   @type cfg: L{config.ConfigWriter}
 639   @param cfg: The cluster configuration
 640   @type instance_name: string
 641   @param instance_name: Instance name
 642   @type owned_groups: set or frozenset
 643   @param owned_groups: List of currently owned node groups
 644   @type primary_only: boolean
 645   @param primary_only: Whether to check node groups for only the primary node
 646
 647   """
 648   inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
 649
 650   if not owned_groups.issuperset(inst_groups):
 651     raise errors.OpPrereqError("Instance %s's node groups changed since"
 652                                " locks were acquired, current groups are"
 653                                " are '%s', owning groups '%s'; retry the"
 654                                " operation" %
 655                                (instance_name,
 656                                 utils.CommaJoin(inst_groups),
 657                                 utils.CommaJoin(owned_groups)),
 658                                errors.ECODE_STATE)
 659
 660   return inst_groups
 661
 662
 663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 664   """Checks if the instances in a node group are still correct.
 665
 666   @type cfg: L{config.ConfigWriter}
 667   @param cfg: The cluster configuration
 668   @type group_uuid: string
 669   @param group_uuid: Node group UUID
 670   @type owned_instances: set or frozenset
 671   @param owned_instances: List of currently owned instances
 672
 673   """
 674   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 675   if owned_instances != wanted_instances:
 676     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 677                                " locks were acquired, wanted '%s', have '%s';"
 678                                " retry the operation" %
 679                                (group_uuid,
 680                                 utils.CommaJoin(wanted_instances),
 681                                 utils.CommaJoin(owned_instances)),
 682                                errors.ECODE_STATE)
 683
 684   return wanted_instances
 685
 686
 687 def _SupportsOob(cfg, node):
 688   """Tells if node supports OOB.
 689
 690   @type cfg: L{config.ConfigWriter}
 691   @param cfg: The cluster configuration
 692   @type node: L{objects.Node}
 693   @param node: The node
 694   @return: The OOB script if supported or an empty string otherwise
 695
 696   """
 697   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 698
 699
 700 def _CopyLockList(names):
 701   """Makes a copy of a list of lock names.
 702
 703   Handles L{locking.ALL_SET} correctly.
 704
 705   """
 706   if names == locking.ALL_SET:
 707     return locking.ALL_SET
 708   else:
 709     return names[:]
 710
 711
 712 def _GetWantedNodes(lu, nodes):
 713   """Returns list of checked and expanded node names.
 714
 715   @type lu: L{LogicalUnit}
 716   @param lu: the logical unit on whose behalf we execute
 717   @type nodes: list
 718   @param nodes: list of node names or None for all nodes
 719   @rtype: list
 720   @return: the list of nodes, sorted
 721   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 722
 723   """
 724   if nodes:
 725     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 726
 727   return utils.NiceSort(lu.cfg.GetNodeList())
 728
 729
 730 def _GetWantedInstances(lu, instances):
 731   """Returns list of checked and expanded instance names.
 732
 733   @type lu: L{LogicalUnit}
 734   @param lu: the logical unit on whose behalf we execute
 735   @type instances: list
 736   @param instances: list of instance names or None for all instances
 737   @rtype: list
 738   @return: the list of instances, sorted
 739   @raise errors.OpPrereqError: if the instances parameter is wrong type
 740   @raise errors.OpPrereqError: if any of the passed instances is not found
 741
 742   """
 743   if instances:
 744     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 745   else:
 746     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 747   return wanted
 748
 749
 750 def _GetUpdatedParams(old_params, update_dict,
 751                       use_default=True, use_none=False):
 752   """Return the new version of a parameter dictionary.
 753
 754   @type old_params: dict
 755   @param old_params: old parameters
 756   @type update_dict: dict
 757   @param update_dict: dict containing new parameter values, or
 758       constants.VALUE_DEFAULT to reset the parameter to its default
 759       value
 760   @param use_default: boolean
 761   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 762       values as 'to be deleted' values
 763   @param use_none: boolean
 764   @type use_none: whether to recognise C{None} values as 'to be
 765       deleted' values
 766   @rtype: dict
 767   @return: the new parameter dictionary
 768
 769   """
 770   params_copy = copy.deepcopy(old_params)
 771   for key, val in update_dict.iteritems():
 772     if ((use_default and val == constants.VALUE_DEFAULT) or
 773         (use_none and val is None)):
 774       try:
 775         del params_copy[key]
 776       except KeyError:
 777         pass
 778     else:
 779       params_copy[key] = val
 780   return params_copy
 781
 782
 783 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 784   """Return the new version of a instance policy.
 785
 786   @param group_policy: whether this policy applies to a group and thus
 787     we should support removal of policy entries
 788
 789   """
 790   use_none = use_default = group_policy
 791   ipolicy = copy.deepcopy(old_ipolicy)
 792   for key, value in new_ipolicy.items():
 793     if key not in constants.IPOLICY_ALL_KEYS:
 794       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 795                                  errors.ECODE_INVAL)
 796     if key in constants.IPOLICY_ISPECS:
 797       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 798       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 799                                        use_none=use_none,
 800                                        use_default=use_default)
 801     else:
 802       if (not value or value == [constants.VALUE_DEFAULT] or
 803           value == constants.VALUE_DEFAULT):
 804         if group_policy:
 805           del ipolicy[key]
 806         else:
 807           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 808                                      " on the cluster'" % key,
 809                                      errors.ECODE_INVAL)
 810       else:
 811         if key in constants.IPOLICY_PARAMETERS:
 812           # FIXME: we assume all such values are float
 813           try:
 814             ipolicy[key] = float(value)
 815           except (TypeError, ValueError), err:
 816             raise errors.OpPrereqError("Invalid value for attribute"
 817                                        " '%s': '%s', error: %s" %
 818                                        (key, value, err), errors.ECODE_INVAL)
 819         else:
 820           # FIXME: we assume all others are lists; this should be redone
 821           # in a nicer way
 822           ipolicy[key] = list(value)
 823   try:
 824     objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
 825   except errors.ConfigurationError, err:
 826     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 827                                errors.ECODE_INVAL)
 828   return ipolicy
 829
 830
 831 def _UpdateAndVerifySubDict(base, updates, type_check):
 832   """Updates and verifies a dict with sub dicts of the same type.
 833
 834   @param base: The dict with the old data
 835   @param updates: The dict with the new data
 836   @param type_check: Dict suitable to ForceDictType to verify correct types
 837   @returns: A new dict with updated and verified values
 838
 839   """
 840   def fn(old, value):
 841     new = _GetUpdatedParams(old, value)
 842     utils.ForceDictType(new, type_check)
 843     return new
 844
 845   ret = copy.deepcopy(base)
 846   ret.update(dict((key, fn(base.get(key, {}), value))
 847                   for key, value in updates.items()))
 848   return ret
 849
 850
 851 def _MergeAndVerifyHvState(op_input, obj_input):
 852   """Combines the hv state from an opcode with the one of the object
 853
 854   @param op_input: The input dict from the opcode
 855   @param obj_input: The input dict from the objects
 856   @return: The verified and updated dict
 857
 858   """
 859   if op_input:
 860     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 861     if invalid_hvs:
 862       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 863                                  " %s" % utils.CommaJoin(invalid_hvs),
 864                                  errors.ECODE_INVAL)
 865     if obj_input is None:
 866       obj_input = {}
 867     type_check = constants.HVSTS_PARAMETER_TYPES
 868     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 869
 870   return None
 871
 872
 873 def _MergeAndVerifyDiskState(op_input, obj_input):
 874   """Combines the disk state from an opcode with the one of the object
 875
 876   @param op_input: The input dict from the opcode
 877   @param obj_input: The input dict from the objects
 878   @return: The verified and updated dict
 879   """
 880   if op_input:
 881     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 882     if invalid_dst:
 883       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 884                                  utils.CommaJoin(invalid_dst),
 885                                  errors.ECODE_INVAL)
 886     type_check = constants.DSS_PARAMETER_TYPES
 887     if obj_input is None:
 888       obj_input = {}
 889     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 890                                               type_check))
 891                 for key, value in op_input.items())
 892
 893   return None
 894
 895
 896 def _ReleaseLocks(lu, level, names=None, keep=None):
 897   """Releases locks owned by an LU.
 898
 899   @type lu: L{LogicalUnit}
 900   @param level: Lock level
 901   @type names: list or None
 902   @param names: Names of locks to release
 903   @type keep: list or None
 904   @param keep: Names of locks to retain
 905
 906   """
 907   assert not (keep is not None and names is not None), \
 908          "Only one of the 'names' and the 'keep' parameters can be given"
 909
 910   if names is not None:
 911     should_release = names.__contains__
 912   elif keep:
 913     should_release = lambda name: name not in keep
 914   else:
 915     should_release = None
 916
 917   owned = lu.owned_locks(level)
 918   if not owned:
 919     # Not owning any lock at this level, do nothing
 920     pass
 921
 922   elif should_release:
 923     retain = []
 924     release = []
 925
 926     # Determine which locks to release
 927     for name in owned:
 928       if should_release(name):
 929         release.append(name)
 930       else:
 931         retain.append(name)
 932
 933     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 934
 935     # Release just some locks
 936     lu.glm.release(level, names=release)
 937
 938     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 939   else:
 940     # Release everything
 941     lu.glm.release(level)
 942
 943     assert not lu.glm.is_owned(level), "No locks should be owned"
 944
 945
 946 def _MapInstanceDisksToNodes(instances):
 947   """Creates a map from (node, volume) to instance name.
 948
 949   @type instances: list of L{objects.Instance}
 950   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 951
 952   """
 953   return dict(((node, vol), inst.name)
 954               for inst in instances
 955               for (node, vols) in inst.MapLVsByNode().items()
 956               for vol in vols)
 957
 958
 959 def _RunPostHook(lu, node_name):
 960   """Runs the post-hook for an opcode on a single node.
 961
 962   """
 963   hm = lu.proc.BuildHooksManager(lu)
 964   try:
 965     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 966   except Exception, err: # pylint: disable=W0703
 967     lu.LogWarning("Errors occurred running hooks on %s: %s",
 968                   node_name, err)
 969
 970
 971 def _CheckOutputFields(static, dynamic, selected):
 972   """Checks whether all selected fields are valid.
 973
 974   @type static: L{utils.FieldSet}
 975   @param static: static fields set
 976   @type dynamic: L{utils.FieldSet}
 977   @param dynamic: dynamic fields set
 978
 979   """
 980   f = utils.FieldSet()
 981   f.Extend(static)
 982   f.Extend(dynamic)
 983
 984   delta = f.NonMatching(selected)
 985   if delta:
 986     raise errors.OpPrereqError("Unknown output fields selected: %s"
 987                                % ",".join(delta), errors.ECODE_INVAL)
 988
 989
 990 def _CheckGlobalHvParams(params):
 991   """Validates that given hypervisor params are not global ones.
 992
 993   This will ensure that instances don't get customised versions of
 994   global params.
 995
 996   """
 997   used_globals = constants.HVC_GLOBALS.intersection(params)
 998   if used_globals:
 999     msg = ("The following hypervisor parameters are global and cannot"
1000            " be customized at instance level, please modify them at"
1001            " cluster level: %s" % utils.CommaJoin(used_globals))
1002     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1003
1004
1005 def _CheckNodeOnline(lu, node, msg=None):
1006   """Ensure that a given node is online.
1007
1008   @param lu: the LU on behalf of which we make the check
1009   @param node: the node to check
1010   @param msg: if passed, should be a message to replace the default one
1011   @raise errors.OpPrereqError: if the node is offline
1012
1013   """
1014   if msg is None:
1015     msg = "Can't use offline node"
1016   if lu.cfg.GetNodeInfo(node).offline:
1017     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1018
1019
1020 def _CheckNodeNotDrained(lu, node):
1021   """Ensure that a given node is not drained.
1022
1023   @param lu: the LU on behalf of which we make the check
1024   @param node: the node to check
1025   @raise errors.OpPrereqError: if the node is drained
1026
1027   """
1028   if lu.cfg.GetNodeInfo(node).drained:
1029     raise errors.OpPrereqError("Can't use drained node %s" % node,
1030                                errors.ECODE_STATE)
1031
1032
1033 def _CheckNodeVmCapable(lu, node):
1034   """Ensure that a given node is vm capable.
1035
1036   @param lu: the LU on behalf of which we make the check
1037   @param node: the node to check
1038   @raise errors.OpPrereqError: if the node is not vm capable
1039
1040   """
1041   if not lu.cfg.GetNodeInfo(node).vm_capable:
1042     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1043                                errors.ECODE_STATE)
1044
1045
1046 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1047   """Ensure that a node supports a given OS.
1048
1049   @param lu: the LU on behalf of which we make the check
1050   @param node: the node to check
1051   @param os_name: the OS to query about
1052   @param force_variant: whether to ignore variant errors
1053   @raise errors.OpPrereqError: if the node is not supporting the OS
1054
1055   """
1056   result = lu.rpc.call_os_get(node, os_name)
1057   result.Raise("OS '%s' not in supported OS list for node %s" %
1058                (os_name, node),
1059                prereq=True, ecode=errors.ECODE_INVAL)
1060   if not force_variant:
1061     _CheckOSVariant(result.payload, os_name)
1062
1063
1064 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1065   """Ensure that a node has the given secondary ip.
1066
1067   @type lu: L{LogicalUnit}
1068   @param lu: the LU on behalf of which we make the check
1069   @type node: string
1070   @param node: the node to check
1071   @type secondary_ip: string
1072   @param secondary_ip: the ip to check
1073   @type prereq: boolean
1074   @param prereq: whether to throw a prerequisite or an execute error
1075   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1076   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1077
1078   """
1079   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1080   result.Raise("Failure checking secondary ip on node %s" % node,
1081                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1082   if not result.payload:
1083     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1084            " please fix and re-run this command" % secondary_ip)
1085     if prereq:
1086       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1087     else:
1088       raise errors.OpExecError(msg)
1089
1090
1091 def _GetClusterDomainSecret():
1092   """Reads the cluster domain secret.
1093
1094   """
1095   return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1096                                strict=True)
1097
1098
1099 def _CheckInstanceState(lu, instance, req_states, msg=None):
1100   """Ensure that an instance is in one of the required states.
1101
1102   @param lu: the LU on behalf of which we make the check
1103   @param instance: the instance to check
1104   @param msg: if passed, should be a message to replace the default one
1105   @raise errors.OpPrereqError: if the instance is not in the required state
1106
1107   """
1108   if msg is None:
1109     msg = ("can't use instance from outside %s states" %
1110            utils.CommaJoin(req_states))
1111   if instance.admin_state not in req_states:
1112     raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1113                                (instance.name, instance.admin_state, msg),
1114                                errors.ECODE_STATE)
1115
1116   if constants.ADMINST_UP not in req_states:
1117     pnode = instance.primary_node
1118     if not lu.cfg.GetNodeInfo(pnode).offline:
1119       ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1120       ins_l.Raise("Can't contact node %s for instance information" % pnode,
1121                   prereq=True, ecode=errors.ECODE_ENVIRON)
1122       if instance.name in ins_l.payload:
1123         raise errors.OpPrereqError("Instance %s is running, %s" %
1124                                    (instance.name, msg), errors.ECODE_STATE)
1125     else:
1126       lu.LogWarning("Primary node offline, ignoring check that instance"
1127                      " is down")
1128
1129
1130 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1131   """Computes if value is in the desired range.
1132
1133   @param name: name of the parameter for which we perform the check
1134   @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1135       not just 'disk')
1136   @param ipolicy: dictionary containing min, max and std values
1137   @param value: actual value that we want to use
1138   @return: None or element not meeting the criteria
1139
1140
1141   """
1142   if value in [None, constants.VALUE_AUTO]:
1143     return None
1144   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1145   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1146   if value > max_v or min_v > value:
1147     if qualifier:
1148       fqn = "%s/%s" % (name, qualifier)
1149     else:
1150       fqn = name
1151     return ("%s value %s is not in range [%s, %s]" %
1152             (fqn, value, min_v, max_v))
1153   return None
1154
1155
1156 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1157                                  nic_count, disk_sizes, spindle_use,
1158                                  _compute_fn=_ComputeMinMaxSpec):
1159   """Verifies ipolicy against provided specs.
1160
1161   @type ipolicy: dict
1162   @param ipolicy: The ipolicy
1163   @type mem_size: int
1164   @param mem_size: The memory size
1165   @type cpu_count: int
1166   @param cpu_count: Used cpu cores
1167   @type disk_count: int
1168   @param disk_count: Number of disks used
1169   @type nic_count: int
1170   @param nic_count: Number of nics used
1171   @type disk_sizes: list of ints
1172   @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1173   @type spindle_use: int
1174   @param spindle_use: The number of spindles this instance uses
1175   @param _compute_fn: The compute function (unittest only)
1176   @return: A list of violations, or an empty list of no violations are found
1177
1178   """
1179   assert disk_count == len(disk_sizes)
1180
1181   test_settings = [
1182     (constants.ISPEC_MEM_SIZE, "", mem_size),
1183     (constants.ISPEC_CPU_COUNT, "", cpu_count),
1184     (constants.ISPEC_DISK_COUNT, "", disk_count),
1185     (constants.ISPEC_NIC_COUNT, "", nic_count),
1186     (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1187     ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1188          for idx, d in enumerate(disk_sizes)]
1189
1190   return filter(None,
1191                 (_compute_fn(name, qualifier, ipolicy, value)
1192                  for (name, qualifier, value) in test_settings))
1193
1194
1195 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1196                                      _compute_fn=_ComputeIPolicySpecViolation):
1197   """Compute if instance meets the specs of ipolicy.
1198
1199   @type ipolicy: dict
1200   @param ipolicy: The ipolicy to verify against
1201   @type instance: L{objects.Instance}
1202   @param instance: The instance to verify
1203   @param _compute_fn: The function to verify ipolicy (unittest only)
1204   @see: L{_ComputeIPolicySpecViolation}
1205
1206   """
1207   mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1208   cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1209   spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1210   disk_count = len(instance.disks)
1211   disk_sizes = [disk.size for disk in instance.disks]
1212   nic_count = len(instance.nics)
1213
1214   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1215                      disk_sizes, spindle_use)
1216
1217
1218 def _ComputeIPolicyInstanceSpecViolation(
1219   ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1220   """Compute if instance specs meets the specs of ipolicy.
1221
1222   @type ipolicy: dict
1223   @param ipolicy: The ipolicy to verify against
1224   @param instance_spec: dict
1225   @param instance_spec: The instance spec to verify
1226   @param _compute_fn: The function to verify ipolicy (unittest only)
1227   @see: L{_ComputeIPolicySpecViolation}
1228
1229   """
1230   mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1231   cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1232   disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1233   disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1234   nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1235   spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1236
1237   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1238                      disk_sizes, spindle_use)
1239
1240
1241 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1242                                  target_group,
1243                                  _compute_fn=_ComputeIPolicyInstanceViolation):
1244   """Compute if instance meets the specs of the new target group.
1245
1246   @param ipolicy: The ipolicy to verify
1247   @param instance: The instance object to verify
1248   @param current_group: The current group of the instance
1249   @param target_group: The new group of the instance
1250   @param _compute_fn: The function to verify ipolicy (unittest only)
1251   @see: L{_ComputeIPolicySpecViolation}
1252
1253   """
1254   if current_group == target_group:
1255     return []
1256   else:
1257     return _compute_fn(ipolicy, instance)
1258
1259
1260 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1261                             _compute_fn=_ComputeIPolicyNodeViolation):
1262   """Checks that the target node is correct in terms of instance policy.
1263
1264   @param ipolicy: The ipolicy to verify
1265   @param instance: The instance object to verify
1266   @param node: The new node to relocate
1267   @param ignore: Ignore violations of the ipolicy
1268   @param _compute_fn: The function to verify ipolicy (unittest only)
1269   @see: L{_ComputeIPolicySpecViolation}
1270
1271   """
1272   primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1273   res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1274
1275   if res:
1276     msg = ("Instance does not meet target node group's (%s) instance"
1277            " policy: %s") % (node.group, utils.CommaJoin(res))
1278     if ignore:
1279       lu.LogWarning(msg)
1280     else:
1281       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1282
1283
1284 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1285   """Computes a set of any instances that would violate the new ipolicy.
1286
1287   @param old_ipolicy: The current (still in-place) ipolicy
1288   @param new_ipolicy: The new (to become) ipolicy
1289   @param instances: List of instances to verify
1290   @return: A list of instances which violates the new ipolicy but
1291       did not before
1292
1293   """
1294   return (_ComputeViolatingInstances(new_ipolicy, instances) -
1295           _ComputeViolatingInstances(old_ipolicy, instances))
1296
1297
1298 def _ExpandItemName(fn, name, kind):
1299   """Expand an item name.
1300
1301   @param fn: the function to use for expansion
1302   @param name: requested item name
1303   @param kind: text description ('Node' or 'Instance')
1304   @return: the resolved (full) name
1305   @raise errors.OpPrereqError: if the item is not found
1306
1307   """
1308   full_name = fn(name)
1309   if full_name is None:
1310     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1311                                errors.ECODE_NOENT)
1312   return full_name
1313
1314
1315 def _ExpandNodeName(cfg, name):
1316   """Wrapper over L{_ExpandItemName} for nodes."""
1317   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1318
1319
1320 def _ExpandInstanceName(cfg, name):
1321   """Wrapper over L{_ExpandItemName} for instance."""
1322   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1323
1324
1325 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1326                          network_type, mac_prefix, tags):
1327   """Builds network related env variables for hooks
1328
1329   This builds the hook environment from individual variables.
1330
1331   @type name: string
1332   @param name: the name of the network
1333   @type subnet: string
1334   @param subnet: the ipv4 subnet
1335   @type gateway: string
1336   @param gateway: the ipv4 gateway
1337   @type network6: string
1338   @param network6: the ipv6 subnet
1339   @type gateway6: string
1340   @param gateway6: the ipv6 gateway
1341   @type network_type: string
1342   @param network_type: the type of the network
1343   @type mac_prefix: string
1344   @param mac_prefix: the mac_prefix
1345   @type tags: list
1346   @param tags: the tags of the network
1347
1348   """
1349   env = {}
1350   if name:
1351     env["NETWORK_NAME"] = name
1352   if subnet:
1353     env["NETWORK_SUBNET"] = subnet
1354   if gateway:
1355     env["NETWORK_GATEWAY"] = gateway
1356   if network6:
1357     env["NETWORK_SUBNET6"] = network6
1358   if gateway6:
1359     env["NETWORK_GATEWAY6"] = gateway6
1360   if mac_prefix:
1361     env["NETWORK_MAC_PREFIX"] = mac_prefix
1362   if network_type:
1363     env["NETWORK_TYPE"] = network_type
1364   if tags:
1365     env["NETWORK_TAGS"] = " ".join(tags)
1366
1367   return env
1368
1369
1370 def _BuildNetworkHookEnvByObject(net):
1371   """Builds network related env varliables for hooks
1372
1373   @type net: L{objects.Network}
1374   @param net: the network object
1375
1376   """
1377   args = {
1378     "name": net.name,
1379     "subnet": net.network,
1380     "gateway": net.gateway,
1381     "network6": net.network6,
1382     "gateway6": net.gateway6,
1383     "network_type": net.network_type,
1384     "mac_prefix": net.mac_prefix,
1385     "tags": net.tags,
1386   }
1387
1388   return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
1389
1390
1391 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1392                           minmem, maxmem, vcpus, nics, disk_template, disks,
1393                           bep, hvp, hypervisor_name, tags):
1394   """Builds instance related env variables for hooks
1395
1396   This builds the hook environment from individual variables.
1397
1398   @type name: string
1399   @param name: the name of the instance
1400   @type primary_node: string
1401   @param primary_node: the name of the instance's primary node
1402   @type secondary_nodes: list
1403   @param secondary_nodes: list of secondary nodes as strings
1404   @type os_type: string
1405   @param os_type: the name of the instance's OS
1406   @type status: string
1407   @param status: the desired status of the instance
1408   @type minmem: string
1409   @param minmem: the minimum memory size of the instance
1410   @type maxmem: string
1411   @param maxmem: the maximum memory size of the instance
1412   @type vcpus: string
1413   @param vcpus: the count of VCPUs the instance has
1414   @type nics: list
1415   @param nics: list of tuples (ip, mac, mode, link, network) representing
1416       the NICs the instance has
1417   @type disk_template: string
1418   @param disk_template: the disk template of the instance
1419   @type disks: list
1420   @param disks: the list of (size, mode) pairs
1421   @type bep: dict
1422   @param bep: the backend parameters for the instance
1423   @type hvp: dict
1424   @param hvp: the hypervisor parameters for the instance
1425   @type hypervisor_name: string
1426   @param hypervisor_name: the hypervisor for the instance
1427   @type tags: list
1428   @param tags: list of instance tags as strings
1429   @rtype: dict
1430   @return: the hook environment for this instance
1431
1432   """
1433   env = {
1434     "OP_TARGET": name,
1435     "INSTANCE_NAME": name,
1436     "INSTANCE_PRIMARY": primary_node,
1437     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1438     "INSTANCE_OS_TYPE": os_type,
1439     "INSTANCE_STATUS": status,
1440     "INSTANCE_MINMEM": minmem,
1441     "INSTANCE_MAXMEM": maxmem,
1442     # TODO(2.7) remove deprecated "memory" value
1443     "INSTANCE_MEMORY": maxmem,
1444     "INSTANCE_VCPUS": vcpus,
1445     "INSTANCE_DISK_TEMPLATE": disk_template,
1446     "INSTANCE_HYPERVISOR": hypervisor_name,
1447   }
1448   if nics:
1449     nic_count = len(nics)
1450     for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1451       if ip is None:
1452         ip = ""
1453       env["INSTANCE_NIC%d_IP" % idx] = ip
1454       env["INSTANCE_NIC%d_MAC" % idx] = mac
1455       env["INSTANCE_NIC%d_MODE" % idx] = mode
1456       env["INSTANCE_NIC%d_LINK" % idx] = link
1457       if network:
1458         env["INSTANCE_NIC%d_NETWORK" % idx] = net
1459         if netinfo:
1460           nobj = objects.Network.FromDict(netinfo)
1461           if nobj.network:
1462             env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1463           if nobj.gateway:
1464             env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1465           if nobj.network6:
1466             env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1467           if nobj.gateway6:
1468             env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1469           if nobj.mac_prefix:
1470             env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1471           if nobj.network_type:
1472             env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1473           if nobj.tags:
1474             env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1475       if mode == constants.NIC_MODE_BRIDGED:
1476         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1477   else:
1478     nic_count = 0
1479
1480   env["INSTANCE_NIC_COUNT"] = nic_count
1481
1482   if disks:
1483     disk_count = len(disks)
1484     for idx, (size, mode) in enumerate(disks):
1485       env["INSTANCE_DISK%d_SIZE" % idx] = size
1486       env["INSTANCE_DISK%d_MODE" % idx] = mode
1487   else:
1488     disk_count = 0
1489
1490   env["INSTANCE_DISK_COUNT"] = disk_count
1491
1492   if not tags:
1493     tags = []
1494
1495   env["INSTANCE_TAGS"] = " ".join(tags)
1496
1497   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1498     for key, value in source.items():
1499       env["INSTANCE_%s_%s" % (kind, key)] = value
1500
1501   return env
1502
1503
1504 def _NICToTuple(lu, nic):
1505   """Build a tupple of nic information.
1506
1507   @type lu:  L{LogicalUnit}
1508   @param lu: the logical unit on whose behalf we execute
1509   @type nic: L{objects.NIC}
1510   @param nic: nic to convert to hooks tuple
1511
1512   """
1513   ip = nic.ip
1514   mac = nic.mac
1515   cluster = lu.cfg.GetClusterInfo()
1516   filled_params = cluster.SimpleFillNIC(nic.nicparams)
1517   mode = filled_params[constants.NIC_MODE]
1518   link = filled_params[constants.NIC_LINK]
1519   net = nic.network
1520   netinfo = None
1521   if net:
1522     net_uuid = lu.cfg.LookupNetwork(net)
1523     if net_uuid:
1524       nobj = lu.cfg.GetNetwork(net_uuid)
1525       netinfo = objects.Network.ToDict(nobj)
1526   return (ip, mac, mode, link, net, netinfo)
1527
1528
1529 def _NICListToTuple(lu, nics):
1530   """Build a list of nic information tuples.
1531
1532   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1533   value in LUInstanceQueryData.
1534
1535   @type lu:  L{LogicalUnit}
1536   @param lu: the logical unit on whose behalf we execute
1537   @type nics: list of L{objects.NIC}
1538   @param nics: list of nics to convert to hooks tuples
1539
1540   """
1541   hooks_nics = []
1542   for nic in nics:
1543     hooks_nics.append(_NICToTuple(lu, nic))
1544   return hooks_nics
1545
1546
1547 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1548   """Builds instance related env variables for hooks from an object.
1549
1550   @type lu: L{LogicalUnit}
1551   @param lu: the logical unit on whose behalf we execute
1552   @type instance: L{objects.Instance}
1553   @param instance: the instance for which we should build the
1554       environment
1555   @type override: dict
1556   @param override: dictionary with key/values that will override
1557       our values
1558   @rtype: dict
1559   @return: the hook environment dictionary
1560
1561   """
1562   cluster = lu.cfg.GetClusterInfo()
1563   bep = cluster.FillBE(instance)
1564   hvp = cluster.FillHV(instance)
1565   args = {
1566     "name": instance.name,
1567     "primary_node": instance.primary_node,
1568     "secondary_nodes": instance.secondary_nodes,
1569     "os_type": instance.os,
1570     "status": instance.admin_state,
1571     "maxmem": bep[constants.BE_MAXMEM],
1572     "minmem": bep[constants.BE_MINMEM],
1573     "vcpus": bep[constants.BE_VCPUS],
1574     "nics": _NICListToTuple(lu, instance.nics),
1575     "disk_template": instance.disk_template,
1576     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1577     "bep": bep,
1578     "hvp": hvp,
1579     "hypervisor_name": instance.hypervisor,
1580     "tags": instance.tags,
1581   }
1582   if override:
1583     args.update(override)
1584   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1585
1586
1587 def _AdjustCandidatePool(lu, exceptions):
1588   """Adjust the candidate pool after node operations.
1589
1590   """
1591   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1592   if mod_list:
1593     lu.LogInfo("Promoted nodes to master candidate role: %s",
1594                utils.CommaJoin(node.name for node in mod_list))
1595     for name in mod_list:
1596       lu.context.ReaddNode(name)
1597   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1598   if mc_now > mc_max:
1599     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1600                (mc_now, mc_max))
1601
1602
1603 def _DecideSelfPromotion(lu, exceptions=None):
1604   """Decide whether I should promote myself as a master candidate.
1605
1606   """
1607   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1608   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1609   # the new node will increase mc_max with one, so:
1610   mc_should = min(mc_should + 1, cp_size)
1611   return mc_now < mc_should
1612
1613
1614 def _ComputeViolatingInstances(ipolicy, instances):
1615   """Computes a set of instances who violates given ipolicy.
1616
1617   @param ipolicy: The ipolicy to verify
1618   @type instances: object.Instance
1619   @param instances: List of instances to verify
1620   @return: A frozenset of instance names violating the ipolicy
1621
1622   """
1623   return frozenset([inst.name for inst in instances
1624                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1625
1626
1627 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1628   """Check that the brigdes needed by a list of nics exist.
1629
1630   """
1631   cluster = lu.cfg.GetClusterInfo()
1632   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1633   brlist = [params[constants.NIC_LINK] for params in paramslist
1634             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1635   if brlist:
1636     result = lu.rpc.call_bridges_exist(target_node, brlist)
1637     result.Raise("Error checking bridges on destination node '%s'" %
1638                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1639
1640
1641 def _CheckInstanceBridgesExist(lu, instance, node=None):
1642   """Check that the brigdes needed by an instance exist.
1643
1644   """
1645   if node is None:
1646     node = instance.primary_node
1647   _CheckNicsBridgesExist(lu, instance.nics, node)
1648
1649
1650 def _CheckOSVariant(os_obj, name):
1651   """Check whether an OS name conforms to the os variants specification.
1652
1653   @type os_obj: L{objects.OS}
1654   @param os_obj: OS object to check
1655   @type name: string
1656   @param name: OS name passed by the user, to check for validity
1657
1658   """
1659   variant = objects.OS.GetVariant(name)
1660   if not os_obj.supported_variants:
1661     if variant:
1662       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1663                                  " passed)" % (os_obj.name, variant),
1664                                  errors.ECODE_INVAL)
1665     return
1666   if not variant:
1667     raise errors.OpPrereqError("OS name must include a variant",
1668                                errors.ECODE_INVAL)
1669
1670   if variant not in os_obj.supported_variants:
1671     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1672
1673
1674 def _GetNodeInstancesInner(cfg, fn):
1675   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1676
1677
1678 def _GetNodeInstances(cfg, node_name):
1679   """Returns a list of all primary and secondary instances on a node.
1680
1681   """
1682
1683   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1684
1685
1686 def _GetNodePrimaryInstances(cfg, node_name):
1687   """Returns primary instances on a node.
1688
1689   """
1690   return _GetNodeInstancesInner(cfg,
1691                                 lambda inst: node_name == inst.primary_node)
1692
1693
1694 def _GetNodeSecondaryInstances(cfg, node_name):
1695   """Returns secondary instances on a node.
1696
1697   """
1698   return _GetNodeInstancesInner(cfg,
1699                                 lambda inst: node_name in inst.secondary_nodes)
1700
1701
1702 def _GetStorageTypeArgs(cfg, storage_type):
1703   """Returns the arguments for a storage type.
1704
1705   """
1706   # Special case for file storage
1707   if storage_type == constants.ST_FILE:
1708     # storage.FileStorage wants a list of storage directories
1709     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1710
1711   return []
1712
1713
1714 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1715   faulty = []
1716
1717   for dev in instance.disks:
1718     cfg.SetDiskID(dev, node_name)
1719
1720   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1721                                                                 instance))
1722   result.Raise("Failed to get disk status from node %s" % node_name,
1723                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1724
1725   for idx, bdev_status in enumerate(result.payload):
1726     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1727       faulty.append(idx)
1728
1729   return faulty
1730
1731
1732 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1733   """Check the sanity of iallocator and node arguments and use the
1734   cluster-wide iallocator if appropriate.
1735
1736   Check that at most one of (iallocator, node) is specified. If none is
1737   specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1738   then the LU's opcode's iallocator slot is filled with the cluster-wide
1739   default iallocator.
1740
1741   @type iallocator_slot: string
1742   @param iallocator_slot: the name of the opcode iallocator slot
1743   @type node_slot: string
1744   @param node_slot: the name of the opcode target node slot
1745
1746   """
1747   node = getattr(lu.op, node_slot, None)
1748   ialloc = getattr(lu.op, iallocator_slot, None)
1749   if node == []:
1750     node = None
1751
1752   if node is not None and ialloc is not None:
1753     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1754                                errors.ECODE_INVAL)
1755   elif ((node is None and ialloc is None) or
1756         ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1757     default_iallocator = lu.cfg.GetDefaultIAllocator()
1758     if default_iallocator:
1759       setattr(lu.op, iallocator_slot, default_iallocator)
1760     else:
1761       raise errors.OpPrereqError("No iallocator or node given and no"
1762                                  " cluster-wide default iallocator found;"
1763                                  " please specify either an iallocator or a"
1764                                  " node, or set a cluster-wide default"
1765                                  " iallocator", errors.ECODE_INVAL)
1766
1767
1768 def _GetDefaultIAllocator(cfg, ialloc):
1769   """Decides on which iallocator to use.
1770
1771   @type cfg: L{config.ConfigWriter}
1772   @param cfg: Cluster configuration object
1773   @type ialloc: string or None
1774   @param ialloc: Iallocator specified in opcode
1775   @rtype: string
1776   @return: Iallocator name
1777
1778   """
1779   if not ialloc:
1780     # Use default iallocator
1781     ialloc = cfg.GetDefaultIAllocator()
1782
1783   if not ialloc:
1784     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1785                                " opcode nor as a cluster-wide default",
1786                                errors.ECODE_INVAL)
1787
1788   return ialloc
1789
1790
1791 def _CheckHostnameSane(lu, name):
1792   """Ensures that a given hostname resolves to a 'sane' name.
1793
1794   The given name is required to be a prefix of the resolved hostname,
1795   to prevent accidental mismatches.
1796
1797   @param lu: the logical unit on behalf of which we're checking
1798   @param name: the name we should resolve and check
1799   @return: the resolved hostname object
1800
1801   """
1802   hostname = netutils.GetHostname(name=name)
1803   if hostname.name != name:
1804     lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1805   if not utils.MatchNameComponent(name, [hostname.name]):
1806     raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1807                                 " same as given hostname '%s'") %
1808                                 (hostname.name, name), errors.ECODE_INVAL)
1809   return hostname
1810
1811
1812 class LUClusterPostInit(LogicalUnit):
1813   """Logical unit for running hooks after cluster initialization.
1814
1815   """
1816   HPATH = "cluster-init"
1817   HTYPE = constants.HTYPE_CLUSTER
1818
1819   def BuildHooksEnv(self):
1820     """Build hooks env.
1821
1822     """
1823     return {
1824       "OP_TARGET": self.cfg.GetClusterName(),
1825       }
1826
1827   def BuildHooksNodes(self):
1828     """Build hooks nodes.
1829
1830     """
1831     return ([], [self.cfg.GetMasterNode()])
1832
1833   def Exec(self, feedback_fn):
1834     """Nothing to do.
1835
1836     """
1837     return True
1838
1839
1840 class LUClusterDestroy(LogicalUnit):
1841   """Logical unit for destroying the cluster.
1842
1843   """
1844   HPATH = "cluster-destroy"
1845   HTYPE = constants.HTYPE_CLUSTER
1846
1847   def BuildHooksEnv(self):
1848     """Build hooks env.
1849
1850     """
1851     return {
1852       "OP_TARGET": self.cfg.GetClusterName(),
1853       }
1854
1855   def BuildHooksNodes(self):
1856     """Build hooks nodes.
1857
1858     """
1859     return ([], [])
1860
1861   def CheckPrereq(self):
1862     """Check prerequisites.
1863
1864     This checks whether the cluster is empty.
1865
1866     Any errors are signaled by raising errors.OpPrereqError.
1867
1868     """
1869     master = self.cfg.GetMasterNode()
1870
1871     nodelist = self.cfg.GetNodeList()
1872     if len(nodelist) != 1 or nodelist[0] != master:
1873       raise errors.OpPrereqError("There are still %d node(s) in"
1874                                  " this cluster." % (len(nodelist) - 1),
1875                                  errors.ECODE_INVAL)
1876     instancelist = self.cfg.GetInstanceList()
1877     if instancelist:
1878       raise errors.OpPrereqError("There are still %d instance(s) in"
1879                                  " this cluster." % len(instancelist),
1880                                  errors.ECODE_INVAL)
1881
1882   def Exec(self, feedback_fn):
1883     """Destroys the cluster.
1884
1885     """
1886     master_params = self.cfg.GetMasterNetworkParameters()
1887
1888     # Run post hooks on master node before it's removed
1889     _RunPostHook(self, master_params.name)
1890
1891     ems = self.cfg.GetUseExternalMipScript()
1892     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1893                                                      master_params, ems)
1894     if result.fail_msg:
1895       self.LogWarning("Error disabling the master IP address: %s",
1896                       result.fail_msg)
1897
1898     return master_params.name
1899
1900
1901 def _VerifyCertificate(filename):
1902   """Verifies a certificate for L{LUClusterVerifyConfig}.
1903
1904   @type filename: string
1905   @param filename: Path to PEM file
1906
1907   """
1908   try:
1909     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1910                                            utils.ReadFile(filename))
1911   except Exception, err: # pylint: disable=W0703
1912     return (LUClusterVerifyConfig.ETYPE_ERROR,
1913             "Failed to load X509 certificate %s: %s" % (filename, err))
1914
1915   (errcode, msg) = \
1916     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1917                                 constants.SSL_CERT_EXPIRATION_ERROR)
1918
1919   if msg:
1920     fnamemsg = "While verifying %s: %s" % (filename, msg)
1921   else:
1922     fnamemsg = None
1923
1924   if errcode is None:
1925     return (None, fnamemsg)
1926   elif errcode == utils.CERT_WARNING:
1927     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1928   elif errcode == utils.CERT_ERROR:
1929     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1930
1931   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1932
1933
1934 def _GetAllHypervisorParameters(cluster, instances):
1935   """Compute the set of all hypervisor parameters.
1936
1937   @type cluster: L{objects.Cluster}
1938   @param cluster: the cluster object
1939   @param instances: list of L{objects.Instance}
1940   @param instances: additional instances from which to obtain parameters
1941   @rtype: list of (origin, hypervisor, parameters)
1942   @return: a list with all parameters found, indicating the hypervisor they
1943        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1944
1945   """
1946   hvp_data = []
1947
1948   for hv_name in cluster.enabled_hypervisors:
1949     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1950
1951   for os_name, os_hvp in cluster.os_hvp.items():
1952     for hv_name, hv_params in os_hvp.items():
1953       if hv_params:
1954         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1955         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1956
1957   # TODO: collapse identical parameter values in a single one
1958   for instance in instances:
1959     if instance.hvparams:
1960       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1961                        cluster.FillHV(instance)))
1962
1963   return hvp_data
1964
1965
1966 class _VerifyErrors(object):
1967   """Mix-in for cluster/group verify LUs.
1968
1969   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1970   self.op and self._feedback_fn to be available.)
1971
1972   """
1973
1974   ETYPE_FIELD = "code"
1975   ETYPE_ERROR = "ERROR"
1976   ETYPE_WARNING = "WARNING"
1977
1978   def _Error(self, ecode, item, msg, *args, **kwargs):
1979     """Format an error message.
1980
1981     Based on the opcode's error_codes parameter, either format a
1982     parseable error code, or a simpler error string.
1983
1984     This must be called only from Exec and functions called from Exec.
1985
1986     """
1987     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1988     itype, etxt, _ = ecode
1989     # first complete the msg
1990     if args:
1991       msg = msg % args
1992     # then format the whole message
1993     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1994       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1995     else:
1996       if item:
1997         item = " " + item
1998       else:
1999         item = ""
2000       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2001     # and finally report it via the feedback_fn
2002     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
2003
2004   def _ErrorIf(self, cond, ecode, *args, **kwargs):
2005     """Log an error message if the passed condition is True.
2006
2007     """
2008     cond = (bool(cond)
2009             or self.op.debug_simulate_errors) # pylint: disable=E1101
2010
2011     # If the error code is in the list of ignored errors, demote the error to a
2012     # warning
2013     (_, etxt, _) = ecode
2014     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
2015       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2016
2017     if cond:
2018       self._Error(ecode, *args, **kwargs)
2019
2020     # do not mark the operation as failed for WARN cases only
2021     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2022       self.bad = self.bad or cond
2023
2024
2025 class LUClusterVerify(NoHooksLU):
2026   """Submits all jobs necessary to verify the cluster.
2027
2028   """
2029   REQ_BGL = False
2030
2031   def ExpandNames(self):
2032     self.needed_locks = {}
2033
2034   def Exec(self, feedback_fn):
2035     jobs = []
2036
2037     if self.op.group_name:
2038       groups = [self.op.group_name]
2039       depends_fn = lambda: None
2040     else:
2041       groups = self.cfg.GetNodeGroupList()
2042
2043       # Verify global configuration
2044       jobs.append([
2045         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2046         ])
2047
2048       # Always depend on global verification
2049       depends_fn = lambda: [(-len(jobs), [])]
2050
2051     jobs.extend(
2052       [opcodes.OpClusterVerifyGroup(group_name=group,
2053                                     ignore_errors=self.op.ignore_errors,
2054                                     depends=depends_fn())]
2055       for group in groups)
2056
2057     # Fix up all parameters
2058     for op in itertools.chain(*jobs): # pylint: disable=W0142
2059       op.debug_simulate_errors = self.op.debug_simulate_errors
2060       op.verbose = self.op.verbose
2061       op.error_codes = self.op.error_codes
2062       try:
2063         op.skip_checks = self.op.skip_checks
2064       except AttributeError:
2065         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2066
2067     return ResultWithJobs(jobs)
2068
2069
2070 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2071   """Verifies the cluster config.
2072
2073   """
2074   REQ_BGL = False
2075
2076   def _VerifyHVP(self, hvp_data):
2077     """Verifies locally the syntax of the hypervisor parameters.
2078
2079     """
2080     for item, hv_name, hv_params in hvp_data:
2081       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2082              (item, hv_name))
2083       try:
2084         hv_class = hypervisor.GetHypervisor(hv_name)
2085         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2086         hv_class.CheckParameterSyntax(hv_params)
2087       except errors.GenericError, err:
2088         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2089
2090   def ExpandNames(self):
2091     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2092     self.share_locks = _ShareAll()
2093
2094   def CheckPrereq(self):
2095     """Check prerequisites.
2096
2097     """
2098     # Retrieve all information
2099     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2100     self.all_node_info = self.cfg.GetAllNodesInfo()
2101     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2102
2103   def Exec(self, feedback_fn):
2104     """Verify integrity of cluster, performing various test on nodes.
2105
2106     """
2107     self.bad = False
2108     self._feedback_fn = feedback_fn
2109
2110     feedback_fn("* Verifying cluster config")
2111
2112     for msg in self.cfg.VerifyConfig():
2113       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2114
2115     feedback_fn("* Verifying cluster certificate files")
2116
2117     for cert_filename in pathutils.ALL_CERT_FILES:
2118       (errcode, msg) = _VerifyCertificate(cert_filename)
2119       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2120
2121     feedback_fn("* Verifying hypervisor parameters")
2122
2123     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2124                                                 self.all_inst_info.values()))
2125
2126     feedback_fn("* Verifying all nodes belong to an existing group")
2127
2128     # We do this verification here because, should this bogus circumstance
2129     # occur, it would never be caught by VerifyGroup, which only acts on
2130     # nodes/instances reachable from existing node groups.
2131
2132     dangling_nodes = set(node.name for node in self.all_node_info.values()
2133                          if node.group not in self.all_group_info)
2134
2135     dangling_instances = {}
2136     no_node_instances = []
2137
2138     for inst in self.all_inst_info.values():
2139       if inst.primary_node in dangling_nodes:
2140         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2141       elif inst.primary_node not in self.all_node_info:
2142         no_node_instances.append(inst.name)
2143
2144     pretty_dangling = [
2145         "%s (%s)" %
2146         (node.name,
2147          utils.CommaJoin(dangling_instances.get(node.name,
2148                                                 ["no instances"])))
2149         for node in dangling_nodes]
2150
2151     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2152                   None,
2153                   "the following nodes (and their instances) belong to a non"
2154                   " existing group: %s", utils.CommaJoin(pretty_dangling))
2155
2156     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2157                   None,
2158                   "the following instances have a non-existing primary-node:"
2159                   " %s", utils.CommaJoin(no_node_instances))
2160
2161     return not self.bad
2162
2163
2164 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2165   """Verifies the status of a node group.
2166
2167   """
2168   HPATH = "cluster-verify"
2169   HTYPE = constants.HTYPE_CLUSTER
2170   REQ_BGL = False
2171
2172   _HOOKS_INDENT_RE = re.compile("^", re.M)
2173
2174   class NodeImage(object):
2175     """A class representing the logical and physical status of a node.
2176
2177     @type name: string
2178     @ivar name: the node name to which this object refers
2179     @ivar volumes: a structure as returned from
2180         L{ganeti.backend.GetVolumeList} (runtime)
2181     @ivar instances: a list of running instances (runtime)
2182     @ivar pinst: list of configured primary instances (config)
2183     @ivar sinst: list of configured secondary instances (config)
2184     @ivar sbp: dictionary of {primary-node: list of instances} for all
2185         instances for which this node is secondary (config)
2186     @ivar mfree: free memory, as reported by hypervisor (runtime)
2187     @ivar dfree: free disk, as reported by the node (runtime)
2188     @ivar offline: the offline status (config)
2189     @type rpc_fail: boolean
2190     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2191         not whether the individual keys were correct) (runtime)
2192     @type lvm_fail: boolean
2193     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2194     @type hyp_fail: boolean
2195     @ivar hyp_fail: whether the RPC call didn't return the instance list
2196     @type ghost: boolean
2197     @ivar ghost: whether this is a known node or not (config)
2198     @type os_fail: boolean
2199     @ivar os_fail: whether the RPC call didn't return valid OS data
2200     @type oslist: list
2201     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2202     @type vm_capable: boolean
2203     @ivar vm_capable: whether the node can host instances
2204
2205     """
2206     def __init__(self, offline=False, name=None, vm_capable=True):
2207       self.name = name
2208       self.volumes = {}
2209       self.instances = []
2210       self.pinst = []
2211       self.sinst = []
2212       self.sbp = {}
2213       self.mfree = 0
2214       self.dfree = 0
2215       self.offline = offline
2216       self.vm_capable = vm_capable
2217       self.rpc_fail = False
2218       self.lvm_fail = False
2219       self.hyp_fail = False
2220       self.ghost = False
2221       self.os_fail = False
2222       self.oslist = {}
2223
2224   def ExpandNames(self):
2225     # This raises errors.OpPrereqError on its own:
2226     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2227
2228     # Get instances in node group; this is unsafe and needs verification later
2229     inst_names = \
2230       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2231
2232     self.needed_locks = {
2233       locking.LEVEL_INSTANCE: inst_names,
2234       locking.LEVEL_NODEGROUP: [self.group_uuid],
2235       locking.LEVEL_NODE: [],
2236
2237       # This opcode is run by watcher every five minutes and acquires all nodes
2238       # for a group. It doesn't run for a long time, so it's better to acquire
2239       # the node allocation lock as well.
2240       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2241       }
2242
2243     self.share_locks = _ShareAll()
2244
2245   def DeclareLocks(self, level):
2246     if level == locking.LEVEL_NODE:
2247       # Get members of node group; this is unsafe and needs verification later
2248       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2249
2250       all_inst_info = self.cfg.GetAllInstancesInfo()
2251
2252       # In Exec(), we warn about mirrored instances that have primary and
2253       # secondary living in separate node groups. To fully verify that
2254       # volumes for these instances are healthy, we will need to do an
2255       # extra call to their secondaries. We ensure here those nodes will
2256       # be locked.
2257       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2258         # Important: access only the instances whose lock is owned
2259         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2260           nodes.update(all_inst_info[inst].secondary_nodes)
2261
2262       self.needed_locks[locking.LEVEL_NODE] = nodes
2263
2264   def CheckPrereq(self):
2265     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2266     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2267
2268     group_nodes = set(self.group_info.members)
2269     group_instances = \
2270       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2271
2272     unlocked_nodes = \
2273         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2274
2275     unlocked_instances = \
2276         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2277
2278     if unlocked_nodes:
2279       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2280                                  utils.CommaJoin(unlocked_nodes),
2281                                  errors.ECODE_STATE)
2282
2283     if unlocked_instances:
2284       raise errors.OpPrereqError("Missing lock for instances: %s" %
2285                                  utils.CommaJoin(unlocked_instances),
2286                                  errors.ECODE_STATE)
2287
2288     self.all_node_info = self.cfg.GetAllNodesInfo()
2289     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2290
2291     self.my_node_names = utils.NiceSort(group_nodes)
2292     self.my_inst_names = utils.NiceSort(group_instances)
2293
2294     self.my_node_info = dict((name, self.all_node_info[name])
2295                              for name in self.my_node_names)
2296
2297     self.my_inst_info = dict((name, self.all_inst_info[name])
2298                              for name in self.my_inst_names)
2299
2300     # We detect here the nodes that will need the extra RPC calls for verifying
2301     # split LV volumes; they should be locked.
2302     extra_lv_nodes = set()
2303
2304     for inst in self.my_inst_info.values():
2305       if inst.disk_template in constants.DTS_INT_MIRROR:
2306         for nname in inst.all_nodes:
2307           if self.all_node_info[nname].group != self.group_uuid:
2308             extra_lv_nodes.add(nname)
2309
2310     unlocked_lv_nodes = \
2311         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2312
2313     if unlocked_lv_nodes:
2314       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2315                                  utils.CommaJoin(unlocked_lv_nodes),
2316                                  errors.ECODE_STATE)
2317     self.extra_lv_nodes = list(extra_lv_nodes)
2318
2319   def _VerifyNode(self, ninfo, nresult):
2320     """Perform some basic validation on data returned from a node.
2321
2322       - check the result data structure is well formed and has all the
2323         mandatory fields
2324       - check ganeti version
2325
2326     @type ninfo: L{objects.Node}
2327     @param ninfo: the node to check
2328     @param nresult: the results from the node
2329     @rtype: boolean
2330     @return: whether overall this call was successful (and we can expect
2331          reasonable values in the respose)
2332
2333     """
2334     node = ninfo.name
2335     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2336
2337     # main result, nresult should be a non-empty dict
2338     test = not nresult or not isinstance(nresult, dict)
2339     _ErrorIf(test, constants.CV_ENODERPC, node,
2340                   "unable to verify node: no data returned")
2341     if test:
2342       return False
2343
2344     # compares ganeti version
2345     local_version = constants.PROTOCOL_VERSION
2346     remote_version = nresult.get("version", None)
2347     test = not (remote_version and
2348                 isinstance(remote_version, (list, tuple)) and
2349                 len(remote_version) == 2)
2350     _ErrorIf(test, constants.CV_ENODERPC, node,
2351              "connection to node returned invalid data")
2352     if test:
2353       return False
2354
2355     test = local_version != remote_version[0]
2356     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2357              "incompatible protocol versions: master %s,"
2358              " node %s", local_version, remote_version[0])
2359     if test:
2360       return False
2361
2362     # node seems compatible, we can actually try to look into its results
2363
2364     # full package version
2365     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2366                   constants.CV_ENODEVERSION, node,
2367                   "software version mismatch: master %s, node %s",
2368                   constants.RELEASE_VERSION, remote_version[1],
2369                   code=self.ETYPE_WARNING)
2370
2371     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2372     if ninfo.vm_capable and isinstance(hyp_result, dict):
2373       for hv_name, hv_result in hyp_result.iteritems():
2374         test = hv_result is not None
2375         _ErrorIf(test, constants.CV_ENODEHV, node,
2376                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2377
2378     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2379     if ninfo.vm_capable and isinstance(hvp_result, list):
2380       for item, hv_name, hv_result in hvp_result:
2381         _ErrorIf(True, constants.CV_ENODEHV, node,
2382                  "hypervisor %s parameter verify failure (source %s): %s",
2383                  hv_name, item, hv_result)
2384
2385     test = nresult.get(constants.NV_NODESETUP,
2386                        ["Missing NODESETUP results"])
2387     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2388              "; ".join(test))
2389
2390     return True
2391
2392   def _VerifyNodeTime(self, ninfo, nresult,
2393                       nvinfo_starttime, nvinfo_endtime):
2394     """Check the node time.
2395
2396     @type ninfo: L{objects.Node}
2397     @param ninfo: the node to check
2398     @param nresult: the remote results for the node
2399     @param nvinfo_starttime: the start time of the RPC call
2400     @param nvinfo_endtime: the end time of the RPC call
2401
2402     """
2403     node = ninfo.name
2404     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2405
2406     ntime = nresult.get(constants.NV_TIME, None)
2407     try:
2408       ntime_merged = utils.MergeTime(ntime)
2409     except (ValueError, TypeError):
2410       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2411       return
2412
2413     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2414       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2415     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2416       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2417     else:
2418       ntime_diff = None
2419
2420     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2421              "Node time diverges by at least %s from master node time",
2422              ntime_diff)
2423
2424   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2425     """Check the node LVM results.
2426
2427     @type ninfo: L{objects.Node}
2428     @param ninfo: the node to check
2429     @param nresult: the remote results for the node
2430     @param vg_name: the configured VG name
2431
2432     """
2433     if vg_name is None:
2434       return
2435
2436     node = ninfo.name
2437     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2438
2439     # checks vg existence and size > 20G
2440     vglist = nresult.get(constants.NV_VGLIST, None)
2441     test = not vglist
2442     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2443     if not test:
2444       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2445                                             constants.MIN_VG_SIZE)
2446       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2447
2448     # check pv names
2449     pvlist = nresult.get(constants.NV_PVLIST, None)
2450     test = pvlist is None
2451     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2452     if not test:
2453       # check that ':' is not present in PV names, since it's a
2454       # special character for lvcreate (denotes the range of PEs to
2455       # use on the PV)
2456       for _, pvname, owner_vg in pvlist:
2457         test = ":" in pvname
2458         _ErrorIf(test, constants.CV_ENODELVM, node,
2459                  "Invalid character ':' in PV '%s' of VG '%s'",
2460                  pvname, owner_vg)
2461
2462   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2463     """Check the node bridges.
2464
2465     @type ninfo: L{objects.Node}
2466     @param ninfo: the node to check
2467     @param nresult: the remote results for the node
2468     @param bridges: the expected list of bridges
2469
2470     """
2471     if not bridges:
2472       return
2473
2474     node = ninfo.name
2475     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2476
2477     missing = nresult.get(constants.NV_BRIDGES, None)
2478     test = not isinstance(missing, list)
2479     _ErrorIf(test, constants.CV_ENODENET, node,
2480              "did not return valid bridge information")
2481     if not test:
2482       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2483                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2484
2485   def _VerifyNodeUserScripts(self, ninfo, nresult):
2486     """Check the results of user scripts presence and executability on the node
2487
2488     @type ninfo: L{objects.Node}
2489     @param ninfo: the node to check
2490     @param nresult: the remote results for the node
2491
2492     """
2493     node = ninfo.name
2494
2495     test = not constants.NV_USERSCRIPTS in nresult
2496     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2497                   "did not return user scripts information")
2498
2499     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2500     if not test:
2501       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2502                     "user scripts not present or not executable: %s" %
2503                     utils.CommaJoin(sorted(broken_scripts)))
2504
2505   def _VerifyNodeNetwork(self, ninfo, nresult):
2506     """Check the node network connectivity results.
2507
2508     @type ninfo: L{objects.Node}
2509     @param ninfo: the node to check
2510     @param nresult: the remote results for the node
2511
2512     """
2513     node = ninfo.name
2514     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2515
2516     test = constants.NV_NODELIST not in nresult
2517     _ErrorIf(test, constants.CV_ENODESSH, node,
2518              "node hasn't returned node ssh connectivity data")
2519     if not test:
2520       if nresult[constants.NV_NODELIST]:
2521         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2522           _ErrorIf(True, constants.CV_ENODESSH, node,
2523                    "ssh communication with node '%s': %s", a_node, a_msg)
2524
2525     test = constants.NV_NODENETTEST not in nresult
2526     _ErrorIf(test, constants.CV_ENODENET, node,
2527              "node hasn't returned node tcp connectivity data")
2528     if not test:
2529       if nresult[constants.NV_NODENETTEST]:
2530         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2531         for anode in nlist:
2532           _ErrorIf(True, constants.CV_ENODENET, node,
2533                    "tcp communication with node '%s': %s",
2534                    anode, nresult[constants.NV_NODENETTEST][anode])
2535
2536     test = constants.NV_MASTERIP not in nresult
2537     _ErrorIf(test, constants.CV_ENODENET, node,
2538              "node hasn't returned node master IP reachability data")
2539     if not test:
2540       if not nresult[constants.NV_MASTERIP]:
2541         if node == self.master_node:
2542           msg = "the master node cannot reach the master IP (not configured?)"
2543         else:
2544           msg = "cannot reach the master IP"
2545         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2546
2547   def _VerifyInstance(self, instance, instanceconfig, node_image,
2548                       diskstatus):
2549     """Verify an instance.
2550
2551     This function checks to see if the required block devices are
2552     available on the instance's node.
2553
2554     """
2555     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2556     node_current = instanceconfig.primary_node
2557
2558     node_vol_should = {}
2559     instanceconfig.MapLVsByNode(node_vol_should)
2560
2561     cluster = self.cfg.GetClusterInfo()
2562     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2563                                                             self.group_info)
2564     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2565     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2566              code=self.ETYPE_WARNING)
2567
2568     for node in node_vol_should:
2569       n_img = node_image[node]
2570       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2571         # ignore missing volumes on offline or broken nodes
2572         continue
2573       for volume in node_vol_should[node]:
2574         test = volume not in n_img.volumes
2575         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2576                  "volume %s missing on node %s", volume, node)
2577
2578     if instanceconfig.admin_state == constants.ADMINST_UP:
2579       pri_img = node_image[node_current]
2580       test = instance not in pri_img.instances and not pri_img.offline
2581       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2582                "instance not running on its primary node %s",
2583                node_current)
2584
2585     diskdata = [(nname, success, status, idx)
2586                 for (nname, disks) in diskstatus.items()
2587                 for idx, (success, status) in enumerate(disks)]
2588
2589     for nname, success, bdev_status, idx in diskdata:
2590       # the 'ghost node' construction in Exec() ensures that we have a
2591       # node here
2592       snode = node_image[nname]
2593       bad_snode = snode.ghost or snode.offline
2594       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2595                not success and not bad_snode,
2596                constants.CV_EINSTANCEFAULTYDISK, instance,
2597                "couldn't retrieve status for disk/%s on %s: %s",
2598                idx, nname, bdev_status)
2599       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2600                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2601                constants.CV_EINSTANCEFAULTYDISK, instance,
2602                "disk/%s on %s is faulty", idx, nname)
2603
2604   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2605     """Verify if there are any unknown volumes in the cluster.
2606
2607     The .os, .swap and backup volumes are ignored. All other volumes are
2608     reported as unknown.
2609
2610     @type reserved: L{ganeti.utils.FieldSet}
2611     @param reserved: a FieldSet of reserved volume names
2612
2613     """
2614     for node, n_img in node_image.items():
2615       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2616           self.all_node_info[node].group != self.group_uuid):
2617         # skip non-healthy nodes
2618         continue
2619       for volume in n_img.volumes:
2620         test = ((node not in node_vol_should or
2621                 volume not in node_vol_should[node]) and
2622                 not reserved.Matches(volume))
2623         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2624                       "volume %s is unknown", volume)
2625
2626   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2627     """Verify N+1 Memory Resilience.
2628
2629     Check that if one single node dies we can still start all the
2630     instances it was primary for.
2631
2632     """
2633     cluster_info = self.cfg.GetClusterInfo()
2634     for node, n_img in node_image.items():
2635       # This code checks that every node which is now listed as
2636       # secondary has enough memory to host all instances it is
2637       # supposed to should a single other node in the cluster fail.
2638       # FIXME: not ready for failover to an arbitrary node
2639       # FIXME: does not support file-backed instances
2640       # WARNING: we currently take into account down instances as well
2641       # as up ones, considering that even if they're down someone
2642       # might want to start them even in the event of a node failure.
2643       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2644         # we're skipping nodes marked offline and nodes in other groups from
2645         # the N+1 warning, since most likely we don't have good memory
2646         # infromation from them; we already list instances living on such
2647         # nodes, and that's enough warning
2648         continue
2649       #TODO(dynmem): also consider ballooning out other instances
2650       for prinode, instances in n_img.sbp.items():
2651         needed_mem = 0
2652         for instance in instances:
2653           bep = cluster_info.FillBE(instance_cfg[instance])
2654           if bep[constants.BE_AUTO_BALANCE]:
2655             needed_mem += bep[constants.BE_MINMEM]
2656         test = n_img.mfree < needed_mem
2657         self._ErrorIf(test, constants.CV_ENODEN1, node,
2658                       "not enough memory to accomodate instance failovers"
2659                       " should node %s fail (%dMiB needed, %dMiB available)",
2660                       prinode, needed_mem, n_img.mfree)
2661
2662   @classmethod
2663   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2664                    (files_all, files_opt, files_mc, files_vm)):
2665     """Verifies file checksums collected from all nodes.
2666
2667     @param errorif: Callback for reporting errors
2668     @param nodeinfo: List of L{objects.Node} objects
2669     @param master_node: Name of master node
2670     @param all_nvinfo: RPC results
2671
2672     """
2673     # Define functions determining which nodes to consider for a file
2674     files2nodefn = [
2675       (files_all, None),
2676       (files_mc, lambda node: (node.master_candidate or
2677                                node.name == master_node)),
2678       (files_vm, lambda node: node.vm_capable),
2679       ]
2680
2681     # Build mapping from filename to list of nodes which should have the file
2682     nodefiles = {}
2683     for (files, fn) in files2nodefn:
2684       if fn is None:
2685         filenodes = nodeinfo
2686       else:
2687         filenodes = filter(fn, nodeinfo)
2688       nodefiles.update((filename,
2689                         frozenset(map(operator.attrgetter("name"), filenodes)))
2690                        for filename in files)
2691
2692     assert set(nodefiles) == (files_all | files_mc | files_vm)
2693
2694     fileinfo = dict((filename, {}) for filename in nodefiles)
2695     ignore_nodes = set()
2696
2697     for node in nodeinfo:
2698       if node.offline:
2699         ignore_nodes.add(node.name)
2700         continue
2701
2702       nresult = all_nvinfo[node.name]
2703
2704       if nresult.fail_msg or not nresult.payload:
2705         node_files = None
2706       else:
2707         fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2708         node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2709                           for (key, value) in fingerprints.items())
2710         del fingerprints
2711
2712       test = not (node_files and isinstance(node_files, dict))
2713       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2714               "Node did not return file checksum data")
2715       if test:
2716         ignore_nodes.add(node.name)
2717         continue
2718
2719       # Build per-checksum mapping from filename to nodes having it
2720       for (filename, checksum) in node_files.items():
2721         assert filename in nodefiles
2722         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2723
2724     for (filename, checksums) in fileinfo.items():
2725       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2726
2727       # Nodes having the file
2728       with_file = frozenset(node_name
2729                             for nodes in fileinfo[filename].values()
2730                             for node_name in nodes) - ignore_nodes
2731
2732       expected_nodes = nodefiles[filename] - ignore_nodes
2733
2734       # Nodes missing file
2735       missing_file = expected_nodes - with_file
2736
2737       if filename in files_opt:
2738         # All or no nodes
2739         errorif(missing_file and missing_file != expected_nodes,
2740                 constants.CV_ECLUSTERFILECHECK, None,
2741                 "File %s is optional, but it must exist on all or no"
2742                 " nodes (not found on %s)",
2743                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2744       else:
2745         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2746                 "File %s is missing from node(s) %s", filename,
2747                 utils.CommaJoin(utils.NiceSort(missing_file)))
2748
2749         # Warn if a node has a file it shouldn't
2750         unexpected = with_file - expected_nodes
2751         errorif(unexpected,
2752                 constants.CV_ECLUSTERFILECHECK, None,
2753                 "File %s should not exist on node(s) %s",
2754                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2755
2756       # See if there are multiple versions of the file
2757       test = len(checksums) > 1
2758       if test:
2759         variants = ["variant %s on %s" %
2760                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2761                     for (idx, (checksum, nodes)) in
2762                       enumerate(sorted(checksums.items()))]
2763       else:
2764         variants = []
2765
2766       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2767               "File %s found with %s different checksums (%s)",
2768               filename, len(checksums), "; ".join(variants))
2769
2770   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2771                       drbd_map):
2772     """Verifies and the node DRBD status.
2773
2774     @type ninfo: L{objects.Node}
2775     @param ninfo: the node to check
2776     @param nresult: the remote results for the node
2777     @param instanceinfo: the dict of instances
2778     @param drbd_helper: the configured DRBD usermode helper
2779     @param drbd_map: the DRBD map as returned by
2780         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2781
2782     """
2783     node = ninfo.name
2784     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2785
2786     if drbd_helper:
2787       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2788       test = (helper_result is None)
2789       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2790                "no drbd usermode helper returned")
2791       if helper_result:
2792         status, payload = helper_result
2793         test = not status
2794         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2795                  "drbd usermode helper check unsuccessful: %s", payload)
2796         test = status and (payload != drbd_helper)
2797         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2798                  "wrong drbd usermode helper: %s", payload)
2799
2800     # compute the DRBD minors
2801     node_drbd = {}
2802     for minor, instance in drbd_map[node].items():
2803       test = instance not in instanceinfo
2804       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2805                "ghost instance '%s' in temporary DRBD map", instance)
2806         # ghost instance should not be running, but otherwise we
2807         # don't give double warnings (both ghost instance and
2808         # unallocated minor in use)
2809       if test:
2810         node_drbd[minor] = (instance, False)
2811       else:
2812         instance = instanceinfo[instance]
2813         node_drbd[minor] = (instance.name,
2814                             instance.admin_state == constants.ADMINST_UP)
2815
2816     # and now check them
2817     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2818     test = not isinstance(used_minors, (tuple, list))
2819     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2820              "cannot parse drbd status file: %s", str(used_minors))
2821     if test:
2822       # we cannot check drbd status
2823       return
2824
2825     for minor, (iname, must_exist) in node_drbd.items():
2826       test = minor not in used_minors and must_exist
2827       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2828                "drbd minor %d of instance %s is not active", minor, iname)
2829     for minor in used_minors:
2830       test = minor not in node_drbd
2831       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2832                "unallocated drbd minor %d is in use", minor)
2833
2834   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2835     """Builds the node OS structures.
2836
2837     @type ninfo: L{objects.Node}
2838     @param ninfo: the node to check
2839     @param nresult: the remote results for the node
2840     @param nimg: the node image object
2841
2842     """
2843     node = ninfo.name
2844     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2845
2846     remote_os = nresult.get(constants.NV_OSLIST, None)
2847     test = (not isinstance(remote_os, list) or
2848             not compat.all(isinstance(v, list) and len(v) == 7
2849                            for v in remote_os))
2850
2851     _ErrorIf(test, constants.CV_ENODEOS, node,
2852              "node hasn't returned valid OS data")
2853
2854     nimg.os_fail = test
2855
2856     if test:
2857       return
2858
2859     os_dict = {}
2860
2861     for (name, os_path, status, diagnose,
2862          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2863
2864       if name not in os_dict:
2865         os_dict[name] = []
2866
2867       # parameters is a list of lists instead of list of tuples due to
2868       # JSON lacking a real tuple type, fix it:
2869       parameters = [tuple(v) for v in parameters]
2870       os_dict[name].append((os_path, status, diagnose,
2871                             set(variants), set(parameters), set(api_ver)))
2872
2873     nimg.oslist = os_dict
2874
2875   def _VerifyNodeOS(self, ninfo, nimg, base):
2876     """Verifies the node OS list.
2877
2878     @type ninfo: L{objects.Node}
2879     @param ninfo: the node to check
2880     @param nimg: the node image object
2881     @param base: the 'template' node we match against (e.g. from the master)
2882
2883     """
2884     node = ninfo.name
2885     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2886
2887     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2888
2889     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2890     for os_name, os_data in nimg.oslist.items():
2891       assert os_data, "Empty OS status for OS %s?!" % os_name
2892       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2893       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2894                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2895       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2896                "OS '%s' has multiple entries (first one shadows the rest): %s",
2897                os_name, utils.CommaJoin([v[0] for v in os_data]))
2898       # comparisons with the 'base' image
2899       test = os_name not in base.oslist
2900       _ErrorIf(test, constants.CV_ENODEOS, node,
2901                "Extra OS %s not present on reference node (%s)",
2902                os_name, base.name)
2903       if test:
2904         continue
2905       assert base.oslist[os_name], "Base node has empty OS status?"
2906       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2907       if not b_status:
2908         # base OS is invalid, skipping
2909         continue
2910       for kind, a, b in [("API version", f_api, b_api),
2911                          ("variants list", f_var, b_var),
2912                          ("parameters", beautify_params(f_param),
2913                           beautify_params(b_param))]:
2914         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2915                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2916                  kind, os_name, base.name,
2917                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2918
2919     # check any missing OSes
2920     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2921     _ErrorIf(missing, constants.CV_ENODEOS, node,
2922              "OSes present on reference node %s but missing on this node: %s",
2923              base.name, utils.CommaJoin(missing))
2924
2925   def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2926     """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2927
2928     @type ninfo: L{objects.Node}
2929     @param ninfo: the node to check
2930     @param nresult: the remote results for the node
2931     @type is_master: bool
2932     @param is_master: Whether node is the master node
2933
2934     """
2935     node = ninfo.name
2936
2937     if (is_master and
2938         (constants.ENABLE_FILE_STORAGE or
2939          constants.ENABLE_SHARED_FILE_STORAGE)):
2940       try:
2941         fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2942       except KeyError:
2943         # This should never happen
2944         self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2945                       "Node did not return forbidden file storage paths")
2946       else:
2947         self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2948                       "Found forbidden file storage paths: %s",
2949                       utils.CommaJoin(fspaths))
2950     else:
2951       self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2952                     constants.CV_ENODEFILESTORAGEPATHS, node,
2953                     "Node should not have returned forbidden file storage"
2954                     " paths")
2955
2956   def _VerifyOob(self, ninfo, nresult):
2957     """Verifies out of band functionality of a node.
2958
2959     @type ninfo: L{objects.Node}
2960     @param ninfo: the node to check
2961     @param nresult: the remote results for the node
2962
2963     """
2964     node = ninfo.name
2965     # We just have to verify the paths on master and/or master candidates
2966     # as the oob helper is invoked on the master
2967     if ((ninfo.master_candidate or ninfo.master_capable) and
2968         constants.NV_OOB_PATHS in nresult):
2969       for path_result in nresult[constants.NV_OOB_PATHS]:
2970         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2971
2972   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2973     """Verifies and updates the node volume data.
2974
2975     This function will update a L{NodeImage}'s internal structures
2976     with data from the remote call.
2977
2978     @type ninfo: L{objects.Node}
2979     @param ninfo: the node to check
2980     @param nresult: the remote results for the node
2981     @param nimg: the node image object
2982     @param vg_name: the configured VG name
2983
2984     """
2985     node = ninfo.name
2986     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2987
2988     nimg.lvm_fail = True
2989     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2990     if vg_name is None:
2991       pass
2992     elif isinstance(lvdata, basestring):
2993       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2994                utils.SafeEncode(lvdata))
2995     elif not isinstance(lvdata, dict):
2996       _ErrorIf(True, constants.CV_ENODELVM, node,
2997                "rpc call to node failed (lvlist)")
2998     else:
2999       nimg.volumes = lvdata
3000       nimg.lvm_fail = False
3001
3002   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3003     """Verifies and updates the node instance list.
3004
3005     If the listing was successful, then updates this node's instance
3006     list. Otherwise, it marks the RPC call as failed for the instance
3007     list key.
3008
3009     @type ninfo: L{objects.Node}
3010     @param ninfo: the node to check
3011     @param nresult: the remote results for the node
3012     @param nimg: the node image object
3013
3014     """
3015     idata = nresult.get(constants.NV_INSTANCELIST, None)
3016     test = not isinstance(idata, list)
3017     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3018                   "rpc call to node failed (instancelist): %s",
3019                   utils.SafeEncode(str(idata)))
3020     if test:
3021       nimg.hyp_fail = True
3022     else:
3023       nimg.instances = idata
3024
3025   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3026     """Verifies and computes a node information map
3027
3028     @type ninfo: L{objects.Node}
3029     @param ninfo: the node to check
3030     @param nresult: the remote results for the node
3031     @param nimg: the node image object
3032     @param vg_name: the configured VG name
3033
3034     """
3035     node = ninfo.name
3036     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3037
3038     # try to read free memory (from the hypervisor)
3039     hv_info = nresult.get(constants.NV_HVINFO, None)
3040     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3041     _ErrorIf(test, constants.CV_ENODEHV, node,
3042              "rpc call to node failed (hvinfo)")
3043     if not test:
3044       try:
3045         nimg.mfree = int(hv_info["memory_free"])
3046       except (ValueError, TypeError):
3047         _ErrorIf(True, constants.CV_ENODERPC, node,
3048                  "node returned invalid nodeinfo, check hypervisor")
3049
3050     # FIXME: devise a free space model for file based instances as well
3051     if vg_name is not None:
3052       test = (constants.NV_VGLIST not in nresult or
3053               vg_name not in nresult[constants.NV_VGLIST])
3054       _ErrorIf(test, constants.CV_ENODELVM, node,
3055                "node didn't return data for the volume group '%s'"
3056                " - it is either missing or broken", vg_name)
3057       if not test:
3058         try:
3059           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3060         except (ValueError, TypeError):
3061           _ErrorIf(True, constants.CV_ENODERPC, node,
3062                    "node returned invalid LVM info, check LVM status")
3063
3064   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3065     """Gets per-disk status information for all instances.
3066
3067     @type nodelist: list of strings
3068     @param nodelist: Node names
3069     @type node_image: dict of (name, L{objects.Node})
3070     @param node_image: Node objects
3071     @type instanceinfo: dict of (name, L{objects.Instance})
3072     @param instanceinfo: Instance objects
3073     @rtype: {instance: {node: [(succes, payload)]}}
3074     @return: a dictionary of per-instance dictionaries with nodes as
3075         keys and disk information as values; the disk information is a
3076         list of tuples (success, payload)
3077
3078     """
3079     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3080
3081     node_disks = {}
3082     node_disks_devonly = {}
3083     diskless_instances = set()
3084     diskless = constants.DT_DISKLESS
3085
3086     for nname in nodelist:
3087       node_instances = list(itertools.chain(node_image[nname].pinst,
3088                                             node_image[nname].sinst))
3089       diskless_instances.update(inst for inst in node_instances
3090                                 if instanceinfo[inst].disk_template == diskless)
3091       disks = [(inst, disk)
3092                for inst in node_instances
3093                for disk in instanceinfo[inst].disks]
3094
3095       if not disks:
3096         # No need to collect data
3097         continue
3098
3099       node_disks[nname] = disks
3100
3101       # _AnnotateDiskParams makes already copies of the disks
3102       devonly = []
3103       for (inst, dev) in disks:
3104         (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3105         self.cfg.SetDiskID(anno_disk, nname)
3106         devonly.append(anno_disk)
3107
3108       node_disks_devonly[nname] = devonly
3109
3110     assert len(node_disks) == len(node_disks_devonly)
3111
3112     # Collect data from all nodes with disks
3113     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3114                                                           node_disks_devonly)
3115
3116     assert len(result) == len(node_disks)
3117
3118     instdisk = {}
3119
3120     for (nname, nres) in result.items():
3121       disks = node_disks[nname]
3122
3123       if nres.offline:
3124         # No data from this node
3125         data = len(disks) * [(False, "node offline")]
3126       else:
3127         msg = nres.fail_msg
3128         _ErrorIf(msg, constants.CV_ENODERPC, nname,
3129                  "while getting disk information: %s", msg)
3130         if msg:
3131           # No data from this node
3132           data = len(disks) * [(False, msg)]
3133         else:
3134           data = []
3135           for idx, i in enumerate(nres.payload):
3136             if isinstance(i, (tuple, list)) and len(i) == 2:
3137               data.append(i)
3138             else:
3139               logging.warning("Invalid result from node %s, entry %d: %s",
3140                               nname, idx, i)
3141               data.append((False, "Invalid result from the remote node"))
3142
3143       for ((inst, _), status) in zip(disks, data):
3144         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3145
3146     # Add empty entries for diskless instances.
3147     for inst in diskless_instances:
3148       assert inst not in instdisk
3149       instdisk[inst] = {}
3150
3151     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3152                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
3153                       compat.all(isinstance(s, (tuple, list)) and
3154                                  len(s) == 2 for s in statuses)
3155                       for inst, nnames in instdisk.items()
3156                       for nname, statuses in nnames.items())
3157     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3158
3159     return instdisk
3160
3161   @staticmethod
3162   def _SshNodeSelector(group_uuid, all_nodes):
3163     """Create endless iterators for all potential SSH check hosts.
3164
3165     """
3166     nodes = [node for node in all_nodes
3167              if (node.group != group_uuid and
3168                  not node.offline)]
3169     keyfunc = operator.attrgetter("group")
3170
3171     return map(itertools.cycle,
3172                [sorted(map(operator.attrgetter("name"), names))
3173                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3174                                                   keyfunc)])
3175
3176   @classmethod
3177   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3178     """Choose which nodes should talk to which other nodes.
3179
3180     We will make nodes contact all nodes in their group, and one node from
3181     every other group.
3182
3183     @warning: This algorithm has a known issue if one node group is much
3184       smaller than others (e.g. just one node). In such a case all other
3185       nodes will talk to the single node.
3186
3187     """
3188     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3189     sel = cls._SshNodeSelector(group_uuid, all_nodes)
3190
3191     return (online_nodes,
3192             dict((name, sorted([i.next() for i in sel]))
3193                  for name in online_nodes))
3194
3195   def BuildHooksEnv(self):
3196     """Build hooks env.
3197
3198     Cluster-Verify hooks just ran in the post phase and their failure makes
3199     the output be logged in the verify output and the verification to fail.
3200
3201     """
3202     env = {
3203       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3204       }
3205
3206     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3207                for node in self.my_node_info.values())
3208
3209     return env
3210
3211   def BuildHooksNodes(self):
3212     """Build hooks nodes.
3213
3214     """
3215     return ([], self.my_node_names)
3216
3217   def Exec(self, feedback_fn):
3218     """Verify integrity of the node group, performing various test on nodes.
3219
3220     """
3221     # This method has too many local variables. pylint: disable=R0914
3222     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3223
3224     if not self.my_node_names:
3225       # empty node group
3226       feedback_fn("* Empty node group, skipping verification")
3227       return True
3228
3229     self.bad = False
3230     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3231     verbose = self.op.verbose
3232     self._feedback_fn = feedback_fn
3233
3234     vg_name = self.cfg.GetVGName()
3235     drbd_helper = self.cfg.GetDRBDHelper()
3236     cluster = self.cfg.GetClusterInfo()
3237     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3238     hypervisors = cluster.enabled_hypervisors
3239     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3240
3241     i_non_redundant = [] # Non redundant instances
3242     i_non_a_balanced = [] # Non auto-balanced instances
3243     i_offline = 0 # Count of offline instances
3244     n_offline = 0 # Count of offline nodes
3245     n_drained = 0 # Count of nodes being drained
3246     node_vol_should = {}
3247
3248     # FIXME: verify OS list
3249
3250     # File verification
3251     filemap = _ComputeAncillaryFiles(cluster, False)
3252
3253     # do local checksums
3254     master_node = self.master_node = self.cfg.GetMasterNode()
3255     master_ip = self.cfg.GetMasterIP()
3256
3257     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3258
3259     user_scripts = []
3260     if self.cfg.GetUseExternalMipScript():
3261       user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3262
3263     node_verify_param = {
3264       constants.NV_FILELIST:
3265         map(vcluster.MakeVirtualPath,
3266             utils.UniqueSequence(filename
3267                                  for files in filemap
3268                                  for filename in files)),
3269       constants.NV_NODELIST:
3270         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3271                                   self.all_node_info.values()),
3272       constants.NV_HYPERVISOR: hypervisors,
3273       constants.NV_HVPARAMS:
3274         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3275       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3276                                  for node in node_data_list
3277                                  if not node.offline],
3278       constants.NV_INSTANCELIST: hypervisors,
3279       constants.NV_VERSION: None,
3280       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3281       constants.NV_NODESETUP: None,
3282       constants.NV_TIME: None,
3283       constants.NV_MASTERIP: (master_node, master_ip),
3284       constants.NV_OSLIST: None,
3285       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3286       constants.NV_USERSCRIPTS: user_scripts,
3287       }
3288
3289     if vg_name is not None:
3290       node_verify_param[constants.NV_VGLIST] = None
3291       node_verify_param[constants.NV_LVLIST] = vg_name
3292       node_verify_param[constants.NV_PVLIST] = [vg_name]
3293
3294     if drbd_helper:
3295       node_verify_param[constants.NV_DRBDLIST] = None
3296       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3297
3298     if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3299       # Load file storage paths only from master node
3300       node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3301
3302     # bridge checks
3303     # FIXME: this needs to be changed per node-group, not cluster-wide
3304     bridges = set()
3305     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3306     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3307       bridges.add(default_nicpp[constants.NIC_LINK])
3308     for instance in self.my_inst_info.values():
3309       for nic in instance.nics:
3310         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3311         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3312           bridges.add(full_nic[constants.NIC_LINK])
3313
3314     if bridges:
3315       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3316
3317     # Build our expected cluster state
3318     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3319                                                  name=node.name,
3320                                                  vm_capable=node.vm_capable))
3321                       for node in node_data_list)
3322
3323     # Gather OOB paths
3324     oob_paths = []
3325     for node in self.all_node_info.values():
3326       path = _SupportsOob(self.cfg, node)
3327       if path and path not in oob_paths:
3328         oob_paths.append(path)
3329
3330     if oob_paths:
3331       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3332
3333     for instance in self.my_inst_names:
3334       inst_config = self.my_inst_info[instance]
3335       if inst_config.admin_state == constants.ADMINST_OFFLINE:
3336         i_offline += 1
3337
3338       for nname in inst_config.all_nodes:
3339         if nname not in node_image:
3340           gnode = self.NodeImage(name=nname)
3341           gnode.ghost = (nname not in self.all_node_info)
3342           node_image[nname] = gnode
3343
3344       inst_config.MapLVsByNode(node_vol_should)
3345
3346       pnode = inst_config.primary_node
3347       node_image[pnode].pinst.append(instance)
3348
3349       for snode in inst_config.secondary_nodes:
3350         nimg = node_image[snode]
3351         nimg.sinst.append(instance)
3352         if pnode not in nimg.sbp:
3353           nimg.sbp[pnode] = []
3354         nimg.sbp[pnode].append(instance)
3355
3356     # At this point, we have the in-memory data structures complete,
3357     # except for the runtime information, which we'll gather next
3358
3359     # Due to the way our RPC system works, exact response times cannot be
3360     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3361     # time before and after executing the request, we can at least have a time
3362     # window.
3363     nvinfo_starttime = time.time()
3364     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3365                                            node_verify_param,
3366                                            self.cfg.GetClusterName())
3367     nvinfo_endtime = time.time()
3368
3369     if self.extra_lv_nodes and vg_name is not None:
3370       extra_lv_nvinfo = \
3371           self.rpc.call_node_verify(self.extra_lv_nodes,
3372                                     {constants.NV_LVLIST: vg_name},
3373                                     self.cfg.GetClusterName())
3374     else:
3375       extra_lv_nvinfo = {}
3376
3377     all_drbd_map = self.cfg.ComputeDRBDMap()
3378
3379     feedback_fn("* Gathering disk information (%s nodes)" %
3380                 len(self.my_node_names))
3381     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3382                                      self.my_inst_info)
3383
3384     feedback_fn("* Verifying configuration file consistency")
3385
3386     # If not all nodes are being checked, we need to make sure the master node
3387     # and a non-checked vm_capable node are in the list.
3388     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3389     if absent_nodes:
3390       vf_nvinfo = all_nvinfo.copy()
3391       vf_node_info = list(self.my_node_info.values())
3392       additional_nodes = []
3393       if master_node not in self.my_node_info:
3394         additional_nodes.append(master_node)
3395         vf_node_info.append(self.all_node_info[master_node])
3396       # Add the first vm_capable node we find which is not included,
3397       # excluding the master node (which we already have)
3398       for node in absent_nodes:
3399         nodeinfo = self.all_node_info[node]
3400         if (nodeinfo.vm_capable and not nodeinfo.offline and
3401             node != master_node):
3402           additional_nodes.append(node)
3403           vf_node_info.append(self.all_node_info[node])
3404           break
3405       key = constants.NV_FILELIST
3406       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3407                                                  {key: node_verify_param[key]},
3408                                                  self.cfg.GetClusterName()))
3409     else:
3410       vf_nvinfo = all_nvinfo
3411       vf_node_info = self.my_node_info.values()
3412
3413     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3414
3415     feedback_fn("* Verifying node status")
3416
3417     refos_img = None
3418
3419     for node_i in node_data_list:
3420       node = node_i.name
3421       nimg = node_image[node]
3422
3423       if node_i.offline:
3424         if verbose:
3425           feedback_fn("* Skipping offline node %s" % (node,))
3426         n_offline += 1
3427         continue
3428
3429       if node == master_node:
3430         ntype = "master"
3431       elif node_i.master_candidate:
3432         ntype = "master candidate"
3433       elif node_i.drained:
3434         ntype = "drained"
3435         n_drained += 1
3436       else:
3437         ntype = "regular"
3438       if verbose:
3439         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3440
3441       msg = all_nvinfo[node].fail_msg
3442       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3443                msg)
3444       if msg:
3445         nimg.rpc_fail = True
3446         continue
3447
3448       nresult = all_nvinfo[node].payload
3449
3450       nimg.call_ok = self._VerifyNode(node_i, nresult)
3451       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3452       self._VerifyNodeNetwork(node_i, nresult)
3453       self._VerifyNodeUserScripts(node_i, nresult)
3454       self._VerifyOob(node_i, nresult)
3455       self._VerifyFileStoragePaths(node_i, nresult,
3456                                    node == master_node)
3457
3458       if nimg.vm_capable:
3459         self._VerifyNodeLVM(node_i, nresult, vg_name)
3460         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3461                              all_drbd_map)
3462
3463         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3464         self._UpdateNodeInstances(node_i, nresult, nimg)
3465         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3466         self._UpdateNodeOS(node_i, nresult, nimg)
3467
3468         if not nimg.os_fail:
3469           if refos_img is None:
3470             refos_img = nimg
3471           self._VerifyNodeOS(node_i, nimg, refos_img)
3472         self._VerifyNodeBridges(node_i, nresult, bridges)
3473
3474         # Check whether all running instancies are primary for the node. (This
3475         # can no longer be done from _VerifyInstance below, since some of the
3476         # wrong instances could be from other node groups.)
3477         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3478
3479         for inst in non_primary_inst:
3480           test = inst in self.all_inst_info
3481           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3482                    "instance should not run on node %s", node_i.name)
3483           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3484                    "node is running unknown instance %s", inst)
3485
3486     for node, result in extra_lv_nvinfo.items():
3487       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3488                               node_image[node], vg_name)
3489
3490     feedback_fn("* Verifying instance status")
3491     for instance in self.my_inst_names:
3492       if verbose:
3493         feedback_fn("* Verifying instance %s" % instance)
3494       inst_config = self.my_inst_info[instance]
3495       self._VerifyInstance(instance, inst_config, node_image,
3496                            instdisk[instance])
3497       inst_nodes_offline = []
3498
3499       pnode = inst_config.primary_node
3500       pnode_img = node_image[pnode]
3501       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3502                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3503                " primary node failed", instance)
3504
3505       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3506                pnode_img.offline,
3507                constants.CV_EINSTANCEBADNODE, instance,
3508                "instance is marked as running and lives on offline node %s",
3509                inst_config.primary_node)
3510
3511       # If the instance is non-redundant we cannot survive losing its primary
3512       # node, so we are not N+1 compliant.
3513       if inst_config.disk_template not in constants.DTS_MIRRORED:
3514         i_non_redundant.append(instance)
3515
3516       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3517                constants.CV_EINSTANCELAYOUT,
3518                instance, "instance has multiple secondary nodes: %s",
3519                utils.CommaJoin(inst_config.secondary_nodes),
3520                code=self.ETYPE_WARNING)
3521
3522       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3523         pnode = inst_config.primary_node
3524         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3525         instance_groups = {}
3526
3527         for node in instance_nodes:
3528           instance_groups.setdefault(self.all_node_info[node].group,
3529                                      []).append(node)
3530
3531         pretty_list = [
3532           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3533           # Sort so that we always list the primary node first.
3534           for group, nodes in sorted(instance_groups.items(),
3535                                      key=lambda (_, nodes): pnode in nodes,
3536                                      reverse=True)]
3537
3538         self._ErrorIf(len(instance_groups) > 1,
3539                       constants.CV_EINSTANCESPLITGROUPS,
3540                       instance, "instance has primary and secondary nodes in"
3541                       " different groups: %s", utils.CommaJoin(pretty_list),
3542                       code=self.ETYPE_WARNING)
3543
3544       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3545         i_non_a_balanced.append(instance)
3546
3547       for snode in inst_config.secondary_nodes:
3548         s_img = node_image[snode]
3549         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3550                  snode, "instance %s, connection to secondary node failed",
3551                  instance)
3552
3553         if s_img.offline:
3554           inst_nodes_offline.append(snode)
3555
3556       # warn that the instance lives on offline nodes
3557       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3558                "instance has offline secondary node(s) %s",
3559                utils.CommaJoin(inst_nodes_offline))
3560       # ... or ghost/non-vm_capable nodes
3561       for node in inst_config.all_nodes:
3562         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3563                  instance, "instance lives on ghost node %s", node)
3564         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3565                  instance, "instance lives on non-vm_capable node %s", node)
3566
3567     feedback_fn("* Verifying orphan volumes")
3568     reserved = utils.FieldSet(*cluster.reserved_lvs)
3569
3570     # We will get spurious "unknown volume" warnings if any node of this group
3571     # is secondary for an instance whose primary is in another group. To avoid
3572     # them, we find these instances and add their volumes to node_vol_should.
3573     for inst in self.all_inst_info.values():
3574       for secondary in inst.secondary_nodes:
3575         if (secondary in self.my_node_info
3576             and inst.name not in self.my_inst_info):
3577           inst.MapLVsByNode(node_vol_should)
3578           break
3579
3580     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3581
3582     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3583       feedback_fn("* Verifying N+1 Memory redundancy")
3584       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3585
3586     feedback_fn("* Other Notes")
3587     if i_non_redundant:
3588       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3589                   % len(i_non_redundant))
3590
3591     if i_non_a_balanced:
3592       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3593                   % len(i_non_a_balanced))
3594
3595     if i_offline:
3596       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3597
3598     if n_offline:
3599       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3600
3601     if n_drained:
3602       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3603
3604     return not self.bad
3605
3606   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3607     """Analyze the post-hooks' result
3608
3609     This method analyses the hook result, handles it, and sends some
3610     nicely-formatted feedback back to the user.
3611
3612     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3613         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3614     @param hooks_results: the results of the multi-node hooks rpc call
3615     @param feedback_fn: function used send feedback back to the caller
3616     @param lu_result: previous Exec result
3617     @return: the new Exec result, based on the previous result
3618         and hook results
3619
3620     """
3621     # We only really run POST phase hooks, only for non-empty groups,
3622     # and are only interested in their results
3623     if not self.my_node_names:
3624       # empty node group
3625       pass
3626     elif phase == constants.HOOKS_PHASE_POST:
3627       # Used to change hooks' output to proper indentation
3628       feedback_fn("* Hooks Results")
3629       assert hooks_results, "invalid result from hooks"
3630
3631       for node_name in hooks_results:
3632         res = hooks_results[node_name]
3633         msg = res.fail_msg
3634         test = msg and not res.offline
3635         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3636                       "Communication failure in hooks execution: %s", msg)
3637         if res.offline or msg:
3638           # No need to investigate payload if node is offline or gave
3639           # an error.
3640           continue
3641         for script, hkr, output in res.payload:
3642           test = hkr == constants.HKR_FAIL
3643           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3644                         "Script %s failed, output:", script)
3645           if test:
3646             output = self._HOOKS_INDENT_RE.sub("      ", output)
3647             feedback_fn("%s" % output)
3648             lu_result = False
3649
3650     return lu_result
3651
3652
3653 class LUClusterVerifyDisks(NoHooksLU):
3654   """Verifies the cluster disks status.
3655
3656   """
3657   REQ_BGL = False
3658
3659   def ExpandNames(self):
3660     self.share_locks = _ShareAll()
3661     self.needed_locks = {
3662       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3663       }
3664
3665   def Exec(self, feedback_fn):
3666     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3667
3668     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3669     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3670                            for group in group_names])
3671
3672
3673 class LUGroupVerifyDisks(NoHooksLU):
3674   """Verifies the status of all disks in a node group.
3675
3676   """
3677   REQ_BGL = False
3678
3679   def ExpandNames(self):
3680     # Raises errors.OpPrereqError on its own if group can't be found
3681     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3682
3683     self.share_locks = _ShareAll()
3684     self.needed_locks = {
3685       locking.LEVEL_INSTANCE: [],
3686       locking.LEVEL_NODEGROUP: [],
3687       locking.LEVEL_NODE: [],
3688
3689       # This opcode is acquires all node locks in a group. LUClusterVerifyDisks
3690       # starts one instance of this opcode for every group, which means all
3691       # nodes will be locked for a short amount of time, so it's better to
3692       # acquire the node allocation lock as well.
3693       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3694       }
3695
3696   def DeclareLocks(self, level):
3697     if level == locking.LEVEL_INSTANCE:
3698       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3699
3700       # Lock instances optimistically, needs verification once node and group
3701       # locks have been acquired
3702       self.needed_locks[locking.LEVEL_INSTANCE] = \
3703         self.cfg.GetNodeGroupInstances(self.group_uuid)
3704
3705     elif level == locking.LEVEL_NODEGROUP:
3706       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3707
3708       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3709         set([self.group_uuid] +
3710             # Lock all groups used by instances optimistically; this requires
3711             # going via the node before it's locked, requiring verification
3712             # later on
3713             [group_uuid
3714              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3715              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3716
3717     elif level == locking.LEVEL_NODE:
3718       # This will only lock the nodes in the group to be verified which contain
3719       # actual instances
3720       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3721       self._LockInstancesNodes()
3722
3723       # Lock all nodes in group to be verified
3724       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3725       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3726       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3727
3728   def CheckPrereq(self):
3729     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3730     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3731     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3732
3733     assert self.group_uuid in owned_groups
3734
3735     # Check if locked instances are still correct
3736     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3737
3738     # Get instance information
3739     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3740
3741     # Check if node groups for locked instances are still correct
3742     _CheckInstancesNodeGroups(self.cfg, self.instances,
3743                               owned_groups, owned_nodes, self.group_uuid)
3744
3745   def Exec(self, feedback_fn):
3746     """Verify integrity of cluster disks.
3747
3748     @rtype: tuple of three items
3749     @return: a tuple of (dict of node-to-node_error, list of instances
3750         which need activate-disks, dict of instance: (node, volume) for
3751         missing volumes
3752
3753     """
3754     res_nodes = {}
3755     res_instances = set()
3756     res_missing = {}
3757
3758     nv_dict = _MapInstanceDisksToNodes(
3759       [inst for inst in self.instances.values()
3760        if inst.admin_state == constants.ADMINST_UP])
3761
3762     if nv_dict:
3763       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3764                              set(self.cfg.GetVmCapableNodeList()))
3765
3766       node_lvs = self.rpc.call_lv_list(nodes, [])
3767
3768       for (node, node_res) in node_lvs.items():
3769         if node_res.offline:
3770           continue
3771
3772         msg = node_res.fail_msg
3773         if msg:
3774           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3775           res_nodes[node] = msg
3776           continue
3777
3778         for lv_name, (_, _, lv_online) in node_res.payload.items():
3779           inst = nv_dict.pop((node, lv_name), None)
3780           if not (lv_online or inst is None):
3781             res_instances.add(inst)
3782
3783       # any leftover items in nv_dict are missing LVs, let's arrange the data
3784       # better
3785       for key, inst in nv_dict.iteritems():
3786         res_missing.setdefault(inst, []).append(list(key))
3787
3788     return (res_nodes, list(res_instances), res_missing)
3789
3790
3791 class LUClusterRepairDiskSizes(NoHooksLU):
3792   """Verifies the cluster disks sizes.
3793
3794   """
3795   REQ_BGL = False
3796
3797   def ExpandNames(self):
3798     if self.op.instances:
3799       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3800       # Not getting the node allocation lock as only a specific set of
3801       # instances (and their nodes) is going to be acquired
3802       self.needed_locks = {
3803         locking.LEVEL_NODE_RES: [],
3804         locking.LEVEL_INSTANCE: self.wanted_names,
3805         }
3806       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3807     else:
3808       self.wanted_names = None
3809       self.needed_locks = {
3810         locking.LEVEL_NODE_RES: locking.ALL_SET,
3811         locking.LEVEL_INSTANCE: locking.ALL_SET,
3812
3813         # This opcode is acquires the node locks for all instances
3814         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3815         }
3816
3817     self.share_locks = {
3818       locking.LEVEL_NODE_RES: 1,
3819       locking.LEVEL_INSTANCE: 0,
3820       locking.LEVEL_NODE_ALLOC: 1,
3821       }
3822
3823   def DeclareLocks(self, level):
3824     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3825       self._LockInstancesNodes(primary_only=True, level=level)
3826
3827   def CheckPrereq(self):
3828     """Check prerequisites.
3829
3830     This only checks the optional instance list against the existing names.
3831
3832     """
3833     if self.wanted_names is None:
3834       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3835
3836     self.wanted_instances = \
3837         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3838
3839   def _EnsureChildSizes(self, disk):
3840     """Ensure children of the disk have the needed disk size.
3841
3842     This is valid mainly for DRBD8 and fixes an issue where the
3843     children have smaller disk size.
3844
3845     @param disk: an L{ganeti.objects.Disk} object
3846
3847     """
3848     if disk.dev_type == constants.LD_DRBD8:
3849       assert disk.children, "Empty children for DRBD8?"
3850       fchild = disk.children[0]
3851       mismatch = fchild.size < disk.size
3852       if mismatch:
3853         self.LogInfo("Child disk has size %d, parent %d, fixing",
3854                      fchild.size, disk.size)
3855         fchild.size = disk.size
3856
3857       # and we recurse on this child only, not on the metadev
3858       return self._EnsureChildSizes(fchild) or mismatch
3859     else:
3860       return False
3861
3862   def Exec(self, feedback_fn):
3863     """Verify the size of cluster disks.
3864
3865     """
3866     # TODO: check child disks too
3867     # TODO: check differences in size between primary/secondary nodes
3868     per_node_disks = {}
3869     for instance in self.wanted_instances:
3870       pnode = instance.primary_node
3871       if pnode not in per_node_disks:
3872         per_node_disks[pnode] = []
3873       for idx, disk in enumerate(instance.disks):
3874         per_node_disks[pnode].append((instance, idx, disk))
3875
3876     assert not (frozenset(per_node_disks.keys()) -
3877                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3878       "Not owning correct locks"
3879     assert not self.owned_locks(locking.LEVEL_NODE)
3880
3881     changed = []
3882     for node, dskl in per_node_disks.items():
3883       newl = [v[2].Copy() for v in dskl]
3884       for dsk in newl:
3885         self.cfg.SetDiskID(dsk, node)
3886       result = self.rpc.call_blockdev_getsize(node, newl)
3887       if result.fail_msg:
3888         self.LogWarning("Failure in blockdev_getsize call to node"
3889                         " %s, ignoring", node)
3890         continue
3891       if len(result.payload) != len(dskl):
3892         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3893                         " result.payload=%s", node, len(dskl), result.payload)
3894         self.LogWarning("Invalid result from node %s, ignoring node results",
3895                         node)
3896         continue
3897       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3898         if size is None:
3899           self.LogWarning("Disk %d of instance %s did not return size"
3900                           " information, ignoring", idx, instance.name)
3901           continue
3902         if not isinstance(size, (int, long)):
3903           self.LogWarning("Disk %d of instance %s did not return valid"
3904                           " size information, ignoring", idx, instance.name)
3905           continue
3906         size = size >> 20
3907         if size != disk.size:
3908           self.LogInfo("Disk %d of instance %s has mismatched size,"
3909                        " correcting: recorded %d, actual %d", idx,
3910                        instance.name, disk.size, size)
3911           disk.size = size
3912           self.cfg.Update(instance, feedback_fn)
3913           changed.append((instance.name, idx, size))
3914         if self._EnsureChildSizes(disk):
3915           self.cfg.Update(instance, feedback_fn)
3916           changed.append((instance.name, idx, disk.size))
3917     return changed
3918
3919
3920 class LUClusterRename(LogicalUnit):
3921   """Rename the cluster.
3922
3923   """
3924   HPATH = "cluster-rename"
3925   HTYPE = constants.HTYPE_CLUSTER
3926
3927   def BuildHooksEnv(self):
3928     """Build hooks env.
3929
3930     """
3931     return {
3932       "OP_TARGET": self.cfg.GetClusterName(),
3933       "NEW_NAME": self.op.name,
3934       }
3935
3936   def BuildHooksNodes(self):
3937     """Build hooks nodes.
3938
3939     """
3940     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3941
3942   def CheckPrereq(self):
3943     """Verify that the passed name is a valid one.
3944
3945     """
3946     hostname = netutils.GetHostname(name=self.op.name,
3947                                     family=self.cfg.GetPrimaryIPFamily())
3948
3949     new_name = hostname.name
3950     self.ip = new_ip = hostname.ip
3951     old_name = self.cfg.GetClusterName()
3952     old_ip = self.cfg.GetMasterIP()
3953     if new_name == old_name and new_ip == old_ip:
3954       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3955                                  " cluster has changed",
3956                                  errors.ECODE_INVAL)
3957     if new_ip != old_ip:
3958       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3959         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3960                                    " reachable on the network" %
3961                                    new_ip, errors.ECODE_NOTUNIQUE)
3962
3963     self.op.name = new_name
3964
3965   def Exec(self, feedback_fn):
3966     """Rename the cluster.
3967
3968     """
3969     clustername = self.op.name
3970     new_ip = self.ip
3971
3972     # shutdown the master IP
3973     master_params = self.cfg.GetMasterNetworkParameters()
3974     ems = self.cfg.GetUseExternalMipScript()
3975     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3976                                                      master_params, ems)
3977     result.Raise("Could not disable the master role")
3978
3979     try:
3980       cluster = self.cfg.GetClusterInfo()
3981       cluster.cluster_name = clustername
3982       cluster.master_ip = new_ip
3983       self.cfg.Update(cluster, feedback_fn)
3984
3985       # update the known hosts file
3986       ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3987       node_list = self.cfg.GetOnlineNodeList()
3988       try:
3989         node_list.remove(master_params.name)
3990       except ValueError:
3991         pass
3992       _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3993     finally:
3994       master_params.ip = new_ip
3995       result = self.rpc.call_node_activate_master_ip(master_params.name,
3996                                                      master_params, ems)
3997       msg = result.fail_msg
3998       if msg:
3999         self.LogWarning("Could not re-enable the master role on"
4000                         " the master, please restart manually: %s", msg)
4001
4002     return clustername
4003
4004
4005 def _ValidateNetmask(cfg, netmask):
4006   """Checks if a netmask is valid.
4007
4008   @type cfg: L{config.ConfigWriter}
4009   @param cfg: The cluster configuration
4010   @type netmask: int
4011   @param netmask: the netmask to be verified
4012   @raise errors.OpPrereqError: if the validation fails
4013
4014   """
4015   ip_family = cfg.GetPrimaryIPFamily()
4016   try:
4017     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4018   except errors.ProgrammerError:
4019     raise errors.OpPrereqError("Invalid primary ip family: %s." %
4020                                ip_family, errors.ECODE_INVAL)
4021   if not ipcls.ValidateNetmask(netmask):
4022     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4023                                 (netmask), errors.ECODE_INVAL)
4024
4025
4026 class LUClusterSetParams(LogicalUnit):
4027   """Change the parameters of the cluster.
4028
4029   """
4030   HPATH = "cluster-modify"
4031   HTYPE = constants.HTYPE_CLUSTER
4032   REQ_BGL = False
4033
4034   def CheckArguments(self):
4035     """Check parameters
4036
4037     """
4038     if self.op.uid_pool:
4039       uidpool.CheckUidPool(self.op.uid_pool)
4040
4041     if self.op.add_uids:
4042       uidpool.CheckUidPool(self.op.add_uids)
4043
4044     if self.op.remove_uids:
4045       uidpool.CheckUidPool(self.op.remove_uids)
4046
4047     if self.op.master_netmask is not None:
4048       _ValidateNetmask(self.cfg, self.op.master_netmask)
4049
4050     if self.op.diskparams:
4051       for dt_params in self.op.diskparams.values():
4052         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4053       try:
4054         utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4055       except errors.OpPrereqError, err:
4056         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4057                                    errors.ECODE_INVAL)
4058
4059   def ExpandNames(self):
4060     # FIXME: in the future maybe other cluster params won't require checking on
4061     # all nodes to be modified.
4062     # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4063     # resource locks the right thing, shouldn't it be the BGL instead?
4064     self.needed_locks = {
4065       locking.LEVEL_NODE: locking.ALL_SET,
4066       locking.LEVEL_INSTANCE: locking.ALL_SET,
4067       locking.LEVEL_NODEGROUP: locking.ALL_SET,
4068       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4069     }
4070     self.share_locks = _ShareAll()
4071
4072   def BuildHooksEnv(self):
4073     """Build hooks env.
4074
4075     """
4076     return {
4077       "OP_TARGET": self.cfg.GetClusterName(),
4078       "NEW_VG_NAME": self.op.vg_name,
4079       }
4080
4081   def BuildHooksNodes(self):
4082     """Build hooks nodes.
4083
4084     """
4085     mn = self.cfg.GetMasterNode()
4086     return ([mn], [mn])
4087
4088   def CheckPrereq(self):
4089     """Check prerequisites.
4090
4091     This checks whether the given params don't conflict and
4092     if the given volume group is valid.
4093
4094     """
4095     if self.op.vg_name is not None and not self.op.vg_name:
4096       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4097         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4098                                    " instances exist", errors.ECODE_INVAL)
4099
4100     if self.op.drbd_helper is not None and not self.op.drbd_helper:
4101       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4102         raise errors.OpPrereqError("Cannot disable drbd helper while"
4103                                    " drbd-based instances exist",
4104                                    errors.ECODE_INVAL)
4105
4106     node_list = self.owned_locks(locking.LEVEL_NODE)
4107
4108     # if vg_name not None, checks given volume group on all nodes
4109     if self.op.vg_name:
4110       vglist = self.rpc.call_vg_list(node_list)
4111       for node in node_list:
4112         msg = vglist[node].fail_msg
4113         if msg:
4114           # ignoring down node
4115           self.LogWarning("Error while gathering data on node %s"
4116                           " (ignoring node): %s", node, msg)
4117           continue
4118         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4119                                               self.op.vg_name,
4120                                               constants.MIN_VG_SIZE)
4121         if vgstatus:
4122           raise errors.OpPrereqError("Error on node '%s': %s" %
4123                                      (node, vgstatus), errors.ECODE_ENVIRON)
4124
4125     if self.op.drbd_helper:
4126       # checks given drbd helper on all nodes
4127       helpers = self.rpc.call_drbd_helper(node_list)
4128       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4129         if ninfo.offline:
4130           self.LogInfo("Not checking drbd helper on offline node %s", node)
4131           continue
4132         msg = helpers[node].fail_msg
4133         if msg:
4134           raise errors.OpPrereqError("Error checking drbd helper on node"
4135                                      " '%s': %s" % (node, msg),
4136                                      errors.ECODE_ENVIRON)
4137         node_helper = helpers[node].payload
4138         if node_helper != self.op.drbd_helper:
4139           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4140                                      (node, node_helper), errors.ECODE_ENVIRON)
4141
4142     self.cluster = cluster = self.cfg.GetClusterInfo()
4143     # validate params changes
4144     if self.op.beparams:
4145       objects.UpgradeBeParams(self.op.beparams)
4146       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4147       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4148
4149     if self.op.ndparams:
4150       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4151       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4152
4153       # TODO: we need a more general way to handle resetting
4154       # cluster-level parameters to default values
4155       if self.new_ndparams["oob_program"] == "":
4156         self.new_ndparams["oob_program"] = \
4157             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4158
4159     if self.op.hv_state:
4160       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4161                                             self.cluster.hv_state_static)
4162       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4163                                for hv, values in new_hv_state.items())
4164
4165     if self.op.disk_state:
4166       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4167                                                 self.cluster.disk_state_static)
4168       self.new_disk_state = \
4169         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4170                             for name, values in svalues.items()))
4171              for storage, svalues in new_disk_state.items())
4172
4173     if self.op.ipolicy:
4174       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4175                                             group_policy=False)
4176
4177       all_instances = self.cfg.GetAllInstancesInfo().values()
4178       violations = set()
4179       for group in self.cfg.GetAllNodeGroupsInfo().values():
4180         instances = frozenset([inst for inst in all_instances
4181                                if compat.any(node in group.members
4182                                              for node in inst.all_nodes)])
4183         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4184         ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4185         new = _ComputeNewInstanceViolations(ipol,
4186                                             new_ipolicy, instances)
4187         if new:
4188           violations.update(new)
4189
4190       if violations:
4191         self.LogWarning("After the ipolicy change the following instances"
4192                         " violate them: %s",
4193                         utils.CommaJoin(utils.NiceSort(violations)))
4194
4195     if self.op.nicparams:
4196       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4197       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4198       objects.NIC.CheckParameterSyntax(self.new_nicparams)
4199       nic_errors = []
4200
4201       # check all instances for consistency
4202       for instance in self.cfg.GetAllInstancesInfo().values():
4203         for nic_idx, nic in enumerate(instance.nics):
4204           params_copy = copy.deepcopy(nic.nicparams)
4205           params_filled = objects.FillDict(self.new_nicparams, params_copy)
4206
4207           # check parameter syntax
4208           try:
4209             objects.NIC.CheckParameterSyntax(params_filled)
4210           except errors.ConfigurationError, err:
4211             nic_errors.append("Instance %s, nic/%d: %s" %
4212                               (instance.name, nic_idx, err))
4213
4214           # if we're moving instances to routed, check that they have an ip
4215           target_mode = params_filled[constants.NIC_MODE]
4216           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4217             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4218                               " address" % (instance.name, nic_idx))
4219       if nic_errors:
4220         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4221                                    "\n".join(nic_errors), errors.ECODE_INVAL)
4222
4223     # hypervisor list/parameters
4224     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4225     if self.op.hvparams:
4226       for hv_name, hv_dict in self.op.hvparams.items():
4227         if hv_name not in self.new_hvparams:
4228           self.new_hvparams[hv_name] = hv_dict
4229         else:
4230           self.new_hvparams[hv_name].update(hv_dict)
4231
4232     # disk template parameters
4233     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4234     if self.op.diskparams:
4235       for dt_name, dt_params in self.op.diskparams.items():
4236         if dt_name not in self.op.diskparams:
4237           self.new_diskparams[dt_name] = dt_params
4238         else:
4239           self.new_diskparams[dt_name].update(dt_params)
4240
4241     # os hypervisor parameters
4242     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4243     if self.op.os_hvp:
4244       for os_name, hvs in self.op.os_hvp.items():
4245         if os_name not in self.new_os_hvp:
4246           self.new_os_hvp[os_name] = hvs
4247         else:
4248           for hv_name, hv_dict in hvs.items():
4249             if hv_name not in self.new_os_hvp[os_name]:
4250               self.new_os_hvp[os_name][hv_name] = hv_dict
4251             else:
4252               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4253
4254     # os parameters
4255     self.new_osp = objects.FillDict(cluster.osparams, {})
4256     if self.op.osparams:
4257       for os_name, osp in self.op.osparams.items():
4258         if os_name not in self.new_osp:
4259           self.new_osp[os_name] = {}
4260
4261         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4262                                                   use_none=True)
4263
4264         if not self.new_osp[os_name]:
4265           # we removed all parameters
4266           del self.new_osp[os_name]
4267         else:
4268           # check the parameter validity (remote check)
4269           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4270                          os_name, self.new_osp[os_name])
4271
4272     # changes to the hypervisor list
4273     if self.op.enabled_hypervisors is not None:
4274       self.hv_list = self.op.enabled_hypervisors
4275       for hv in self.hv_list:
4276         # if the hypervisor doesn't already exist in the cluster
4277         # hvparams, we initialize it to empty, and then (in both
4278         # cases) we make sure to fill the defaults, as we might not
4279         # have a complete defaults list if the hypervisor wasn't
4280         # enabled before
4281         if hv not in new_hvp:
4282           new_hvp[hv] = {}
4283         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4284         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4285     else:
4286       self.hv_list = cluster.enabled_hypervisors
4287
4288     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4289       # either the enabled list has changed, or the parameters have, validate
4290       for hv_name, hv_params in self.new_hvparams.items():
4291         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4292             (self.op.enabled_hypervisors and
4293              hv_name in self.op.enabled_hypervisors)):
4294           # either this is a new hypervisor, or its parameters have changed
4295           hv_class = hypervisor.GetHypervisor(hv_name)
4296           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4297           hv_class.CheckParameterSyntax(hv_params)
4298           _CheckHVParams(self, node_list, hv_name, hv_params)
4299
4300     if self.op.os_hvp:
4301       # no need to check any newly-enabled hypervisors, since the
4302       # defaults have already been checked in the above code-block
4303       for os_name, os_hvp in self.new_os_hvp.items():
4304         for hv_name, hv_params in os_hvp.items():
4305           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4306           # we need to fill in the new os_hvp on top of the actual hv_p
4307           cluster_defaults = self.new_hvparams.get(hv_name, {})
4308           new_osp = objects.FillDict(cluster_defaults, hv_params)
4309           hv_class = hypervisor.GetHypervisor(hv_name)
4310           hv_class.CheckParameterSyntax(new_osp)
4311           _CheckHVParams(self, node_list, hv_name, new_osp)
4312
4313     if self.op.default_iallocator:
4314       alloc_script = utils.FindFile(self.op.default_iallocator,
4315                                     constants.IALLOCATOR_SEARCH_PATH,
4316                                     os.path.isfile)
4317       if alloc_script is None:
4318         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4319                                    " specified" % self.op.default_iallocator,
4320                                    errors.ECODE_INVAL)
4321
4322   def Exec(self, feedback_fn):
4323     """Change the parameters of the cluster.
4324
4325     """
4326     if self.op.vg_name is not None:
4327       new_volume = self.op.vg_name
4328       if not new_volume:
4329         new_volume = None
4330       if new_volume != self.cfg.GetVGName():
4331         self.cfg.SetVGName(new_volume)
4332       else:
4333         feedback_fn("Cluster LVM configuration already in desired"
4334                     " state, not changing")
4335     if self.op.drbd_helper is not None:
4336       new_helper = self.op.drbd_helper
4337       if not new_helper:
4338         new_helper = None
4339       if new_helper != self.cfg.GetDRBDHelper():
4340         self.cfg.SetDRBDHelper(new_helper)
4341       else:
4342         feedback_fn("Cluster DRBD helper already in desired state,"
4343                     " not changing")
4344     if self.op.hvparams:
4345       self.cluster.hvparams = self.new_hvparams
4346     if self.op.os_hvp:
4347       self.cluster.os_hvp = self.new_os_hvp
4348     if self.op.enabled_hypervisors is not None:
4349       self.cluster.hvparams = self.new_hvparams
4350       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4351     if self.op.beparams:
4352       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4353     if self.op.nicparams:
4354       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4355     if self.op.ipolicy:
4356       self.cluster.ipolicy = self.new_ipolicy
4357     if self.op.osparams:
4358       self.cluster.osparams = self.new_osp
4359     if self.op.ndparams:
4360       self.cluster.ndparams = self.new_ndparams
4361     if self.op.diskparams:
4362       self.cluster.diskparams = self.new_diskparams
4363     if self.op.hv_state:
4364       self.cluster.hv_state_static = self.new_hv_state
4365     if self.op.disk_state:
4366       self.cluster.disk_state_static = self.new_disk_state
4367
4368     if self.op.candidate_pool_size is not None:
4369       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4370       # we need to update the pool size here, otherwise the save will fail
4371       _AdjustCandidatePool(self, [])
4372
4373     if self.op.maintain_node_health is not None:
4374       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4375         feedback_fn("Note: CONFD was disabled at build time, node health"
4376                     " maintenance is not useful (still enabling it)")
4377       self.cluster.maintain_node_health = self.op.maintain_node_health
4378
4379     if self.op.prealloc_wipe_disks is not None:
4380       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4381
4382     if self.op.add_uids is not None:
4383       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4384
4385     if self.op.remove_uids is not None:
4386       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4387
4388     if self.op.uid_pool is not None:
4389       self.cluster.uid_pool = self.op.uid_pool
4390
4391     if self.op.default_iallocator is not None:
4392       self.cluster.default_iallocator = self.op.default_iallocator
4393
4394     if self.op.reserved_lvs is not None:
4395       self.cluster.reserved_lvs = self.op.reserved_lvs
4396
4397     if self.op.use_external_mip_script is not None:
4398       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4399
4400     def helper_os(aname, mods, desc):
4401       desc += " OS list"
4402       lst = getattr(self.cluster, aname)
4403       for key, val in mods:
4404         if key == constants.DDM_ADD:
4405           if val in lst:
4406             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4407           else:
4408             lst.append(val)
4409         elif key == constants.DDM_REMOVE:
4410           if val in lst:
4411             lst.remove(val)
4412           else:
4413             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4414         else:
4415           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4416
4417     if self.op.hidden_os:
4418       helper_os("hidden_os", self.op.hidden_os, "hidden")
4419
4420     if self.op.blacklisted_os:
4421       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4422
4423     if self.op.master_netdev:
4424       master_params = self.cfg.GetMasterNetworkParameters()
4425       ems = self.cfg.GetUseExternalMipScript()
4426       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4427                   self.cluster.master_netdev)
4428       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4429                                                        master_params, ems)
4430       result.Raise("Could not disable the master ip")
4431       feedback_fn("Changing master_netdev from %s to %s" %
4432                   (master_params.netdev, self.op.master_netdev))
4433       self.cluster.master_netdev = self.op.master_netdev
4434
4435     if self.op.master_netmask:
4436       master_params = self.cfg.GetMasterNetworkParameters()
4437       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4438       result = self.rpc.call_node_change_master_netmask(master_params.name,
4439                                                         master_params.netmask,
4440                                                         self.op.master_netmask,
4441                                                         master_params.ip,
4442                                                         master_params.netdev)
4443       if result.fail_msg:
4444         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4445         feedback_fn(msg)
4446
4447       self.cluster.master_netmask = self.op.master_netmask
4448
4449     self.cfg.Update(self.cluster, feedback_fn)
4450
4451     if self.op.master_netdev:
4452       master_params = self.cfg.GetMasterNetworkParameters()
4453       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4454                   self.op.master_netdev)
4455       ems = self.cfg.GetUseExternalMipScript()
4456       result = self.rpc.call_node_activate_master_ip(master_params.name,
4457                                                      master_params, ems)
4458       if result.fail_msg:
4459         self.LogWarning("Could not re-enable the master ip on"
4460                         " the master, please restart manually: %s",
4461                         result.fail_msg)
4462
4463
4464 def _UploadHelper(lu, nodes, fname):
4465   """Helper for uploading a file and showing warnings.
4466
4467   """
4468   if os.path.exists(fname):
4469     result = lu.rpc.call_upload_file(nodes, fname)
4470     for to_node, to_result in result.items():
4471       msg = to_result.fail_msg
4472       if msg:
4473         msg = ("Copy of file %s to node %s failed: %s" %
4474                (fname, to_node, msg))
4475         lu.LogWarning(msg)
4476
4477
4478 def _ComputeAncillaryFiles(cluster, redist):
4479   """Compute files external to Ganeti which need to be consistent.
4480
4481   @type redist: boolean
4482   @param redist: Whether to include files which need to be redistributed
4483
4484   """
4485   # Compute files for all nodes
4486   files_all = set([
4487     pathutils.SSH_KNOWN_HOSTS_FILE,
4488     pathutils.CONFD_HMAC_KEY,
4489     pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4490     pathutils.SPICE_CERT_FILE,
4491     pathutils.SPICE_CACERT_FILE,
4492     pathutils.RAPI_USERS_FILE,
4493     ])
4494
4495   if redist:
4496     # we need to ship at least the RAPI certificate
4497     files_all.add(pathutils.RAPI_CERT_FILE)
4498   else:
4499     files_all.update(pathutils.ALL_CERT_FILES)
4500     files_all.update(ssconf.SimpleStore().GetFileList())
4501
4502   if cluster.modify_etc_hosts:
4503     files_all.add(pathutils.ETC_HOSTS)
4504
4505   if cluster.use_external_mip_script:
4506     files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4507
4508   # Files which are optional, these must:
4509   # - be present in one other category as well
4510   # - either exist or not exist on all nodes of that category (mc, vm all)
4511   files_opt = set([
4512     pathutils.RAPI_USERS_FILE,
4513     ])
4514
4515   # Files which should only be on master candidates
4516   files_mc = set()
4517
4518   if not redist:
4519     files_mc.add(pathutils.CLUSTER_CONF_FILE)
4520
4521   # File storage
4522   if (not redist and
4523       (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4524     files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4525     files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4526
4527   # Files which should only be on VM-capable nodes
4528   files_vm = set(
4529     filename
4530     for hv_name in cluster.enabled_hypervisors
4531     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4532
4533   files_opt |= set(
4534     filename
4535     for hv_name in cluster.enabled_hypervisors
4536     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4537
4538   # Filenames in each category must be unique
4539   all_files_set = files_all | files_mc | files_vm
4540   assert (len(all_files_set) ==
4541           sum(map(len, [files_all, files_mc, files_vm]))), \
4542          "Found file listed in more than one file list"
4543
4544   # Optional files must be present in one other category
4545   assert all_files_set.issuperset(files_opt), \
4546          "Optional file not in a different required list"
4547
4548   # This one file should never ever be re-distributed via RPC
4549   assert not (redist and
4550               pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4551
4552   return (files_all, files_opt, files_mc, files_vm)
4553
4554
4555 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4556   """Distribute additional files which are part of the cluster configuration.
4557
4558   ConfigWriter takes care of distributing the config and ssconf files, but
4559   there are more files which should be distributed to all nodes. This function
4560   makes sure those are copied.
4561
4562   @param lu: calling logical unit
4563   @param additional_nodes: list of nodes not in the config to distribute to
4564   @type additional_vm: boolean
4565   @param additional_vm: whether the additional nodes are vm-capable or not
4566
4567   """
4568   # Gather target nodes
4569   cluster = lu.cfg.GetClusterInfo()
4570   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4571
4572   online_nodes = lu.cfg.GetOnlineNodeList()
4573   online_set = frozenset(online_nodes)
4574   vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4575
4576   if additional_nodes is not None:
4577     online_nodes.extend(additional_nodes)
4578     if additional_vm:
4579       vm_nodes.extend(additional_nodes)
4580
4581   # Never distribute to master node
4582   for nodelist in [online_nodes, vm_nodes]:
4583     if master_info.name in nodelist:
4584       nodelist.remove(master_info.name)
4585
4586   # Gather file lists
4587   (files_all, _, files_mc, files_vm) = \
4588     _ComputeAncillaryFiles(cluster, True)
4589
4590   # Never re-distribute configuration file from here
4591   assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4592               pathutils.CLUSTER_CONF_FILE in files_vm)
4593   assert not files_mc, "Master candidates not handled in this function"
4594
4595   filemap = [
4596     (online_nodes, files_all),
4597     (vm_nodes, files_vm),
4598     ]
4599
4600   # Upload the files
4601   for (node_list, files) in filemap:
4602     for fname in files:
4603       _UploadHelper(lu, node_list, fname)
4604
4605
4606 class LUClusterRedistConf(NoHooksLU):
4607   """Force the redistribution of cluster configuration.
4608
4609   This is a very simple LU.
4610
4611   """
4612   REQ_BGL = False
4613
4614   def ExpandNames(self):
4615     self.needed_locks = {
4616       locking.LEVEL_NODE: locking.ALL_SET,
4617       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4618     }
4619     self.share_locks = _ShareAll()
4620
4621   def Exec(self, feedback_fn):
4622     """Redistribute the configuration.
4623
4624     """
4625     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4626     _RedistributeAncillaryFiles(self)
4627
4628
4629 class LUClusterActivateMasterIp(NoHooksLU):
4630   """Activate the master IP on the master node.
4631
4632   """
4633   def Exec(self, feedback_fn):
4634     """Activate the master IP.
4635
4636     """
4637     master_params = self.cfg.GetMasterNetworkParameters()
4638     ems = self.cfg.GetUseExternalMipScript()
4639     result = self.rpc.call_node_activate_master_ip(master_params.name,
4640                                                    master_params, ems)
4641     result.Raise("Could not activate the master IP")
4642
4643
4644 class LUClusterDeactivateMasterIp(NoHooksLU):
4645   """Deactivate the master IP on the master node.
4646
4647   """
4648   def Exec(self, feedback_fn):
4649     """Deactivate the master IP.
4650
4651     """
4652     master_params = self.cfg.GetMasterNetworkParameters()
4653     ems = self.cfg.GetUseExternalMipScript()
4654     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4655                                                      master_params, ems)
4656     result.Raise("Could not deactivate the master IP")
4657
4658
4659 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4660   """Sleep and poll for an instance's disk to sync.
4661
4662   """
4663   if not instance.disks or disks is not None and not disks:
4664     return True
4665
4666   disks = _ExpandCheckDisks(instance, disks)
4667
4668   if not oneshot:
4669     lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4670
4671   node = instance.primary_node
4672
4673   for dev in disks:
4674     lu.cfg.SetDiskID(dev, node)
4675
4676   # TODO: Convert to utils.Retry
4677
4678   retries = 0
4679   degr_retries = 10 # in seconds, as we sleep 1 second each time
4680   while True:
4681     max_time = 0
4682     done = True
4683     cumul_degraded = False
4684     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4685     msg = rstats.fail_msg
4686     if msg:
4687       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4688       retries += 1
4689       if retries >= 10:
4690         raise errors.RemoteError("Can't contact node %s for mirror data,"
4691                                  " aborting." % node)
4692       time.sleep(6)
4693       continue
4694     rstats = rstats.payload
4695     retries = 0
4696     for i, mstat in enumerate(rstats):
4697       if mstat is None:
4698         lu.LogWarning("Can't compute data for node %s/%s",
4699                            node, disks[i].iv_name)
4700         continue
4701
4702       cumul_degraded = (cumul_degraded or
4703                         (mstat.is_degraded and mstat.sync_percent is None))
4704       if mstat.sync_percent is not None:
4705         done = False
4706         if mstat.estimated_time is not None:
4707           rem_time = ("%s remaining (estimated)" %
4708                       utils.FormatSeconds(mstat.estimated_time))
4709           max_time = mstat.estimated_time
4710         else:
4711           rem_time = "no time estimate"
4712         lu.LogInfo("- device %s: %5.2f%% done, %s",
4713                    disks[i].iv_name, mstat.sync_percent, rem_time)
4714
4715     # if we're done but degraded, let's do a few small retries, to
4716     # make sure we see a stable and not transient situation; therefore
4717     # we force restart of the loop
4718     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4719       logging.info("Degraded disks found, %d retries left", degr_retries)
4720       degr_retries -= 1
4721       time.sleep(1)
4722       continue
4723
4724     if done or oneshot:
4725       break
4726
4727     time.sleep(min(60, max_time))
4728
4729   if done:
4730     lu.LogInfo("Instance %s's disks are in sync", instance.name)
4731
4732   return not cumul_degraded
4733
4734
4735 def _BlockdevFind(lu, node, dev, instance):
4736   """Wrapper around call_blockdev_find to annotate diskparams.
4737
4738   @param lu: A reference to the lu object
4739   @param node: The node to call out
4740   @param dev: The device to find
4741   @param instance: The instance object the device belongs to
4742   @returns The result of the rpc call
4743
4744   """
4745   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4746   return lu.rpc.call_blockdev_find(node, disk)
4747
4748
4749 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4750   """Wrapper around L{_CheckDiskConsistencyInner}.
4751
4752   """
4753   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4754   return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4755                                     ldisk=ldisk)
4756
4757
4758 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4759                                ldisk=False):
4760   """Check that mirrors are not degraded.
4761
4762   @attention: The device has to be annotated already.
4763
4764   The ldisk parameter, if True, will change the test from the
4765   is_degraded attribute (which represents overall non-ok status for
4766   the device(s)) to the ldisk (representing the local storage status).
4767
4768   """
4769   lu.cfg.SetDiskID(dev, node)
4770
4771   result = True
4772
4773   if on_primary or dev.AssembleOnSecondary():
4774     rstats = lu.rpc.call_blockdev_find(node, dev)
4775     msg = rstats.fail_msg
4776     if msg:
4777       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4778       result = False
4779     elif not rstats.payload:
4780       lu.LogWarning("Can't find disk on node %s", node)
4781       result = False
4782     else:
4783       if ldisk:
4784         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4785       else:
4786         result = result and not rstats.payload.is_degraded
4787
4788   if dev.children:
4789     for child in dev.children:
4790       result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4791                                                      on_primary)
4792
4793   return result
4794
4795
4796 class LUOobCommand(NoHooksLU):
4797   """Logical unit for OOB handling.
4798
4799   """
4800   REQ_BGL = False
4801   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4802
4803   def ExpandNames(self):
4804     """Gather locks we need.
4805
4806     """
4807     if self.op.node_names:
4808       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4809       lock_names = self.op.node_names
4810     else:
4811       lock_names = locking.ALL_SET
4812
4813     self.needed_locks = {
4814       locking.LEVEL_NODE: lock_names,
4815       }
4816
4817   def CheckPrereq(self):
4818     """Check prerequisites.
4819
4820     This checks:
4821      - the node exists in the configuration
4822      - OOB is supported
4823
4824     Any errors are signaled by raising errors.OpPrereqError.
4825
4826     """
4827     self.nodes = []
4828     self.master_node = self.cfg.GetMasterNode()
4829
4830     assert self.op.power_delay >= 0.0
4831
4832     if self.op.node_names:
4833       if (self.op.command in self._SKIP_MASTER and
4834           self.master_node in self.op.node_names):
4835         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4836         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4837
4838         if master_oob_handler:
4839           additional_text = ("run '%s %s %s' if you want to operate on the"
4840                              " master regardless") % (master_oob_handler,
4841                                                       self.op.command,
4842                                                       self.master_node)
4843         else:
4844           additional_text = "it does not support out-of-band operations"
4845
4846         raise errors.OpPrereqError(("Operating on the master node %s is not"
4847                                     " allowed for %s; %s") %
4848                                    (self.master_node, self.op.command,
4849                                     additional_text), errors.ECODE_INVAL)
4850     else:
4851       self.op.node_names = self.cfg.GetNodeList()
4852       if self.op.command in self._SKIP_MASTER:
4853         self.op.node_names.remove(self.master_node)
4854
4855     if self.op.command in self._SKIP_MASTER:
4856       assert self.master_node not in self.op.node_names
4857
4858     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4859       if node is None:
4860         raise errors.OpPrereqError("Node %s not found" % node_name,
4861                                    errors.ECODE_NOENT)
4862       else:
4863         self.nodes.append(node)
4864
4865       if (not self.op.ignore_status and
4866           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4867         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4868                                     " not marked offline") % node_name,
4869                                    errors.ECODE_STATE)
4870
4871   def Exec(self, feedback_fn):
4872     """Execute OOB and return result if we expect any.
4873
4874     """
4875     master_node = self.master_node
4876     ret = []
4877
4878     for idx, node in enumerate(utils.NiceSort(self.nodes,
4879                                               key=lambda node: node.name)):
4880       node_entry = [(constants.RS_NORMAL, node.name)]
4881       ret.append(node_entry)
4882
4883       oob_program = _SupportsOob(self.cfg, node)
4884
4885       if not oob_program:
4886         node_entry.append((constants.RS_UNAVAIL, None))
4887         continue
4888
4889       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4890                    self.op.command, oob_program, node.name)
4891       result = self.rpc.call_run_oob(master_node, oob_program,
4892                                      self.op.command, node.name,
4893                                      self.op.timeout)
4894
4895       if result.fail_msg:
4896         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4897                         node.name, result.fail_msg)
4898         node_entry.append((constants.RS_NODATA, None))
4899       else:
4900         try:
4901           self._CheckPayload(result)
4902         except errors.OpExecError, err:
4903           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4904                           node.name, err)
4905           node_entry.append((constants.RS_NODATA, None))
4906         else:
4907           if self.op.command == constants.OOB_HEALTH:
4908             # For health we should log important events
4909             for item, status in result.payload:
4910               if status in [constants.OOB_STATUS_WARNING,
4911                             constants.OOB_STATUS_CRITICAL]:
4912                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4913                                 item, node.name, status)
4914
4915           if self.op.command == constants.OOB_POWER_ON:
4916             node.powered = True
4917           elif self.op.command == constants.OOB_POWER_OFF:
4918             node.powered = False
4919           elif self.op.command == constants.OOB_POWER_STATUS:
4920             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4921             if powered != node.powered:
4922               logging.warning(("Recorded power state (%s) of node '%s' does not"
4923                                " match actual power state (%s)"), node.powered,
4924                               node.name, powered)
4925
4926           # For configuration changing commands we should update the node
4927           if self.op.command in (constants.OOB_POWER_ON,
4928                                  constants.OOB_POWER_OFF):
4929             self.cfg.Update(node, feedback_fn)
4930
4931           node_entry.append((constants.RS_NORMAL, result.payload))
4932
4933           if (self.op.command == constants.OOB_POWER_ON and
4934               idx < len(self.nodes) - 1):
4935             time.sleep(self.op.power_delay)
4936
4937     return ret
4938
4939   def _CheckPayload(self, result):
4940     """Checks if the payload is valid.
4941
4942     @param result: RPC result
4943     @raises errors.OpExecError: If payload is not valid
4944
4945     """
4946     errs = []
4947     if self.op.command == constants.OOB_HEALTH:
4948       if not isinstance(result.payload, list):
4949         errs.append("command 'health' is expected to return a list but got %s" %
4950                     type(result.payload))
4951       else:
4952         for item, status in result.payload:
4953           if status not in constants.OOB_STATUSES:
4954             errs.append("health item '%s' has invalid status '%s'" %
4955                         (item, status))
4956
4957     if self.op.command == constants.OOB_POWER_STATUS:
4958       if not isinstance(result.payload, dict):
4959         errs.append("power-status is expected to return a dict but got %s" %
4960                     type(result.payload))
4961
4962     if self.op.command in [
4963       constants.OOB_POWER_ON,
4964       constants.OOB_POWER_OFF,
4965       constants.OOB_POWER_CYCLE,
4966       ]:
4967       if result.payload is not None:
4968         errs.append("%s is expected to not return payload but got '%s'" %
4969                     (self.op.command, result.payload))
4970
4971     if errs:
4972       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4973                                utils.CommaJoin(errs))
4974
4975
4976 class _OsQuery(_QueryBase):
4977   FIELDS = query.OS_FIELDS
4978
4979   def ExpandNames(self, lu):
4980     # Lock all nodes in shared mode
4981     # Temporary removal of locks, should be reverted later
4982     # TODO: reintroduce locks when they are lighter-weight
4983     lu.needed_locks = {}
4984     #self.share_locks[locking.LEVEL_NODE] = 1
4985     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4986
4987     # The following variables interact with _QueryBase._GetNames
4988     if self.names:
4989       self.wanted = self.names
4990     else:
4991       self.wanted = locking.ALL_SET
4992
4993     self.do_locking = self.use_locking
4994
4995   def DeclareLocks(self, lu, level):
4996     pass
4997
4998   @staticmethod
4999   def _DiagnoseByOS(rlist):
5000     """Remaps a per-node return list into an a per-os per-node dictionary
5001
5002     @param rlist: a map with node names as keys and OS objects as values
5003
5004     @rtype: dict
5005     @return: a dictionary with osnames as keys and as value another
5006         map, with nodes as keys and tuples of (path, status, diagnose,
5007         variants, parameters, api_versions) as values, eg::
5008
5009           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5010                                      (/srv/..., False, "invalid api")],
5011                            "node2": [(/srv/..., True, "", [], [])]}
5012           }
5013
5014     """
5015     all_os = {}
5016     # we build here the list of nodes that didn't fail the RPC (at RPC
5017     # level), so that nodes with a non-responding node daemon don't
5018     # make all OSes invalid
5019     good_nodes = [node_name for node_name in rlist
5020                   if not rlist[node_name].fail_msg]
5021     for node_name, nr in rlist.items():
5022       if nr.fail_msg or not nr.payload:
5023         continue
5024       for (name, path, status, diagnose, variants,
5025            params, api_versions) in nr.payload:
5026         if name not in all_os:
5027           # build a list of nodes for this os containing empty lists
5028           # for each node in node_list
5029           all_os[name] = {}
5030           for nname in good_nodes:
5031             all_os[name][nname] = []
5032         # convert params from [name, help] to (name, help)
5033         params = [tuple(v) for v in params]
5034         all_os[name][node_name].append((path, status, diagnose,
5035                                         variants, params, api_versions))
5036     return all_os
5037
5038   def _GetQueryData(self, lu):
5039     """Computes the list of nodes and their attributes.
5040
5041     """
5042     # Locking is not used
5043     assert not (compat.any(lu.glm.is_owned(level)
5044                            for level in locking.LEVELS
5045                            if level != locking.LEVEL_CLUSTER) or
5046                 self.do_locking or self.use_locking)
5047
5048     valid_nodes = [node.name
5049                    for node in lu.cfg.GetAllNodesInfo().values()
5050                    if not node.offline and node.vm_capable]
5051     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5052     cluster = lu.cfg.GetClusterInfo()
5053
5054     data = {}
5055
5056     for (os_name, os_data) in pol.items():
5057       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5058                           hidden=(os_name in cluster.hidden_os),
5059                           blacklisted=(os_name in cluster.blacklisted_os))
5060
5061       variants = set()
5062       parameters = set()
5063       api_versions = set()
5064
5065       for idx, osl in enumerate(os_data.values()):
5066         info.valid = bool(info.valid and osl and osl[0][1])
5067         if not info.valid:
5068           break
5069
5070         (node_variants, node_params, node_api) = osl[0][3:6]
5071         if idx == 0:
5072           # First entry
5073           variants.update(node_variants)
5074           parameters.update(node_params)
5075           api_versions.update(node_api)
5076         else:
5077           # Filter out inconsistent values
5078           variants.intersection_update(node_variants)
5079           parameters.intersection_update(node_params)
5080           api_versions.intersection_update(node_api)
5081
5082       info.variants = list(variants)
5083       info.parameters = list(parameters)
5084       info.api_versions = list(api_versions)
5085
5086       data[os_name] = info
5087
5088     # Prepare data in requested order
5089     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5090             if name in data]
5091
5092
5093 class LUOsDiagnose(NoHooksLU):
5094   """Logical unit for OS diagnose/query.
5095
5096   """
5097   REQ_BGL = False
5098
5099   @staticmethod
5100   def _BuildFilter(fields, names):
5101     """Builds a filter for querying OSes.
5102
5103     """
5104     name_filter = qlang.MakeSimpleFilter("name", names)
5105
5106     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5107     # respective field is not requested
5108     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5109                      for fname in ["hidden", "blacklisted"]
5110                      if fname not in fields]
5111     if "valid" not in fields:
5112       status_filter.append([qlang.OP_TRUE, "valid"])
5113
5114     if status_filter:
5115       status_filter.insert(0, qlang.OP_AND)
5116     else:
5117       status_filter = None
5118
5119     if name_filter and status_filter:
5120       return [qlang.OP_AND, name_filter, status_filter]
5121     elif name_filter:
5122       return name_filter
5123     else:
5124       return status_filter
5125
5126   def CheckArguments(self):
5127     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5128                        self.op.output_fields, False)
5129
5130   def ExpandNames(self):
5131     self.oq.ExpandNames(self)
5132
5133   def Exec(self, feedback_fn):
5134     return self.oq.OldStyleQuery(self)
5135
5136
5137 class LUNodeRemove(LogicalUnit):
5138   """Logical unit for removing a node.
5139
5140   """
5141   HPATH = "node-remove"
5142   HTYPE = constants.HTYPE_NODE
5143
5144   def BuildHooksEnv(self):
5145     """Build hooks env.
5146
5147     """
5148     return {
5149       "OP_TARGET": self.op.node_name,
5150       "NODE_NAME": self.op.node_name,
5151       }
5152
5153   def BuildHooksNodes(self):
5154     """Build hooks nodes.
5155
5156     This doesn't run on the target node in the pre phase as a failed
5157     node would then be impossible to remove.
5158
5159     """
5160     all_nodes = self.cfg.GetNodeList()
5161     try:
5162       all_nodes.remove(self.op.node_name)
5163     except ValueError:
5164       pass
5165     return (all_nodes, all_nodes)
5166
5167   def CheckPrereq(self):
5168     """Check prerequisites.
5169
5170     This checks:
5171      - the node exists in the configuration
5172      - it does not have primary or secondary instances
5173      - it's not the master
5174
5175     Any errors are signaled by raising errors.OpPrereqError.
5176
5177     """
5178     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5179     node = self.cfg.GetNodeInfo(self.op.node_name)
5180     assert node is not None
5181
5182     masternode = self.cfg.GetMasterNode()
5183     if node.name == masternode:
5184       raise errors.OpPrereqError("Node is the master node, failover to another"
5185                                  " node is required", errors.ECODE_INVAL)
5186
5187     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5188       if node.name in instance.all_nodes:
5189         raise errors.OpPrereqError("Instance %s is still running on the node,"
5190                                    " please remove first" % instance_name,
5191                                    errors.ECODE_INVAL)
5192     self.op.node_name = node.name
5193     self.node = node
5194
5195   def Exec(self, feedback_fn):
5196     """Removes the node from the cluster.
5197
5198     """
5199     node = self.node
5200     logging.info("Stopping the node daemon and removing configs from node %s",
5201                  node.name)
5202
5203     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5204
5205     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5206       "Not owning BGL"
5207
5208     # Promote nodes to master candidate as needed
5209     _AdjustCandidatePool(self, exceptions=[node.name])
5210     self.context.RemoveNode(node.name)
5211
5212     # Run post hooks on the node before it's removed
5213     _RunPostHook(self, node.name)
5214
5215     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5216     msg = result.fail_msg
5217     if msg:
5218       self.LogWarning("Errors encountered on the remote node while leaving"
5219                       " the cluster: %s", msg)
5220
5221     # Remove node from our /etc/hosts
5222     if self.cfg.GetClusterInfo().modify_etc_hosts:
5223       master_node = self.cfg.GetMasterNode()
5224       result = self.rpc.call_etc_hosts_modify(master_node,
5225                                               constants.ETC_HOSTS_REMOVE,
5226                                               node.name, None)
5227       result.Raise("Can't update hosts file with new host data")
5228       _RedistributeAncillaryFiles(self)
5229
5230
5231 class _NodeQuery(_QueryBase):
5232   FIELDS = query.NODE_FIELDS
5233
5234   def ExpandNames(self, lu):
5235     lu.needed_locks = {}
5236     lu.share_locks = _ShareAll()
5237
5238     if self.names:
5239       self.wanted = _GetWantedNodes(lu, self.names)
5240     else:
5241       self.wanted = locking.ALL_SET
5242
5243     self.do_locking = (self.use_locking and
5244                        query.NQ_LIVE in self.requested_data)
5245
5246     if self.do_locking:
5247       # If any non-static field is requested we need to lock the nodes
5248       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5249       lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5250
5251   def DeclareLocks(self, lu, level):
5252     pass
5253
5254   def _GetQueryData(self, lu):
5255     """Computes the list of nodes and their attributes.
5256
5257     """
5258     all_info = lu.cfg.GetAllNodesInfo()
5259
5260     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5261
5262     # Gather data as requested
5263     if query.NQ_LIVE in self.requested_data:
5264       # filter out non-vm_capable nodes
5265       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5266
5267       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5268                                         [lu.cfg.GetHypervisorType()])
5269       live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5270                        for (name, nresult) in node_data.items()
5271                        if not nresult.fail_msg and nresult.payload)
5272     else:
5273       live_data = None
5274
5275     if query.NQ_INST in self.requested_data:
5276       node_to_primary = dict([(name, set()) for name in nodenames])
5277       node_to_secondary = dict([(name, set()) for name in nodenames])
5278
5279       inst_data = lu.cfg.GetAllInstancesInfo()
5280
5281       for inst in inst_data.values():
5282         if inst.primary_node in node_to_primary:
5283           node_to_primary[inst.primary_node].add(inst.name)
5284         for secnode in inst.secondary_nodes:
5285           if secnode in node_to_secondary:
5286             node_to_secondary[secnode].add(inst.name)
5287     else:
5288       node_to_primary = None
5289       node_to_secondary = None
5290
5291     if query.NQ_OOB in self.requested_data:
5292       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5293                          for name, node in all_info.iteritems())
5294     else:
5295       oob_support = None
5296
5297     if query.NQ_GROUP in self.requested_data:
5298       groups = lu.cfg.GetAllNodeGroupsInfo()
5299     else:
5300       groups = {}
5301
5302     return query.NodeQueryData([all_info[name] for name in nodenames],
5303                                live_data, lu.cfg.GetMasterNode(),
5304                                node_to_primary, node_to_secondary, groups,
5305                                oob_support, lu.cfg.GetClusterInfo())
5306
5307
5308 class LUNodeQuery(NoHooksLU):
5309   """Logical unit for querying nodes.
5310
5311   """
5312   # pylint: disable=W0142
5313   REQ_BGL = False
5314
5315   def CheckArguments(self):
5316     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5317                          self.op.output_fields, self.op.use_locking)
5318
5319   def ExpandNames(self):
5320     self.nq.ExpandNames(self)
5321
5322   def DeclareLocks(self, level):
5323     self.nq.DeclareLocks(self, level)
5324
5325   def Exec(self, feedback_fn):
5326     return self.nq.OldStyleQuery(self)
5327
5328
5329 class LUNodeQueryvols(NoHooksLU):
5330   """Logical unit for getting volumes on node(s).
5331
5332   """
5333   REQ_BGL = False
5334   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5335   _FIELDS_STATIC = utils.FieldSet("node")
5336
5337   def CheckArguments(self):
5338     _CheckOutputFields(static=self._FIELDS_STATIC,
5339                        dynamic=self._FIELDS_DYNAMIC,
5340                        selected=self.op.output_fields)
5341
5342   def ExpandNames(self):
5343     self.share_locks = _ShareAll()
5344     self.needed_locks = {}
5345
5346     if not self.op.nodes:
5347       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5348     else:
5349       self.needed_locks[locking.LEVEL_NODE] = \
5350         _GetWantedNodes(self, self.op.nodes)
5351
5352   def Exec(self, feedback_fn):
5353     """Computes the list of nodes and their attributes.
5354
5355     """
5356     nodenames = self.owned_locks(locking.LEVEL_NODE)
5357     volumes = self.rpc.call_node_volumes(nodenames)
5358
5359     ilist = self.cfg.GetAllInstancesInfo()
5360     vol2inst = _MapInstanceDisksToNodes(ilist.values())
5361
5362     output = []
5363     for node in nodenames:
5364       nresult = volumes[node]
5365       if nresult.offline:
5366         continue
5367       msg = nresult.fail_msg
5368       if msg:
5369         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5370         continue
5371
5372       node_vols = sorted(nresult.payload,
5373                          key=operator.itemgetter("dev"))
5374
5375       for vol in node_vols:
5376         node_output = []
5377         for field in self.op.output_fields:
5378           if field == "node":
5379             val = node
5380           elif field == "phys":
5381             val = vol["dev"]
5382           elif field == "vg":
5383             val = vol["vg"]
5384           elif field == "name":
5385             val = vol["name"]
5386           elif field == "size":
5387             val = int(float(vol["size"]))
5388           elif field == "instance":
5389             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5390           else:
5391             raise errors.ParameterError(field)
5392           node_output.append(str(val))
5393
5394         output.append(node_output)
5395
5396     return output
5397
5398
5399 class LUNodeQueryStorage(NoHooksLU):
5400   """Logical unit for getting information on storage units on node(s).
5401
5402   """
5403   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5404   REQ_BGL = False
5405
5406   def CheckArguments(self):
5407     _CheckOutputFields(static=self._FIELDS_STATIC,
5408                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5409                        selected=self.op.output_fields)
5410
5411   def ExpandNames(self):
5412     self.share_locks = _ShareAll()
5413
5414     if self.op.nodes:
5415       self.needed_locks = {
5416         locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5417         }
5418     else:
5419       self.needed_locks = {
5420         locking.LEVEL_NODE: locking.ALL_SET,
5421         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5422         }
5423
5424   def Exec(self, feedback_fn):
5425     """Computes the list of nodes and their attributes.
5426
5427     """
5428     self.nodes = self.owned_locks(locking.LEVEL_NODE)
5429
5430     # Always get name to sort by
5431     if constants.SF_NAME in self.op.output_fields:
5432       fields = self.op.output_fields[:]
5433     else:
5434       fields = [constants.SF_NAME] + self.op.output_fields
5435
5436     # Never ask for node or type as it's only known to the LU
5437     for extra in [constants.SF_NODE, constants.SF_TYPE]:
5438       while extra in fields:
5439         fields.remove(extra)
5440
5441     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5442     name_idx = field_idx[constants.SF_NAME]
5443
5444     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5445     data = self.rpc.call_storage_list(self.nodes,
5446                                       self.op.storage_type, st_args,
5447                                       self.op.name, fields)
5448
5449     result = []
5450
5451     for node in utils.NiceSort(self.nodes):
5452       nresult = data[node]
5453       if nresult.offline:
5454         continue
5455
5456       msg = nresult.fail_msg
5457       if msg:
5458         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5459         continue
5460
5461       rows = dict([(row[name_idx], row) for row in nresult.payload])
5462
5463       for name in utils.NiceSort(rows.keys()):
5464         row = rows[name]
5465
5466         out = []
5467
5468         for field in self.op.output_fields:
5469           if field == constants.SF_NODE:
5470             val = node
5471           elif field == constants.SF_TYPE:
5472             val = self.op.storage_type
5473           elif field in field_idx:
5474             val = row[field_idx[field]]
5475           else:
5476             raise errors.ParameterError(field)
5477
5478           out.append(val)
5479
5480         result.append(out)
5481
5482     return result
5483
5484
5485 class _InstanceQuery(_QueryBase):
5486   FIELDS = query.INSTANCE_FIELDS
5487
5488   def ExpandNames(self, lu):
5489     lu.needed_locks = {}
5490     lu.share_locks = _ShareAll()
5491
5492     if self.names:
5493       self.wanted = _GetWantedInstances(lu, self.names)
5494     else:
5495       self.wanted = locking.ALL_SET
5496
5497     self.do_locking = (self.use_locking and
5498                        query.IQ_LIVE in self.requested_data)
5499     if self.do_locking:
5500       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5501       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5502       lu.needed_locks[locking.LEVEL_NODE] = []
5503       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5504
5505     self.do_grouplocks = (self.do_locking and
5506                           query.IQ_NODES in self.requested_data)
5507
5508   def DeclareLocks(self, lu, level):
5509     if self.do_locking:
5510       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5511         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5512
5513         # Lock all groups used by instances optimistically; this requires going
5514         # via the node before it's locked, requiring verification later on
5515         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5516           set(group_uuid
5517               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5518               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5519       elif level == locking.LEVEL_NODE:
5520         lu._LockInstancesNodes() # pylint: disable=W0212
5521
5522   @staticmethod
5523   def _CheckGroupLocks(lu):
5524     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5525     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5526
5527     # Check if node groups for locked instances are still correct
5528     for instance_name in owned_instances:
5529       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5530
5531   def _GetQueryData(self, lu):
5532     """Computes the list of instances and their attributes.
5533
5534     """
5535     if self.do_grouplocks:
5536       self._CheckGroupLocks(lu)
5537
5538     cluster = lu.cfg.GetClusterInfo()
5539     all_info = lu.cfg.GetAllInstancesInfo()
5540
5541     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5542
5543     instance_list = [all_info[name] for name in instance_names]
5544     nodes = frozenset(itertools.chain(*(inst.all_nodes
5545                                         for inst in instance_list)))
5546     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5547     bad_nodes = []
5548     offline_nodes = []
5549     wrongnode_inst = set()
5550
5551     # Gather data as requested
5552     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5553       live_data = {}
5554       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5555       for name in nodes:
5556         result = node_data[name]
5557         if result.offline:
5558           # offline nodes will be in both lists
5559           assert result.fail_msg
5560           offline_nodes.append(name)
5561         if result.fail_msg:
5562           bad_nodes.append(name)
5563         elif result.payload:
5564           for inst in result.payload:
5565             if inst in all_info:
5566               if all_info[inst].primary_node == name:
5567                 live_data.update(result.payload)
5568               else:
5569                 wrongnode_inst.add(inst)
5570             else:
5571               # orphan instance; we don't list it here as we don't
5572               # handle this case yet in the output of instance listing
5573               logging.warning("Orphan instance '%s' found on node %s",
5574                               inst, name)
5575         # else no instance is alive
5576     else:
5577       live_data = {}
5578
5579     if query.IQ_DISKUSAGE in self.requested_data:
5580       gmi = ganeti.masterd.instance
5581       disk_usage = dict((inst.name,
5582                          gmi.ComputeDiskSize(inst.disk_template,
5583                                              [{constants.IDISK_SIZE: disk.size}
5584                                               for disk in inst.disks]))
5585                         for inst in instance_list)
5586     else:
5587       disk_usage = None
5588
5589     if query.IQ_CONSOLE in self.requested_data:
5590       consinfo = {}
5591       for inst in instance_list:
5592         if inst.name in live_data:
5593           # Instance is running
5594           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5595         else:
5596           consinfo[inst.name] = None
5597       assert set(consinfo.keys()) == set(instance_names)
5598     else:
5599       consinfo = None
5600
5601     if query.IQ_NODES in self.requested_data:
5602       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5603                                             instance_list)))
5604       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5605       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5606                     for uuid in set(map(operator.attrgetter("group"),
5607                                         nodes.values())))
5608     else:
5609       nodes = None
5610       groups = None
5611
5612     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5613                                    disk_usage, offline_nodes, bad_nodes,
5614                                    live_data, wrongnode_inst, consinfo,
5615                                    nodes, groups)
5616
5617
5618 class LUQuery(NoHooksLU):
5619   """Query for resources/items of a certain kind.
5620
5621   """
5622   # pylint: disable=W0142
5623   REQ_BGL = False
5624
5625   def CheckArguments(self):
5626     qcls = _GetQueryImplementation(self.op.what)
5627
5628     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5629
5630   def ExpandNames(self):
5631     self.impl.ExpandNames(self)
5632
5633   def DeclareLocks(self, level):
5634     self.impl.DeclareLocks(self, level)
5635
5636   def Exec(self, feedback_fn):
5637     return self.impl.NewStyleQuery(self)
5638
5639
5640 class LUQueryFields(NoHooksLU):
5641   """Query for resources/items of a certain kind.
5642
5643   """
5644   # pylint: disable=W0142
5645   REQ_BGL = False
5646
5647   def CheckArguments(self):
5648     self.qcls = _GetQueryImplementation(self.op.what)
5649
5650   def ExpandNames(self):
5651     self.needed_locks = {}
5652
5653   def Exec(self, feedback_fn):
5654     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5655
5656
5657 class LUNodeModifyStorage(NoHooksLU):
5658   """Logical unit for modifying a storage volume on a node.
5659
5660   """
5661   REQ_BGL = False
5662
5663   def CheckArguments(self):
5664     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5665
5666     storage_type = self.op.storage_type
5667
5668     try:
5669       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5670     except KeyError:
5671       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5672                                  " modified" % storage_type,
5673                                  errors.ECODE_INVAL)
5674
5675     diff = set(self.op.changes.keys()) - modifiable
5676     if diff:
5677       raise errors.OpPrereqError("The following fields can not be modified for"
5678                                  " storage units of type '%s': %r" %
5679                                  (storage_type, list(diff)),
5680                                  errors.ECODE_INVAL)
5681
5682   def ExpandNames(self):
5683     self.needed_locks = {
5684       locking.LEVEL_NODE: self.op.node_name,
5685       }
5686
5687   def Exec(self, feedback_fn):
5688     """Computes the list of nodes and their attributes.
5689
5690     """
5691     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5692     result = self.rpc.call_storage_modify(self.op.node_name,
5693                                           self.op.storage_type, st_args,
5694                                           self.op.name, self.op.changes)
5695     result.Raise("Failed to modify storage unit '%s' on %s" %
5696                  (self.op.name, self.op.node_name))
5697
5698
5699 class LUNodeAdd(LogicalUnit):
5700   """Logical unit for adding node to the cluster.
5701
5702   """
5703   HPATH = "node-add"
5704   HTYPE = constants.HTYPE_NODE
5705   _NFLAGS = ["master_capable", "vm_capable"]
5706
5707   def CheckArguments(self):
5708     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5709     # validate/normalize the node name
5710     self.hostname = netutils.GetHostname(name=self.op.node_name,
5711                                          family=self.primary_ip_family)
5712     self.op.node_name = self.hostname.name
5713
5714     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5715       raise errors.OpPrereqError("Cannot readd the master node",
5716                                  errors.ECODE_STATE)
5717
5718     if self.op.readd and self.op.group:
5719       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5720                                  " being readded", errors.ECODE_INVAL)
5721
5722   def BuildHooksEnv(self):
5723     """Build hooks env.
5724
5725     This will run on all nodes before, and on all nodes + the new node after.
5726
5727     """
5728     return {
5729       "OP_TARGET": self.op.node_name,
5730       "NODE_NAME": self.op.node_name,
5731       "NODE_PIP": self.op.primary_ip,
5732       "NODE_SIP": self.op.secondary_ip,
5733       "MASTER_CAPABLE": str(self.op.master_capable),
5734       "VM_CAPABLE": str(self.op.vm_capable),
5735       }
5736
5737   def BuildHooksNodes(self):
5738     """Build hooks nodes.
5739
5740     """
5741     # Exclude added node
5742     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5743     post_nodes = pre_nodes + [self.op.node_name, ]
5744
5745     return (pre_nodes, post_nodes)
5746
5747   def CheckPrereq(self):
5748     """Check prerequisites.
5749
5750     This checks:
5751      - the new node is not already in the config
5752      - it is resolvable
5753      - its parameters (single/dual homed) matches the cluster
5754
5755     Any errors are signaled by raising errors.OpPrereqError.
5756
5757     """
5758     cfg = self.cfg
5759     hostname = self.hostname
5760     node = hostname.name
5761     primary_ip = self.op.primary_ip = hostname.ip
5762     if self.op.secondary_ip is None:
5763       if self.primary_ip_family == netutils.IP6Address.family:
5764         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5765                                    " IPv4 address must be given as secondary",
5766                                    errors.ECODE_INVAL)
5767       self.op.secondary_ip = primary_ip
5768
5769     secondary_ip = self.op.secondary_ip
5770     if not netutils.IP4Address.IsValid(secondary_ip):
5771       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5772                                  " address" % secondary_ip, errors.ECODE_INVAL)
5773
5774     node_list = cfg.GetNodeList()
5775     if not self.op.readd and node in node_list:
5776       raise errors.OpPrereqError("Node %s is already in the configuration" %
5777                                  node, errors.ECODE_EXISTS)
5778     elif self.op.readd and node not in node_list:
5779       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5780                                  errors.ECODE_NOENT)
5781
5782     self.changed_primary_ip = False
5783
5784     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5785       if self.op.readd and node == existing_node_name:
5786         if existing_node.secondary_ip != secondary_ip:
5787           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5788                                      " address configuration as before",
5789                                      errors.ECODE_INVAL)
5790         if existing_node.primary_ip != primary_ip:
5791           self.changed_primary_ip = True
5792
5793         continue
5794
5795       if (existing_node.primary_ip == primary_ip or
5796           existing_node.secondary_ip == primary_ip or
5797           existing_node.primary_ip == secondary_ip or
5798           existing_node.secondary_ip == secondary_ip):
5799         raise errors.OpPrereqError("New node ip address(es) conflict with"
5800                                    " existing node %s" % existing_node.name,
5801                                    errors.ECODE_NOTUNIQUE)
5802
5803     # After this 'if' block, None is no longer a valid value for the
5804     # _capable op attributes
5805     if self.op.readd:
5806       old_node = self.cfg.GetNodeInfo(node)
5807       assert old_node is not None, "Can't retrieve locked node %s" % node
5808       for attr in self._NFLAGS:
5809         if getattr(self.op, attr) is None:
5810           setattr(self.op, attr, getattr(old_node, attr))
5811     else:
5812       for attr in self._NFLAGS:
5813         if getattr(self.op, attr) is None:
5814           setattr(self.op, attr, True)
5815
5816     if self.op.readd and not self.op.vm_capable:
5817       pri, sec = cfg.GetNodeInstances(node)
5818       if pri or sec:
5819         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5820                                    " flag set to false, but it already holds"
5821                                    " instances" % node,
5822                                    errors.ECODE_STATE)
5823
5824     # check that the type of the node (single versus dual homed) is the
5825     # same as for the master
5826     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5827     master_singlehomed = myself.secondary_ip == myself.primary_ip
5828     newbie_singlehomed = secondary_ip == primary_ip
5829     if master_singlehomed != newbie_singlehomed:
5830       if master_singlehomed:
5831         raise errors.OpPrereqError("The master has no secondary ip but the"
5832                                    " new node has one",
5833                                    errors.ECODE_INVAL)
5834       else:
5835         raise errors.OpPrereqError("The master has a secondary ip but the"
5836                                    " new node doesn't have one",
5837                                    errors.ECODE_INVAL)
5838
5839     # checks reachability
5840     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5841       raise errors.OpPrereqError("Node not reachable by ping",
5842                                  errors.ECODE_ENVIRON)
5843
5844     if not newbie_singlehomed:
5845       # check reachability from my secondary ip to newbie's secondary ip
5846       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5847                               source=myself.secondary_ip):
5848         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5849                                    " based ping to node daemon port",
5850                                    errors.ECODE_ENVIRON)
5851
5852     if self.op.readd:
5853       exceptions = [node]
5854     else:
5855       exceptions = []
5856
5857     if self.op.master_capable:
5858       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5859     else:
5860       self.master_candidate = False
5861
5862     if self.op.readd:
5863       self.new_node = old_node
5864     else:
5865       node_group = cfg.LookupNodeGroup(self.op.group)
5866       self.new_node = objects.Node(name=node,
5867                                    primary_ip=primary_ip,
5868                                    secondary_ip=secondary_ip,
5869                                    master_candidate=self.master_candidate,
5870                                    offline=False, drained=False,
5871                                    group=node_group)
5872
5873     if self.op.ndparams:
5874       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5875
5876     if self.op.hv_state:
5877       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5878
5879     if self.op.disk_state:
5880       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5881
5882     # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5883     #       it a property on the base class.
5884     result = rpc.DnsOnlyRunner().call_version([node])[node]
5885     result.Raise("Can't get version information from node %s" % node)
5886     if constants.PROTOCOL_VERSION == result.payload:
5887       logging.info("Communication to node %s fine, sw version %s match",
5888                    node, result.payload)
5889     else:
5890       raise errors.OpPrereqError("Version mismatch master version %s,"
5891                                  " node version %s" %
5892                                  (constants.PROTOCOL_VERSION, result.payload),
5893                                  errors.ECODE_ENVIRON)
5894
5895   def Exec(self, feedback_fn):
5896     """Adds the new node to the cluster.
5897
5898     """
5899     new_node = self.new_node
5900     node = new_node.name
5901
5902     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5903       "Not owning BGL"
5904
5905     # We adding a new node so we assume it's powered
5906     new_node.powered = True
5907
5908     # for re-adds, reset the offline/drained/master-candidate flags;
5909     # we need to reset here, otherwise offline would prevent RPC calls
5910     # later in the procedure; this also means that if the re-add
5911     # fails, we are left with a non-offlined, broken node
5912     if self.op.readd:
5913       new_node.drained = new_node.offline = False # pylint: disable=W0201
5914       self.LogInfo("Readding a node, the offline/drained flags were reset")
5915       # if we demote the node, we do cleanup later in the procedure
5916       new_node.master_candidate = self.master_candidate
5917       if self.changed_primary_ip:
5918         new_node.primary_ip = self.op.primary_ip
5919
5920     # copy the master/vm_capable flags
5921     for attr in self._NFLAGS:
5922       setattr(new_node, attr, getattr(self.op, attr))
5923
5924     # notify the user about any possible mc promotion
5925     if new_node.master_candidate:
5926       self.LogInfo("Node will be a master candidate")
5927
5928     if self.op.ndparams:
5929       new_node.ndparams = self.op.ndparams
5930     else:
5931       new_node.ndparams = {}
5932
5933     if self.op.hv_state:
5934       new_node.hv_state_static = self.new_hv_state
5935
5936     if self.op.disk_state:
5937       new_node.disk_state_static = self.new_disk_state
5938
5939     # Add node to our /etc/hosts, and add key to known_hosts
5940     if self.cfg.GetClusterInfo().modify_etc_hosts:
5941       master_node = self.cfg.GetMasterNode()
5942       result = self.rpc.call_etc_hosts_modify(master_node,
5943                                               constants.ETC_HOSTS_ADD,
5944                                               self.hostname.name,
5945                                               self.hostname.ip)
5946       result.Raise("Can't update hosts file with new host data")
5947
5948     if new_node.secondary_ip != new_node.primary_ip:
5949       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5950                                False)
5951
5952     node_verify_list = [self.cfg.GetMasterNode()]
5953     node_verify_param = {
5954       constants.NV_NODELIST: ([node], {}),
5955       # TODO: do a node-net-test as well?
5956     }
5957
5958     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5959                                        self.cfg.GetClusterName())
5960     for verifier in node_verify_list:
5961       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5962       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5963       if nl_payload:
5964         for failed in nl_payload:
5965           feedback_fn("ssh/hostname verification failed"
5966                       " (checking from %s): %s" %
5967                       (verifier, nl_payload[failed]))
5968         raise errors.OpExecError("ssh/hostname verification failed")
5969
5970     if self.op.readd:
5971       _RedistributeAncillaryFiles(self)
5972       self.context.ReaddNode(new_node)
5973       # make sure we redistribute the config
5974       self.cfg.Update(new_node, feedback_fn)
5975       # and make sure the new node will not have old files around
5976       if not new_node.master_candidate:
5977         result = self.rpc.call_node_demote_from_mc(new_node.name)
5978         msg = result.fail_msg
5979         if msg:
5980           self.LogWarning("Node failed to demote itself from master"
5981                           " candidate status: %s" % msg)
5982     else:
5983       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5984                                   additional_vm=self.op.vm_capable)
5985       self.context.AddNode(new_node, self.proc.GetECId())
5986
5987
5988 class LUNodeSetParams(LogicalUnit):
5989   """Modifies the parameters of a node.
5990
5991   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5992       to the node role (as _ROLE_*)
5993   @cvar _R2F: a dictionary from node role to tuples of flags
5994   @cvar _FLAGS: a list of attribute names corresponding to the flags
5995
5996   """
5997   HPATH = "node-modify"
5998   HTYPE = constants.HTYPE_NODE
5999   REQ_BGL = False
6000   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6001   _F2R = {
6002     (True, False, False): _ROLE_CANDIDATE,
6003     (False, True, False): _ROLE_DRAINED,
6004     (False, False, True): _ROLE_OFFLINE,
6005     (False, False, False): _ROLE_REGULAR,
6006     }
6007   _R2F = dict((v, k) for k, v in _F2R.items())
6008   _FLAGS = ["master_candidate", "drained", "offline"]
6009
6010   def CheckArguments(self):
6011     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6012     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6013                 self.op.master_capable, self.op.vm_capable,
6014                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6015                 self.op.disk_state]
6016     if all_mods.count(None) == len(all_mods):
6017       raise errors.OpPrereqError("Please pass at least one modification",
6018                                  errors.ECODE_INVAL)
6019     if all_mods.count(True) > 1:
6020       raise errors.OpPrereqError("Can't set the node into more than one"
6021                                  " state at the same time",
6022                                  errors.ECODE_INVAL)
6023
6024     # Boolean value that tells us whether we might be demoting from MC
6025     self.might_demote = (self.op.master_candidate is False or
6026                          self.op.offline is True or
6027                          self.op.drained is True or
6028                          self.op.master_capable is False)
6029
6030     if self.op.secondary_ip:
6031       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6032         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6033                                    " address" % self.op.secondary_ip,
6034                                    errors.ECODE_INVAL)
6035
6036     self.lock_all = self.op.auto_promote and self.might_demote
6037     self.lock_instances = self.op.secondary_ip is not None
6038
6039   def _InstanceFilter(self, instance):
6040     """Filter for getting affected instances.
6041
6042     """
6043     return (instance.disk_template in constants.DTS_INT_MIRROR and
6044             self.op.node_name in instance.all_nodes)
6045
6046   def ExpandNames(self):
6047     if self.lock_all:
6048       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
6049     else:
6050       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
6051
6052     # Since modifying a node can have severe effects on currently running
6053     # operations the resource lock is at least acquired in shared mode
6054     self.needed_locks[locking.LEVEL_NODE_RES] = \
6055       self.needed_locks[locking.LEVEL_NODE]
6056
6057     # Get node resource and instance locks in shared mode; they are not used
6058     # for anything but read-only access
6059     self.share_locks[locking.LEVEL_NODE_RES] = 1
6060     self.share_locks[locking.LEVEL_INSTANCE] = 1
6061
6062     if self.lock_instances:
6063       self.needed_locks[locking.LEVEL_INSTANCE] = \
6064         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6065
6066   def BuildHooksEnv(self):
6067     """Build hooks env.
6068
6069     This runs on the master node.
6070
6071     """
6072     return {
6073       "OP_TARGET": self.op.node_name,
6074       "MASTER_CANDIDATE": str(self.op.master_candidate),
6075       "OFFLINE": str(self.op.offline),
6076       "DRAINED": str(self.op.drained),
6077       "MASTER_CAPABLE": str(self.op.master_capable),
6078       "VM_CAPABLE": str(self.op.vm_capable),
6079       }
6080
6081   def BuildHooksNodes(self):
6082     """Build hooks nodes.
6083
6084     """
6085     nl = [self.cfg.GetMasterNode(), self.op.node_name]
6086     return (nl, nl)
6087
6088   def CheckPrereq(self):
6089     """Check prerequisites.
6090
6091     This only checks the instance list against the existing names.
6092
6093     """
6094     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6095
6096     if self.lock_instances:
6097       affected_instances = \
6098         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6099
6100       # Verify instance locks
6101       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6102       wanted_instances = frozenset(affected_instances.keys())
6103       if wanted_instances - owned_instances:
6104         raise errors.OpPrereqError("Instances affected by changing node %s's"
6105                                    " secondary IP address have changed since"
6106                                    " locks were acquired, wanted '%s', have"
6107                                    " '%s'; retry the operation" %
6108                                    (self.op.node_name,
6109                                     utils.CommaJoin(wanted_instances),
6110                                     utils.CommaJoin(owned_instances)),
6111                                    errors.ECODE_STATE)
6112     else:
6113       affected_instances = None
6114
6115     if (self.op.master_candidate is not None or
6116         self.op.drained is not None or
6117         self.op.offline is not None):
6118       # we can't change the master's node flags
6119       if self.op.node_name == self.cfg.GetMasterNode():
6120         raise errors.OpPrereqError("The master role can be changed"
6121                                    " only via master-failover",
6122                                    errors.ECODE_INVAL)
6123
6124     if self.op.master_candidate and not node.master_capable:
6125       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6126                                  " it a master candidate" % node.name,
6127                                  errors.ECODE_STATE)
6128
6129     if self.op.vm_capable is False:
6130       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6131       if ipri or isec:
6132         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6133                                    " the vm_capable flag" % node.name,
6134                                    errors.ECODE_STATE)
6135
6136     if node.master_candidate and self.might_demote and not self.lock_all:
6137       assert not self.op.auto_promote, "auto_promote set but lock_all not"
6138       # check if after removing the current node, we're missing master
6139       # candidates
6140       (mc_remaining, mc_should, _) = \
6141           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6142       if mc_remaining < mc_should:
6143         raise errors.OpPrereqError("Not enough master candidates, please"
6144                                    " pass auto promote option to allow"
6145                                    " promotion (--auto-promote or RAPI"
6146                                    " auto_promote=True)", errors.ECODE_STATE)
6147
6148     self.old_flags = old_flags = (node.master_candidate,
6149                                   node.drained, node.offline)
6150     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6151     self.old_role = old_role = self._F2R[old_flags]
6152
6153     # Check for ineffective changes
6154     for attr in self._FLAGS:
6155       if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6156         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6157         setattr(self.op, attr, None)
6158
6159     # Past this point, any flag change to False means a transition
6160     # away from the respective state, as only real changes are kept
6161
6162     # TODO: We might query the real power state if it supports OOB
6163     if _SupportsOob(self.cfg, node):
6164       if self.op.offline is False and not (node.powered or
6165                                            self.op.powered is True):
6166         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6167                                     " offline status can be reset") %
6168                                    self.op.node_name, errors.ECODE_STATE)
6169     elif self.op.powered is not None:
6170       raise errors.OpPrereqError(("Unable to change powered state for node %s"
6171                                   " as it does not support out-of-band"
6172                                   " handling") % self.op.node_name,
6173                                  errors.ECODE_STATE)
6174
6175     # If we're being deofflined/drained, we'll MC ourself if needed
6176     if (self.op.drained is False or self.op.offline is False or
6177         (self.op.master_capable and not node.master_capable)):
6178       if _DecideSelfPromotion(self):
6179         self.op.master_candidate = True
6180         self.LogInfo("Auto-promoting node to master candidate")
6181
6182     # If we're no longer master capable, we'll demote ourselves from MC
6183     if self.op.master_capable is False and node.master_candidate:
6184       self.LogInfo("Demoting from master candidate")
6185       self.op.master_candidate = False
6186
6187     # Compute new role
6188     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6189     if self.op.master_candidate:
6190       new_role = self._ROLE_CANDIDATE
6191     elif self.op.drained:
6192       new_role = self._ROLE_DRAINED
6193     elif self.op.offline:
6194       new_role = self._ROLE_OFFLINE
6195     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6196       # False is still in new flags, which means we're un-setting (the
6197       # only) True flag
6198       new_role = self._ROLE_REGULAR
6199     else: # no new flags, nothing, keep old role
6200       new_role = old_role
6201
6202     self.new_role = new_role
6203
6204     if old_role == self._ROLE_OFFLINE and new_role != old_role:
6205       # Trying to transition out of offline status
6206       result = self.rpc.call_version([node.name])[node.name]
6207       if result.fail_msg:
6208         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6209                                    " to report its version: %s" %
6210                                    (node.name, result.fail_msg),
6211                                    errors.ECODE_STATE)
6212       else:
6213         self.LogWarning("Transitioning node from offline to online state"
6214                         " without using re-add. Please make sure the node"
6215                         " is healthy!")
6216
6217     # When changing the secondary ip, verify if this is a single-homed to
6218     # multi-homed transition or vice versa, and apply the relevant
6219     # restrictions.
6220     if self.op.secondary_ip:
6221       # Ok even without locking, because this can't be changed by any LU
6222       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6223       master_singlehomed = master.secondary_ip == master.primary_ip
6224       if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6225         if self.op.force and node.name == master.name:
6226           self.LogWarning("Transitioning from single-homed to multi-homed"
6227                           " cluster; all nodes will require a secondary IP"
6228                           " address")
6229         else:
6230           raise errors.OpPrereqError("Changing the secondary ip on a"
6231                                      " single-homed cluster requires the"
6232                                      " --force option to be passed, and the"
6233                                      " target node to be the master",
6234                                      errors.ECODE_INVAL)
6235       elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6236         if self.op.force and node.name == master.name:
6237           self.LogWarning("Transitioning from multi-homed to single-homed"
6238                           " cluster; secondary IP addresses will have to be"
6239                           " removed")
6240         else:
6241           raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6242                                      " same as the primary IP on a multi-homed"
6243                                      " cluster, unless the --force option is"
6244                                      " passed, and the target node is the"
6245                                      " master", errors.ECODE_INVAL)
6246
6247       assert not (frozenset(affected_instances) -
6248                   self.owned_locks(locking.LEVEL_INSTANCE))
6249
6250       if node.offline:
6251         if affected_instances:
6252           msg = ("Cannot change secondary IP address: offline node has"
6253                  " instances (%s) configured to use it" %
6254                  utils.CommaJoin(affected_instances.keys()))
6255           raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6256       else:
6257         # On online nodes, check that no instances are running, and that
6258         # the node has the new ip and we can reach it.
6259         for instance in affected_instances.values():
6260           _CheckInstanceState(self, instance, INSTANCE_DOWN,
6261                               msg="cannot change secondary ip")
6262
6263         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6264         if master.name != node.name:
6265           # check reachability from master secondary ip to new secondary ip
6266           if not netutils.TcpPing(self.op.secondary_ip,
6267                                   constants.DEFAULT_NODED_PORT,
6268                                   source=master.secondary_ip):
6269             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6270                                        " based ping to node daemon port",
6271                                        errors.ECODE_ENVIRON)
6272
6273     if self.op.ndparams:
6274       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6275       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6276       self.new_ndparams = new_ndparams
6277
6278     if self.op.hv_state:
6279       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6280                                                  self.node.hv_state_static)
6281
6282     if self.op.disk_state:
6283       self.new_disk_state = \
6284         _MergeAndVerifyDiskState(self.op.disk_state,
6285                                  self.node.disk_state_static)
6286
6287   def Exec(self, feedback_fn):
6288     """Modifies a node.
6289
6290     """
6291     node = self.node
6292     old_role = self.old_role
6293     new_role = self.new_role
6294
6295     result = []
6296
6297     if self.op.ndparams:
6298       node.ndparams = self.new_ndparams
6299
6300     if self.op.powered is not None:
6301       node.powered = self.op.powered
6302
6303     if self.op.hv_state:
6304       node.hv_state_static = self.new_hv_state
6305
6306     if self.op.disk_state:
6307       node.disk_state_static = self.new_disk_state
6308
6309     for attr in ["master_capable", "vm_capable"]:
6310       val = getattr(self.op, attr)
6311       if val is not None:
6312         setattr(node, attr, val)
6313         result.append((attr, str(val)))
6314
6315     if new_role != old_role:
6316       # Tell the node to demote itself, if no longer MC and not offline
6317       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6318         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6319         if msg:
6320           self.LogWarning("Node failed to demote itself: %s", msg)
6321
6322       new_flags = self._R2F[new_role]
6323       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6324         if of != nf:
6325           result.append((desc, str(nf)))
6326       (node.master_candidate, node.drained, node.offline) = new_flags
6327
6328       # we locked all nodes, we adjust the CP before updating this node
6329       if self.lock_all:
6330         _AdjustCandidatePool(self, [node.name])
6331
6332     if self.op.secondary_ip:
6333       node.secondary_ip = self.op.secondary_ip
6334       result.append(("secondary_ip", self.op.secondary_ip))
6335
6336     # this will trigger configuration file update, if needed
6337     self.cfg.Update(node, feedback_fn)
6338
6339     # this will trigger job queue propagation or cleanup if the mc
6340     # flag changed
6341     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6342       self.context.ReaddNode(node)
6343
6344     return result
6345
6346
6347 class LUNodePowercycle(NoHooksLU):
6348   """Powercycles a node.
6349
6350   """
6351   REQ_BGL = False
6352
6353   def CheckArguments(self):
6354     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6355     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6356       raise errors.OpPrereqError("The node is the master and the force"
6357                                  " parameter was not set",
6358                                  errors.ECODE_INVAL)
6359
6360   def ExpandNames(self):
6361     """Locking for PowercycleNode.
6362
6363     This is a last-resort option and shouldn't block on other
6364     jobs. Therefore, we grab no locks.
6365
6366     """
6367     self.needed_locks = {}
6368
6369   def Exec(self, feedback_fn):
6370     """Reboots a node.
6371
6372     """
6373     result = self.rpc.call_node_powercycle(self.op.node_name,
6374                                            self.cfg.GetHypervisorType())
6375     result.Raise("Failed to schedule the reboot")
6376     return result.payload
6377
6378
6379 class LUClusterQuery(NoHooksLU):
6380   """Query cluster configuration.
6381
6382   """
6383   REQ_BGL = False
6384
6385   def ExpandNames(self):
6386     self.needed_locks = {}
6387
6388   def Exec(self, feedback_fn):
6389     """Return cluster config.
6390
6391     """
6392     cluster = self.cfg.GetClusterInfo()
6393     os_hvp = {}
6394
6395     # Filter just for enabled hypervisors
6396     for os_name, hv_dict in cluster.os_hvp.items():
6397       os_hvp[os_name] = {}
6398       for hv_name, hv_params in hv_dict.items():
6399         if hv_name in cluster.enabled_hypervisors:
6400           os_hvp[os_name][hv_name] = hv_params
6401
6402     # Convert ip_family to ip_version
6403     primary_ip_version = constants.IP4_VERSION
6404     if cluster.primary_ip_family == netutils.IP6Address.family:
6405       primary_ip_version = constants.IP6_VERSION
6406
6407     result = {
6408       "software_version": constants.RELEASE_VERSION,
6409       "protocol_version": constants.PROTOCOL_VERSION,
6410       "config_version": constants.CONFIG_VERSION,
6411       "os_api_version": max(constants.OS_API_VERSIONS),
6412       "export_version": constants.EXPORT_VERSION,
6413       "architecture": runtime.GetArchInfo(),
6414       "name": cluster.cluster_name,
6415       "master": cluster.master_node,
6416       "default_hypervisor": cluster.primary_hypervisor,
6417       "enabled_hypervisors": cluster.enabled_hypervisors,
6418       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6419                         for hypervisor_name in cluster.enabled_hypervisors]),
6420       "os_hvp": os_hvp,
6421       "beparams": cluster.beparams,
6422       "osparams": cluster.osparams,
6423       "ipolicy": cluster.ipolicy,
6424       "nicparams": cluster.nicparams,
6425       "ndparams": cluster.ndparams,
6426       "diskparams": cluster.diskparams,
6427       "candidate_pool_size": cluster.candidate_pool_size,
6428       "master_netdev": cluster.master_netdev,
6429       "master_netmask": cluster.master_netmask,
6430       "use_external_mip_script": cluster.use_external_mip_script,
6431       "volume_group_name": cluster.volume_group_name,
6432       "drbd_usermode_helper": cluster.drbd_usermode_helper,
6433       "file_storage_dir": cluster.file_storage_dir,
6434       "shared_file_storage_dir": cluster.shared_file_storage_dir,
6435       "maintain_node_health": cluster.maintain_node_health,
6436       "ctime": cluster.ctime,
6437       "mtime": cluster.mtime,
6438       "uuid": cluster.uuid,
6439       "tags": list(cluster.GetTags()),
6440       "uid_pool": cluster.uid_pool,
6441       "default_iallocator": cluster.default_iallocator,
6442       "reserved_lvs": cluster.reserved_lvs,
6443       "primary_ip_version": primary_ip_version,
6444       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6445       "hidden_os": cluster.hidden_os,
6446       "blacklisted_os": cluster.blacklisted_os,
6447       }
6448
6449     return result
6450
6451
6452 class LUClusterConfigQuery(NoHooksLU):
6453   """Return configuration values.
6454
6455   """
6456   REQ_BGL = False
6457
6458   def CheckArguments(self):
6459     self.cq = _ClusterQuery(None, self.op.output_fields, False)
6460
6461   def ExpandNames(self):
6462     self.cq.ExpandNames(self)
6463
6464   def DeclareLocks(self, level):
6465     self.cq.DeclareLocks(self, level)
6466
6467   def Exec(self, feedback_fn):
6468     result = self.cq.OldStyleQuery(self)
6469
6470     assert len(result) == 1
6471
6472     return result[0]
6473
6474
6475 class _ClusterQuery(_QueryBase):
6476   FIELDS = query.CLUSTER_FIELDS
6477
6478   #: Do not sort (there is only one item)
6479   SORT_FIELD = None
6480
6481   def ExpandNames(self, lu):
6482     lu.needed_locks = {}
6483
6484     # The following variables interact with _QueryBase._GetNames
6485     self.wanted = locking.ALL_SET
6486     self.do_locking = self.use_locking
6487
6488     if self.do_locking:
6489       raise errors.OpPrereqError("Can not use locking for cluster queries",
6490                                  errors.ECODE_INVAL)
6491
6492   def DeclareLocks(self, lu, level):
6493     pass
6494
6495   def _GetQueryData(self, lu):
6496     """Computes the list of nodes and their attributes.
6497
6498     """
6499     # Locking is not used
6500     assert not (compat.any(lu.glm.is_owned(level)
6501                            for level in locking.LEVELS
6502                            if level != locking.LEVEL_CLUSTER) or
6503                 self.do_locking or self.use_locking)
6504
6505     if query.CQ_CONFIG in self.requested_data:
6506       cluster = lu.cfg.GetClusterInfo()
6507     else:
6508       cluster = NotImplemented
6509
6510     if query.CQ_QUEUE_DRAINED in self.requested_data:
6511       drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6512     else:
6513       drain_flag = NotImplemented
6514
6515     if query.CQ_WATCHER_PAUSE in self.requested_data:
6516       watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6517     else:
6518       watcher_pause = NotImplemented
6519
6520     return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6521
6522
6523 class LUInstanceActivateDisks(NoHooksLU):
6524   """Bring up an instance's disks.
6525
6526   """
6527   REQ_BGL = False
6528
6529   def ExpandNames(self):
6530     self._ExpandAndLockInstance()
6531     self.needed_locks[locking.LEVEL_NODE] = []
6532     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6533
6534   def DeclareLocks(self, level):
6535     if level == locking.LEVEL_NODE:
6536       self._LockInstancesNodes()
6537
6538   def CheckPrereq(self):
6539     """Check prerequisites.
6540
6541     This checks that the instance is in the cluster.
6542
6543     """
6544     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6545     assert self.instance is not None, \
6546       "Cannot retrieve locked instance %s" % self.op.instance_name
6547     _CheckNodeOnline(self, self.instance.primary_node)
6548
6549   def Exec(self, feedback_fn):
6550     """Activate the disks.
6551
6552     """
6553     disks_ok, disks_info = \
6554               _AssembleInstanceDisks(self, self.instance,
6555                                      ignore_size=self.op.ignore_size)
6556     if not disks_ok:
6557       raise errors.OpExecError("Cannot activate block devices")
6558
6559     if self.op.wait_for_sync:
6560       if not _WaitForSync(self, self.instance):
6561         raise errors.OpExecError("Some disks of the instance are degraded!")
6562
6563     return disks_info
6564
6565
6566 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6567                            ignore_size=False):
6568   """Prepare the block devices for an instance.
6569
6570   This sets up the block devices on all nodes.
6571
6572   @type lu: L{LogicalUnit}
6573   @param lu: the logical unit on whose behalf we execute
6574   @type instance: L{objects.Instance}
6575   @param instance: the instance for whose disks we assemble
6576   @type disks: list of L{objects.Disk} or None
6577   @param disks: which disks to assemble (or all, if None)
6578   @type ignore_secondaries: boolean
6579   @param ignore_secondaries: if true, errors on secondary nodes
6580       won't result in an error return from the function
6581   @type ignore_size: boolean
6582   @param ignore_size: if true, the current known size of the disk
6583       will not be used during the disk activation, useful for cases
6584       when the size is wrong
6585   @return: False if the operation failed, otherwise a list of
6586       (host, instance_visible_name, node_visible_name)
6587       with the mapping from node devices to instance devices
6588
6589   """
6590   device_info = []
6591   disks_ok = True
6592   iname = instance.name
6593   disks = _ExpandCheckDisks(instance, disks)
6594
6595   # With the two passes mechanism we try to reduce the window of
6596   # opportunity for the race condition of switching DRBD to primary
6597   # before handshaking occured, but we do not eliminate it
6598
6599   # The proper fix would be to wait (with some limits) until the
6600   # connection has been made and drbd transitions from WFConnection
6601   # into any other network-connected state (Connected, SyncTarget,
6602   # SyncSource, etc.)
6603
6604   # 1st pass, assemble on all nodes in secondary mode
6605   for idx, inst_disk in enumerate(disks):
6606     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6607       if ignore_size:
6608         node_disk = node_disk.Copy()
6609         node_disk.UnsetSize()
6610       lu.cfg.SetDiskID(node_disk, node)
6611       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6612                                              False, idx)
6613       msg = result.fail_msg
6614       if msg:
6615         is_offline_secondary = (node in instance.secondary_nodes and
6616                                 result.offline)
6617         lu.LogWarning("Could not prepare block device %s on node %s"
6618                       " (is_primary=False, pass=1): %s",
6619                       inst_disk.iv_name, node, msg)
6620         if not (ignore_secondaries or is_offline_secondary):
6621           disks_ok = False
6622
6623   # FIXME: race condition on drbd migration to primary
6624
6625   # 2nd pass, do only the primary node
6626   for idx, inst_disk in enumerate(disks):
6627     dev_path = None
6628
6629     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6630       if node != instance.primary_node:
6631         continue
6632       if ignore_size:
6633         node_disk = node_disk.Copy()
6634         node_disk.UnsetSize()
6635       lu.cfg.SetDiskID(node_disk, node)
6636       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6637                                              True, idx)
6638       msg = result.fail_msg
6639       if msg:
6640         lu.LogWarning("Could not prepare block device %s on node %s"
6641                       " (is_primary=True, pass=2): %s",
6642                       inst_disk.iv_name, node, msg)
6643         disks_ok = False
6644       else:
6645         dev_path = result.payload
6646
6647     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6648
6649   # leave the disks configured for the primary node
6650   # this is a workaround that would be fixed better by
6651   # improving the logical/physical id handling
6652   for disk in disks:
6653     lu.cfg.SetDiskID(disk, instance.primary_node)
6654
6655   return disks_ok, device_info
6656
6657
6658 def _StartInstanceDisks(lu, instance, force):
6659   """Start the disks of an instance.
6660
6661   """
6662   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6663                                            ignore_secondaries=force)
6664   if not disks_ok:
6665     _ShutdownInstanceDisks(lu, instance)
6666     if force is not None and not force:
6667       lu.LogWarning("",
6668                     hint=("If the message above refers to a secondary node,"
6669                           " you can retry the operation using '--force'"))
6670     raise errors.OpExecError("Disk consistency error")
6671
6672
6673 class LUInstanceDeactivateDisks(NoHooksLU):
6674   """Shutdown an instance's disks.
6675
6676   """
6677   REQ_BGL = False
6678
6679   def ExpandNames(self):
6680     self._ExpandAndLockInstance()
6681     self.needed_locks[locking.LEVEL_NODE] = []
6682     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6683
6684   def DeclareLocks(self, level):
6685     if level == locking.LEVEL_NODE:
6686       self._LockInstancesNodes()
6687
6688   def CheckPrereq(self):
6689     """Check prerequisites.
6690
6691     This checks that the instance is in the cluster.
6692
6693     """
6694     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6695     assert self.instance is not None, \
6696       "Cannot retrieve locked instance %s" % self.op.instance_name
6697
6698   def Exec(self, feedback_fn):
6699     """Deactivate the disks
6700
6701     """
6702     instance = self.instance
6703     if self.op.force:
6704       _ShutdownInstanceDisks(self, instance)
6705     else:
6706       _SafeShutdownInstanceDisks(self, instance)
6707
6708
6709 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6710   """Shutdown block devices of an instance.
6711
6712   This function checks if an instance is running, before calling
6713   _ShutdownInstanceDisks.
6714
6715   """
6716   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6717   _ShutdownInstanceDisks(lu, instance, disks=disks)
6718
6719
6720 def _ExpandCheckDisks(instance, disks):
6721   """Return the instance disks selected by the disks list
6722
6723   @type disks: list of L{objects.Disk} or None
6724   @param disks: selected disks
6725   @rtype: list of L{objects.Disk}
6726   @return: selected instance disks to act on
6727
6728   """
6729   if disks is None:
6730     return instance.disks
6731   else:
6732     if not set(disks).issubset(instance.disks):
6733       raise errors.ProgrammerError("Can only act on disks belonging to the"
6734                                    " target instance")
6735     return disks
6736
6737
6738 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6739   """Shutdown block devices of an instance.
6740
6741   This does the shutdown on all nodes of the instance.
6742
6743   If the ignore_primary is false, errors on the primary node are
6744   ignored.
6745
6746   """
6747   all_result = True
6748   disks = _ExpandCheckDisks(instance, disks)
6749
6750   for disk in disks:
6751     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6752       lu.cfg.SetDiskID(top_disk, node)
6753       result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6754       msg = result.fail_msg
6755       if msg:
6756         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6757                       disk.iv_name, node, msg)
6758         if ((node == instance.primary_node and not ignore_primary) or
6759             (node != instance.primary_node and not result.offline)):
6760           all_result = False
6761   return all_result
6762
6763
6764 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6765   """Checks if a node has enough free memory.
6766
6767   This function check if a given node has the needed amount of free
6768   memory. In case the node has less memory or we cannot get the
6769   information from the node, this function raise an OpPrereqError
6770   exception.
6771
6772   @type lu: C{LogicalUnit}
6773   @param lu: a logical unit from which we get configuration data
6774   @type node: C{str}
6775   @param node: the node to check
6776   @type reason: C{str}
6777   @param reason: string to use in the error message
6778   @type requested: C{int}
6779   @param requested: the amount of memory in MiB to check for
6780   @type hypervisor_name: C{str}
6781   @param hypervisor_name: the hypervisor to ask for memory stats
6782   @rtype: integer
6783   @return: node current free memory
6784   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6785       we cannot check the node
6786
6787   """
6788   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6789   nodeinfo[node].Raise("Can't get data from node %s" % node,
6790                        prereq=True, ecode=errors.ECODE_ENVIRON)
6791   (_, _, (hv_info, )) = nodeinfo[node].payload
6792
6793   free_mem = hv_info.get("memory_free", None)
6794   if not isinstance(free_mem, int):
6795     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6796                                " was '%s'" % (node, free_mem),
6797                                errors.ECODE_ENVIRON)
6798   if requested > free_mem:
6799     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6800                                " needed %s MiB, available %s MiB" %
6801                                (node, reason, requested, free_mem),
6802                                errors.ECODE_NORES)
6803   return free_mem
6804
6805
6806 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6807   """Checks if nodes have enough free disk space in the all VGs.
6808
6809   This function check if all given nodes have the needed amount of
6810   free disk. In case any node has less disk or we cannot get the
6811   information from the node, this function raise an OpPrereqError
6812   exception.
6813
6814   @type lu: C{LogicalUnit}
6815   @param lu: a logical unit from which we get configuration data
6816   @type nodenames: C{list}
6817   @param nodenames: the list of node names to check
6818   @type req_sizes: C{dict}
6819   @param req_sizes: the hash of vg and corresponding amount of disk in
6820       MiB to check for
6821   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6822       or we cannot check the node
6823
6824   """
6825   for vg, req_size in req_sizes.items():
6826     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6827
6828
6829 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6830   """Checks if nodes have enough free disk space in the specified VG.
6831
6832   This function check if all given nodes have the needed amount of
6833   free disk. In case any node has less disk or we cannot get the
6834   information from the node, this function raise an OpPrereqError
6835   exception.
6836
6837   @type lu: C{LogicalUnit}
6838   @param lu: a logical unit from which we get configuration data
6839   @type nodenames: C{list}
6840   @param nodenames: the list of node names to check
6841   @type vg: C{str}
6842   @param vg: the volume group to check
6843   @type requested: C{int}
6844   @param requested: the amount of disk in MiB to check for
6845   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6846       or we cannot check the node
6847
6848   """
6849   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6850   for node in nodenames:
6851     info = nodeinfo[node]
6852     info.Raise("Cannot get current information from node %s" % node,
6853                prereq=True, ecode=errors.ECODE_ENVIRON)
6854     (_, (vg_info, ), _) = info.payload
6855     vg_free = vg_info.get("vg_free", None)
6856     if not isinstance(vg_free, int):
6857       raise errors.OpPrereqError("Can't compute free disk space on node"
6858                                  " %s for vg %s, result was '%s'" %
6859                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6860     if requested > vg_free:
6861       raise errors.OpPrereqError("Not enough disk space on target node %s"
6862                                  " vg %s: required %d MiB, available %d MiB" %
6863                                  (node, vg, requested, vg_free),
6864                                  errors.ECODE_NORES)
6865
6866
6867 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6868   """Checks if nodes have enough physical CPUs
6869
6870   This function checks if all given nodes have the needed number of
6871   physical CPUs. In case any node has less CPUs or we cannot get the
6872   information from the node, this function raises an OpPrereqError
6873   exception.
6874
6875   @type lu: C{LogicalUnit}
6876   @param lu: a logical unit from which we get configuration data
6877   @type nodenames: C{list}
6878   @param nodenames: the list of node names to check
6879   @type requested: C{int}
6880   @param requested: the minimum acceptable number of physical CPUs
6881   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6882       or we cannot check the node
6883
6884   """
6885   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6886   for node in nodenames:
6887     info = nodeinfo[node]
6888     info.Raise("Cannot get current information from node %s" % node,
6889                prereq=True, ecode=errors.ECODE_ENVIRON)
6890     (_, _, (hv_info, )) = info.payload
6891     num_cpus = hv_info.get("cpu_total", None)
6892     if not isinstance(num_cpus, int):
6893       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6894                                  " on node %s, result was '%s'" %
6895                                  (node, num_cpus), errors.ECODE_ENVIRON)
6896     if requested > num_cpus:
6897       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6898                                  "required" % (node, num_cpus, requested),
6899                                  errors.ECODE_NORES)
6900
6901
6902 class LUInstanceStartup(LogicalUnit):
6903   """Starts an instance.
6904
6905   """
6906   HPATH = "instance-start"
6907   HTYPE = constants.HTYPE_INSTANCE
6908   REQ_BGL = False
6909
6910   def CheckArguments(self):
6911     # extra beparams
6912     if self.op.beparams:
6913       # fill the beparams dict
6914       objects.UpgradeBeParams(self.op.beparams)
6915       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6916
6917   def ExpandNames(self):
6918     self._ExpandAndLockInstance()
6919     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6920
6921   def DeclareLocks(self, level):
6922     if level == locking.LEVEL_NODE_RES:
6923       self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6924
6925   def BuildHooksEnv(self):
6926     """Build hooks env.
6927
6928     This runs on master, primary and secondary nodes of the instance.
6929
6930     """
6931     env = {
6932       "FORCE": self.op.force,
6933       }
6934
6935     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6936
6937     return env
6938
6939   def BuildHooksNodes(self):
6940     """Build hooks nodes.
6941
6942     """
6943     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6944     return (nl, nl)
6945
6946   def CheckPrereq(self):
6947     """Check prerequisites.
6948
6949     This checks that the instance is in the cluster.
6950
6951     """
6952     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6953     assert self.instance is not None, \
6954       "Cannot retrieve locked instance %s" % self.op.instance_name
6955
6956     # extra hvparams
6957     if self.op.hvparams:
6958       # check hypervisor parameter syntax (locally)
6959       cluster = self.cfg.GetClusterInfo()
6960       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6961       filled_hvp = cluster.FillHV(instance)
6962       filled_hvp.update(self.op.hvparams)
6963       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6964       hv_type.CheckParameterSyntax(filled_hvp)
6965       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6966
6967     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6968
6969     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6970
6971     if self.primary_offline and self.op.ignore_offline_nodes:
6972       self.LogWarning("Ignoring offline primary node")
6973
6974       if self.op.hvparams or self.op.beparams:
6975         self.LogWarning("Overridden parameters are ignored")
6976     else:
6977       _CheckNodeOnline(self, instance.primary_node)
6978
6979       bep = self.cfg.GetClusterInfo().FillBE(instance)
6980       bep.update(self.op.beparams)
6981
6982       # check bridges existence
6983       _CheckInstanceBridgesExist(self, instance)
6984
6985       remote_info = self.rpc.call_instance_info(instance.primary_node,
6986                                                 instance.name,
6987                                                 instance.hypervisor)
6988       remote_info.Raise("Error checking node %s" % instance.primary_node,
6989                         prereq=True, ecode=errors.ECODE_ENVIRON)
6990       if not remote_info.payload: # not running already
6991         _CheckNodeFreeMemory(self, instance.primary_node,
6992                              "starting instance %s" % instance.name,
6993                              bep[constants.BE_MINMEM], instance.hypervisor)
6994
6995   def Exec(self, feedback_fn):
6996     """Start the instance.
6997
6998     """
6999     instance = self.instance
7000     force = self.op.force
7001
7002     if not self.op.no_remember:
7003       self.cfg.MarkInstanceUp(instance.name)
7004
7005     if self.primary_offline:
7006       assert self.op.ignore_offline_nodes
7007       self.LogInfo("Primary node offline, marked instance as started")
7008     else:
7009       node_current = instance.primary_node
7010
7011       _StartInstanceDisks(self, instance, force)
7012
7013       result = \
7014         self.rpc.call_instance_start(node_current,
7015                                      (instance, self.op.hvparams,
7016                                       self.op.beparams),
7017                                      self.op.startup_paused)
7018       msg = result.fail_msg
7019       if msg:
7020         _ShutdownInstanceDisks(self, instance)
7021         raise errors.OpExecError("Could not start instance: %s" % msg)
7022
7023
7024 class LUInstanceReboot(LogicalUnit):
7025   """Reboot an instance.
7026
7027   """
7028   HPATH = "instance-reboot"
7029   HTYPE = constants.HTYPE_INSTANCE
7030   REQ_BGL = False
7031
7032   def ExpandNames(self):
7033     self._ExpandAndLockInstance()
7034
7035   def BuildHooksEnv(self):
7036     """Build hooks env.
7037
7038     This runs on master, primary and secondary nodes of the instance.
7039
7040     """
7041     env = {
7042       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7043       "REBOOT_TYPE": self.op.reboot_type,
7044       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7045       }
7046
7047     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7048
7049     return env
7050
7051   def BuildHooksNodes(self):
7052     """Build hooks nodes.
7053
7054     """
7055     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7056     return (nl, nl)
7057
7058   def CheckPrereq(self):
7059     """Check prerequisites.
7060
7061     This checks that the instance is in the cluster.
7062
7063     """
7064     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7065     assert self.instance is not None, \
7066       "Cannot retrieve locked instance %s" % self.op.instance_name
7067     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7068     _CheckNodeOnline(self, instance.primary_node)
7069
7070     # check bridges existence
7071     _CheckInstanceBridgesExist(self, instance)
7072
7073   def Exec(self, feedback_fn):
7074     """Reboot the instance.
7075
7076     """
7077     instance = self.instance
7078     ignore_secondaries = self.op.ignore_secondaries
7079     reboot_type = self.op.reboot_type
7080
7081     remote_info = self.rpc.call_instance_info(instance.primary_node,
7082                                               instance.name,
7083                                               instance.hypervisor)
7084     remote_info.Raise("Error checking node %s" % instance.primary_node)
7085     instance_running = bool(remote_info.payload)
7086
7087     node_current = instance.primary_node
7088
7089     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7090                                             constants.INSTANCE_REBOOT_HARD]:
7091       for disk in instance.disks:
7092         self.cfg.SetDiskID(disk, node_current)
7093       result = self.rpc.call_instance_reboot(node_current, instance,
7094                                              reboot_type,
7095                                              self.op.shutdown_timeout)
7096       result.Raise("Could not reboot instance")
7097     else:
7098       if instance_running:
7099         result = self.rpc.call_instance_shutdown(node_current, instance,
7100                                                  self.op.shutdown_timeout)
7101         result.Raise("Could not shutdown instance for full reboot")
7102         _ShutdownInstanceDisks(self, instance)
7103       else:
7104         self.LogInfo("Instance %s was already stopped, starting now",
7105                      instance.name)
7106       _StartInstanceDisks(self, instance, ignore_secondaries)
7107       result = self.rpc.call_instance_start(node_current,
7108                                             (instance, None, None), False)
7109       msg = result.fail_msg
7110       if msg:
7111         _ShutdownInstanceDisks(self, instance)
7112         raise errors.OpExecError("Could not start instance for"
7113                                  " full reboot: %s" % msg)
7114
7115     self.cfg.MarkInstanceUp(instance.name)
7116
7117
7118 class LUInstanceShutdown(LogicalUnit):
7119   """Shutdown an instance.
7120
7121   """
7122   HPATH = "instance-stop"
7123   HTYPE = constants.HTYPE_INSTANCE
7124   REQ_BGL = False
7125
7126   def ExpandNames(self):
7127     self._ExpandAndLockInstance()
7128
7129   def BuildHooksEnv(self):
7130     """Build hooks env.
7131
7132     This runs on master, primary and secondary nodes of the instance.
7133
7134     """
7135     env = _BuildInstanceHookEnvByObject(self, self.instance)
7136     env["TIMEOUT"] = self.op.timeout
7137     return env
7138
7139   def BuildHooksNodes(self):
7140     """Build hooks nodes.
7141
7142     """
7143     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7144     return (nl, nl)
7145
7146   def CheckPrereq(self):
7147     """Check prerequisites.
7148
7149     This checks that the instance is in the cluster.
7150
7151     """
7152     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7153     assert self.instance is not None, \
7154       "Cannot retrieve locked instance %s" % self.op.instance_name
7155
7156     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7157
7158     self.primary_offline = \
7159       self.cfg.GetNodeInfo(self.instance.primary_node).offline
7160
7161     if self.primary_offline and self.op.ignore_offline_nodes:
7162       self.LogWarning("Ignoring offline primary node")
7163     else:
7164       _CheckNodeOnline(self, self.instance.primary_node)
7165
7166   def Exec(self, feedback_fn):
7167     """Shutdown the instance.
7168
7169     """
7170     instance = self.instance
7171     node_current = instance.primary_node
7172     timeout = self.op.timeout
7173
7174     if not self.op.no_remember:
7175       self.cfg.MarkInstanceDown(instance.name)
7176
7177     if self.primary_offline:
7178       assert self.op.ignore_offline_nodes
7179       self.LogInfo("Primary node offline, marked instance as stopped")
7180     else:
7181       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7182       msg = result.fail_msg
7183       if msg:
7184         self.LogWarning("Could not shutdown instance: %s", msg)
7185
7186       _ShutdownInstanceDisks(self, instance)
7187
7188
7189 class LUInstanceReinstall(LogicalUnit):
7190   """Reinstall an instance.
7191
7192   """
7193   HPATH = "instance-reinstall"
7194   HTYPE = constants.HTYPE_INSTANCE
7195   REQ_BGL = False
7196
7197   def ExpandNames(self):
7198     self._ExpandAndLockInstance()
7199
7200   def BuildHooksEnv(self):
7201     """Build hooks env.
7202
7203     This runs on master, primary and secondary nodes of the instance.
7204
7205     """
7206     return _BuildInstanceHookEnvByObject(self, self.instance)
7207
7208   def BuildHooksNodes(self):
7209     """Build hooks nodes.
7210
7211     """
7212     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7213     return (nl, nl)
7214
7215   def CheckPrereq(self):
7216     """Check prerequisites.
7217
7218     This checks that the instance is in the cluster and is not running.
7219
7220     """
7221     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7222     assert instance is not None, \
7223       "Cannot retrieve locked instance %s" % self.op.instance_name
7224     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7225                      " offline, cannot reinstall")
7226
7227     if instance.disk_template == constants.DT_DISKLESS:
7228       raise errors.OpPrereqError("Instance '%s' has no disks" %
7229                                  self.op.instance_name,
7230                                  errors.ECODE_INVAL)
7231     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7232
7233     if self.op.os_type is not None:
7234       # OS verification
7235       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7236       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7237       instance_os = self.op.os_type
7238     else:
7239       instance_os = instance.os
7240
7241     nodelist = list(instance.all_nodes)
7242
7243     if self.op.osparams:
7244       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7245       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7246       self.os_inst = i_osdict # the new dict (without defaults)
7247     else:
7248       self.os_inst = None
7249
7250     self.instance = instance
7251
7252   def Exec(self, feedback_fn):
7253     """Reinstall the instance.
7254
7255     """
7256     inst = self.instance
7257
7258     if self.op.os_type is not None:
7259       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7260       inst.os = self.op.os_type
7261       # Write to configuration
7262       self.cfg.Update(inst, feedback_fn)
7263
7264     _StartInstanceDisks(self, inst, None)
7265     try:
7266       feedback_fn("Running the instance OS create scripts...")
7267       # FIXME: pass debug option from opcode to backend
7268       result = self.rpc.call_instance_os_add(inst.primary_node,
7269                                              (inst, self.os_inst), True,
7270                                              self.op.debug_level)
7271       result.Raise("Could not install OS for instance %s on node %s" %
7272                    (inst.name, inst.primary_node))
7273     finally:
7274       _ShutdownInstanceDisks(self, inst)
7275
7276
7277 class LUInstanceRecreateDisks(LogicalUnit):
7278   """Recreate an instance's missing disks.
7279
7280   """
7281   HPATH = "instance-recreate-disks"
7282   HTYPE = constants.HTYPE_INSTANCE
7283   REQ_BGL = False
7284
7285   _MODIFYABLE = frozenset([
7286     constants.IDISK_SIZE,
7287     constants.IDISK_MODE,
7288     ])
7289
7290   # New or changed disk parameters may have different semantics
7291   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7292     constants.IDISK_ADOPT,
7293
7294     # TODO: Implement support changing VG while recreating
7295     constants.IDISK_VG,
7296     constants.IDISK_METAVG,
7297     ]))
7298
7299   def _RunAllocator(self):
7300     """Run the allocator based on input opcode.
7301
7302     """
7303     be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7304
7305     # FIXME
7306     # The allocator should actually run in "relocate" mode, but current
7307     # allocators don't support relocating all the nodes of an instance at
7308     # the same time. As a workaround we use "allocate" mode, but this is
7309     # suboptimal for two reasons:
7310     # - The instance name passed to the allocator is present in the list of
7311     #   existing instances, so there could be a conflict within the
7312     #   internal structures of the allocator. This doesn't happen with the
7313     #   current allocators, but it's a liability.
7314     # - The allocator counts the resources used by the instance twice: once
7315     #   because the instance exists already, and once because it tries to
7316     #   allocate a new instance.
7317     # The allocator could choose some of the nodes on which the instance is
7318     # running, but that's not a problem. If the instance nodes are broken,
7319     # they should be already be marked as drained or offline, and hence
7320     # skipped by the allocator. If instance disks have been lost for other
7321     # reasons, then recreating the disks on the same nodes should be fine.
7322     disk_template = self.instance.disk_template
7323     spindle_use = be_full[constants.BE_SPINDLE_USE]
7324     req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7325                                         disk_template=disk_template,
7326                                         tags=list(self.instance.GetTags()),
7327                                         os=self.instance.os,
7328                                         nics=[{}],
7329                                         vcpus=be_full[constants.BE_VCPUS],
7330                                         memory=be_full[constants.BE_MAXMEM],
7331                                         spindle_use=spindle_use,
7332                                         disks=[{constants.IDISK_SIZE: d.size,
7333                                                 constants.IDISK_MODE: d.mode}
7334                                                 for d in self.instance.disks],
7335                                         hypervisor=self.instance.hypervisor)
7336     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7337
7338     ial.Run(self.op.iallocator)
7339
7340     assert req.RequiredNodes() == len(self.instance.all_nodes)
7341
7342     if not ial.success:
7343       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7344                                  " %s" % (self.op.iallocator, ial.info),
7345                                  errors.ECODE_NORES)
7346
7347     self.op.nodes = ial.result
7348     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7349                  self.op.instance_name, self.op.iallocator,
7350                  utils.CommaJoin(ial.result))
7351
7352   def CheckArguments(self):
7353     if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7354       # Normalize and convert deprecated list of disk indices
7355       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7356
7357     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7358     if duplicates:
7359       raise errors.OpPrereqError("Some disks have been specified more than"
7360                                  " once: %s" % utils.CommaJoin(duplicates),
7361                                  errors.ECODE_INVAL)
7362
7363     # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7364     # when neither iallocator nor nodes are specified
7365     if self.op.iallocator or self.op.nodes:
7366       _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7367
7368     for (idx, params) in self.op.disks:
7369       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7370       unsupported = frozenset(params.keys()) - self._MODIFYABLE
7371       if unsupported:
7372         raise errors.OpPrereqError("Parameters for disk %s try to change"
7373                                    " unmodifyable parameter(s): %s" %
7374                                    (idx, utils.CommaJoin(unsupported)),
7375                                    errors.ECODE_INVAL)
7376
7377   def ExpandNames(self):
7378     self._ExpandAndLockInstance()
7379     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7380     if self.op.nodes:
7381       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7382       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7383     else:
7384       self.needed_locks[locking.LEVEL_NODE] = []
7385       if self.op.iallocator:
7386         # iallocator will select a new node in the same group
7387         self.needed_locks[locking.LEVEL_NODEGROUP] = []
7388     self.needed_locks[locking.LEVEL_NODE_RES] = []
7389
7390   def DeclareLocks(self, level):
7391     if level == locking.LEVEL_NODEGROUP:
7392       assert self.op.iallocator is not None
7393       assert not self.op.nodes
7394       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7395       self.share_locks[locking.LEVEL_NODEGROUP] = 1
7396       # Lock the primary group used by the instance optimistically; this
7397       # requires going via the node before it's locked, requiring
7398       # verification later on
7399       self.needed_locks[locking.LEVEL_NODEGROUP] = \
7400         self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7401
7402     elif level == locking.LEVEL_NODE:
7403       # If an allocator is used, then we lock all the nodes in the current
7404       # instance group, as we don't know yet which ones will be selected;
7405       # if we replace the nodes without using an allocator, locks are
7406       # already declared in ExpandNames; otherwise, we need to lock all the
7407       # instance nodes for disk re-creation
7408       if self.op.iallocator:
7409         assert not self.op.nodes
7410         assert not self.needed_locks[locking.LEVEL_NODE]
7411         assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7412
7413         # Lock member nodes of the group of the primary node
7414         for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7415           self.needed_locks[locking.LEVEL_NODE].extend(
7416             self.cfg.GetNodeGroup(group_uuid).members)
7417       elif not self.op.nodes:
7418         self._LockInstancesNodes(primary_only=False)
7419     elif level == locking.LEVEL_NODE_RES:
7420       # Copy node locks
7421       self.needed_locks[locking.LEVEL_NODE_RES] = \
7422         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7423
7424   def BuildHooksEnv(self):
7425     """Build hooks env.
7426
7427     This runs on master, primary and secondary nodes of the instance.
7428
7429     """
7430     return _BuildInstanceHookEnvByObject(self, self.instance)
7431
7432   def BuildHooksNodes(self):
7433     """Build hooks nodes.
7434
7435     """
7436     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7437     return (nl, nl)
7438
7439   def CheckPrereq(self):
7440     """Check prerequisites.
7441
7442     This checks that the instance is in the cluster and is not running.
7443
7444     """
7445     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7446     assert instance is not None, \
7447       "Cannot retrieve locked instance %s" % self.op.instance_name
7448     if self.op.nodes:
7449       if len(self.op.nodes) != len(instance.all_nodes):
7450         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7451                                    " %d replacement nodes were specified" %
7452                                    (instance.name, len(instance.all_nodes),
7453                                     len(self.op.nodes)),
7454                                    errors.ECODE_INVAL)
7455       assert instance.disk_template != constants.DT_DRBD8 or \
7456           len(self.op.nodes) == 2
7457       assert instance.disk_template != constants.DT_PLAIN or \
7458           len(self.op.nodes) == 1
7459       primary_node = self.op.nodes[0]
7460     else:
7461       primary_node = instance.primary_node
7462     if not self.op.iallocator:
7463       _CheckNodeOnline(self, primary_node)
7464
7465     if instance.disk_template == constants.DT_DISKLESS:
7466       raise errors.OpPrereqError("Instance '%s' has no disks" %
7467                                  self.op.instance_name, errors.ECODE_INVAL)
7468
7469     # Verify if node group locks are still correct
7470     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7471     if owned_groups:
7472       # Node group locks are acquired only for the primary node (and only
7473       # when the allocator is used)
7474       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7475                                primary_only=True)
7476
7477     # if we replace nodes *and* the old primary is offline, we don't
7478     # check the instance state
7479     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7480     if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7481       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7482                           msg="cannot recreate disks")
7483
7484     if self.op.disks:
7485       self.disks = dict(self.op.disks)
7486     else:
7487       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7488
7489     maxidx = max(self.disks.keys())
7490     if maxidx >= len(instance.disks):
7491       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7492                                  errors.ECODE_INVAL)
7493
7494     if ((self.op.nodes or self.op.iallocator) and
7495         sorted(self.disks.keys()) != range(len(instance.disks))):
7496       raise errors.OpPrereqError("Can't recreate disks partially and"
7497                                  " change the nodes at the same time",
7498                                  errors.ECODE_INVAL)
7499
7500     self.instance = instance
7501
7502     if self.op.iallocator:
7503       self._RunAllocator()
7504       # Release unneeded node and node resource locks
7505       _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7506       _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7507
7508   def Exec(self, feedback_fn):
7509     """Recreate the disks.
7510
7511     """
7512     instance = self.instance
7513
7514     assert (self.owned_locks(locking.LEVEL_NODE) ==
7515             self.owned_locks(locking.LEVEL_NODE_RES))
7516
7517     to_skip = []
7518     mods = [] # keeps track of needed changes
7519
7520     for idx, disk in enumerate(instance.disks):
7521       try:
7522         changes = self.disks[idx]
7523       except KeyError:
7524         # Disk should not be recreated
7525         to_skip.append(idx)
7526         continue
7527
7528       # update secondaries for disks, if needed
7529       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7530         # need to update the nodes and minors
7531         assert len(self.op.nodes) == 2
7532         assert len(disk.logical_id) == 6 # otherwise disk internals
7533                                          # have changed
7534         (_, _, old_port, _, _, old_secret) = disk.logical_id
7535         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7536         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7537                   new_minors[0], new_minors[1], old_secret)
7538         assert len(disk.logical_id) == len(new_id)
7539       else:
7540         new_id = None
7541
7542       mods.append((idx, new_id, changes))
7543
7544     # now that we have passed all asserts above, we can apply the mods
7545     # in a single run (to avoid partial changes)
7546     for idx, new_id, changes in mods:
7547       disk = instance.disks[idx]
7548       if new_id is not None:
7549         assert disk.dev_type == constants.LD_DRBD8
7550         disk.logical_id = new_id
7551       if changes:
7552         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7553                     mode=changes.get(constants.IDISK_MODE, None))
7554
7555     # change primary node, if needed
7556     if self.op.nodes:
7557       instance.primary_node = self.op.nodes[0]
7558       self.LogWarning("Changing the instance's nodes, you will have to"
7559                       " remove any disks left on the older nodes manually")
7560
7561     if self.op.nodes:
7562       self.cfg.Update(instance, feedback_fn)
7563
7564     # All touched nodes must be locked
7565     mylocks = self.owned_locks(locking.LEVEL_NODE)
7566     assert mylocks.issuperset(frozenset(instance.all_nodes))
7567     _CreateDisks(self, instance, to_skip=to_skip)
7568
7569
7570 class LUInstanceRename(LogicalUnit):
7571   """Rename an instance.
7572
7573   """
7574   HPATH = "instance-rename"
7575   HTYPE = constants.HTYPE_INSTANCE
7576
7577   def CheckArguments(self):
7578     """Check arguments.
7579
7580     """
7581     if self.op.ip_check and not self.op.name_check:
7582       # TODO: make the ip check more flexible and not depend on the name check
7583       raise errors.OpPrereqError("IP address check requires a name check",
7584                                  errors.ECODE_INVAL)
7585
7586   def BuildHooksEnv(self):
7587     """Build hooks env.
7588
7589     This runs on master, primary and secondary nodes of the instance.
7590
7591     """
7592     env = _BuildInstanceHookEnvByObject(self, self.instance)
7593     env["INSTANCE_NEW_NAME"] = self.op.new_name
7594     return env
7595
7596   def BuildHooksNodes(self):
7597     """Build hooks nodes.
7598
7599     """
7600     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7601     return (nl, nl)
7602
7603   def CheckPrereq(self):
7604     """Check prerequisites.
7605
7606     This checks that the instance is in the cluster and is not running.
7607
7608     """
7609     self.op.instance_name = _ExpandInstanceName(self.cfg,
7610                                                 self.op.instance_name)
7611     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7612     assert instance is not None
7613     _CheckNodeOnline(self, instance.primary_node)
7614     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7615                         msg="cannot rename")
7616     self.instance = instance
7617
7618     new_name = self.op.new_name
7619     if self.op.name_check:
7620       hostname = _CheckHostnameSane(self, new_name)
7621       new_name = self.op.new_name = hostname.name
7622       if (self.op.ip_check and
7623           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7624         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7625                                    (hostname.ip, new_name),
7626                                    errors.ECODE_NOTUNIQUE)
7627
7628     instance_list = self.cfg.GetInstanceList()
7629     if new_name in instance_list and new_name != instance.name:
7630       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7631                                  new_name, errors.ECODE_EXISTS)
7632
7633   def Exec(self, feedback_fn):
7634     """Rename the instance.
7635
7636     """
7637     inst = self.instance
7638     old_name = inst.name
7639
7640     rename_file_storage = False
7641     if (inst.disk_template in constants.DTS_FILEBASED and
7642         self.op.new_name != inst.name):
7643       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7644       rename_file_storage = True
7645
7646     self.cfg.RenameInstance(inst.name, self.op.new_name)
7647     # Change the instance lock. This is definitely safe while we hold the BGL.
7648     # Otherwise the new lock would have to be added in acquired mode.
7649     assert self.REQ_BGL
7650     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7651     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7652     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7653
7654     # re-read the instance from the configuration after rename
7655     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7656
7657     if rename_file_storage:
7658       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7659       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7660                                                      old_file_storage_dir,
7661                                                      new_file_storage_dir)
7662       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7663                    " (but the instance has been renamed in Ganeti)" %
7664                    (inst.primary_node, old_file_storage_dir,
7665                     new_file_storage_dir))
7666
7667     _StartInstanceDisks(self, inst, None)
7668     # update info on disks
7669     info = _GetInstanceInfoText(inst)
7670     for (idx, disk) in enumerate(inst.disks):
7671       for node in inst.all_nodes:
7672         self.cfg.SetDiskID(disk, node)
7673         result = self.rpc.call_blockdev_setinfo(node, disk, info)
7674         if result.fail_msg:
7675           self.LogWarning("Error setting info on node %s for disk %s: %s",
7676                           node, idx, result.fail_msg)
7677     try:
7678       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7679                                                  old_name, self.op.debug_level)
7680       msg = result.fail_msg
7681       if msg:
7682         msg = ("Could not run OS rename script for instance %s on node %s"
7683                " (but the instance has been renamed in Ganeti): %s" %
7684                (inst.name, inst.primary_node, msg))
7685         self.LogWarning(msg)
7686     finally:
7687       _ShutdownInstanceDisks(self, inst)
7688
7689     return inst.name
7690
7691
7692 class LUInstanceRemove(LogicalUnit):
7693   """Remove an instance.
7694
7695   """
7696   HPATH = "instance-remove"
7697   HTYPE = constants.HTYPE_INSTANCE
7698   REQ_BGL = False
7699
7700   def ExpandNames(self):
7701     self._ExpandAndLockInstance()
7702     self.needed_locks[locking.LEVEL_NODE] = []
7703     self.needed_locks[locking.LEVEL_NODE_RES] = []
7704     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7705
7706   def DeclareLocks(self, level):
7707     if level == locking.LEVEL_NODE:
7708       self._LockInstancesNodes()
7709     elif level == locking.LEVEL_NODE_RES:
7710       # Copy node locks
7711       self.needed_locks[locking.LEVEL_NODE_RES] = \
7712         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7713
7714   def BuildHooksEnv(self):
7715     """Build hooks env.
7716
7717     This runs on master, primary and secondary nodes of the instance.
7718
7719     """
7720     env = _BuildInstanceHookEnvByObject(self, self.instance)
7721     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7722     return env
7723
7724   def BuildHooksNodes(self):
7725     """Build hooks nodes.
7726
7727     """
7728     nl = [self.cfg.GetMasterNode()]
7729     nl_post = list(self.instance.all_nodes) + nl
7730     return (nl, nl_post)
7731
7732   def CheckPrereq(self):
7733     """Check prerequisites.
7734
7735     This checks that the instance is in the cluster.
7736
7737     """
7738     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7739     assert self.instance is not None, \
7740       "Cannot retrieve locked instance %s" % self.op.instance_name
7741
7742   def Exec(self, feedback_fn):
7743     """Remove the instance.
7744
7745     """
7746     instance = self.instance
7747     logging.info("Shutting down instance %s on node %s",
7748                  instance.name, instance.primary_node)
7749
7750     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7751                                              self.op.shutdown_timeout)
7752     msg = result.fail_msg
7753     if msg:
7754       if self.op.ignore_failures:
7755         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7756       else:
7757         raise errors.OpExecError("Could not shutdown instance %s on"
7758                                  " node %s: %s" %
7759                                  (instance.name, instance.primary_node, msg))
7760
7761     assert (self.owned_locks(locking.LEVEL_NODE) ==
7762             self.owned_locks(locking.LEVEL_NODE_RES))
7763     assert not (set(instance.all_nodes) -
7764                 self.owned_locks(locking.LEVEL_NODE)), \
7765       "Not owning correct locks"
7766
7767     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7768
7769
7770 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7771   """Utility function to remove an instance.
7772
7773   """
7774   logging.info("Removing block devices for instance %s", instance.name)
7775
7776   if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7777     if not ignore_failures:
7778       raise errors.OpExecError("Can't remove instance's disks")
7779     feedback_fn("Warning: can't remove instance's disks")
7780
7781   logging.info("Removing instance %s out of cluster config", instance.name)
7782
7783   lu.cfg.RemoveInstance(instance.name)
7784
7785   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7786     "Instance lock removal conflict"
7787
7788   # Remove lock for the instance
7789   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7790
7791
7792 class LUInstanceQuery(NoHooksLU):
7793   """Logical unit for querying instances.
7794
7795   """
7796   # pylint: disable=W0142
7797   REQ_BGL = False
7798
7799   def CheckArguments(self):
7800     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7801                              self.op.output_fields, self.op.use_locking)
7802
7803   def ExpandNames(self):
7804     self.iq.ExpandNames(self)
7805
7806   def DeclareLocks(self, level):
7807     self.iq.DeclareLocks(self, level)
7808
7809   def Exec(self, feedback_fn):
7810     return self.iq.OldStyleQuery(self)
7811
7812
7813 def _ExpandNamesForMigration(lu):
7814   """Expands names for use with L{TLMigrateInstance}.
7815
7816   @type lu: L{LogicalUnit}
7817
7818   """
7819   if lu.op.target_node is not None:
7820     lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
7821
7822   lu.needed_locks[locking.LEVEL_NODE] = []
7823   lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7824
7825   lu.needed_locks[locking.LEVEL_NODE_RES] = []
7826   lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7827
7828   # The node allocation lock is actually only needed for replicated instances
7829   # (e.g. DRBD8) and if an iallocator is used.
7830   lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
7831
7832
7833 def _DeclareLocksForMigration(lu, level):
7834   """Declares locks for L{TLMigrateInstance}.
7835
7836   @type lu: L{LogicalUnit}
7837   @param level: Lock level
7838
7839   """
7840   if level == locking.LEVEL_NODE_ALLOC:
7841     assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
7842
7843     instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
7844
7845     if instance.disk_template in constants.DTS_EXT_MIRROR:
7846       if lu.op.target_node is None:
7847         lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7848         lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7849       else:
7850         lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7851                                                lu.op.target_node]
7852       del lu.recalculate_locks[locking.LEVEL_NODE]
7853     else:
7854       lu._LockInstancesNodes() # pylint: disable=W0212
7855
7856   elif level == locking.LEVEL_NODE:
7857     # Node locks are declared together with the node allocation lock
7858     assert lu.needed_locks[locking.LEVEL_NODE]
7859
7860   elif level == locking.LEVEL_NODE_RES:
7861     # Copy node locks
7862     lu.needed_locks[locking.LEVEL_NODE_RES] = \
7863       _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
7864
7865
7866 class LUInstanceFailover(LogicalUnit):
7867   """Failover an instance.
7868
7869   """
7870   HPATH = "instance-failover"
7871   HTYPE = constants.HTYPE_INSTANCE
7872   REQ_BGL = False
7873
7874   def CheckArguments(self):
7875     """Check the arguments.
7876
7877     """
7878     self.iallocator = getattr(self.op, "iallocator", None)
7879     self.target_node = getattr(self.op, "target_node", None)
7880
7881   def ExpandNames(self):
7882     self._ExpandAndLockInstance()
7883     _ExpandNamesForMigration(self)
7884
7885     self._migrater = \
7886       TLMigrateInstance(self, self.op.instance_name, False, True, False,
7887                         self.op.ignore_consistency, True,
7888                         self.op.shutdown_timeout, self.op.ignore_ipolicy)
7889
7890     self.tasklets = [self._migrater]
7891
7892   def DeclareLocks(self, level):
7893     _DeclareLocksForMigration(self, level)
7894
7895   def BuildHooksEnv(self):
7896     """Build hooks env.
7897
7898     This runs on master, primary and secondary nodes of the instance.
7899
7900     """
7901     instance = self._migrater.instance
7902     source_node = instance.primary_node
7903     target_node = self.op.target_node
7904     env = {
7905       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7906       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7907       "OLD_PRIMARY": source_node,
7908       "NEW_PRIMARY": target_node,
7909       }
7910
7911     if instance.disk_template in constants.DTS_INT_MIRROR:
7912       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7913       env["NEW_SECONDARY"] = source_node
7914     else:
7915       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7916
7917     env.update(_BuildInstanceHookEnvByObject(self, instance))
7918
7919     return env
7920
7921   def BuildHooksNodes(self):
7922     """Build hooks nodes.
7923
7924     """
7925     instance = self._migrater.instance
7926     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7927     return (nl, nl + [instance.primary_node])
7928
7929
7930 class LUInstanceMigrate(LogicalUnit):
7931   """Migrate an instance.
7932
7933   This is migration without shutting down, compared to the failover,
7934   which is done with shutdown.
7935
7936   """
7937   HPATH = "instance-migrate"
7938   HTYPE = constants.HTYPE_INSTANCE
7939   REQ_BGL = False
7940
7941   def ExpandNames(self):
7942     self._ExpandAndLockInstance()
7943     _ExpandNamesForMigration(self)
7944
7945     self._migrater = \
7946       TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
7947                         False, self.op.allow_failover, False,
7948                         self.op.allow_runtime_changes,
7949                         constants.DEFAULT_SHUTDOWN_TIMEOUT,
7950                         self.op.ignore_ipolicy)
7951
7952     self.tasklets = [self._migrater]
7953
7954   def DeclareLocks(self, level):
7955     _DeclareLocksForMigration(self, level)
7956
7957   def BuildHooksEnv(self):
7958     """Build hooks env.
7959
7960     This runs on master, primary and secondary nodes of the instance.
7961
7962     """
7963     instance = self._migrater.instance
7964     source_node = instance.primary_node
7965     target_node = self.op.target_node
7966     env = _BuildInstanceHookEnvByObject(self, instance)
7967     env.update({
7968       "MIGRATE_LIVE": self._migrater.live,
7969       "MIGRATE_CLEANUP": self.op.cleanup,
7970       "OLD_PRIMARY": source_node,
7971       "NEW_PRIMARY": target_node,
7972       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7973       })
7974
7975     if instance.disk_template in constants.DTS_INT_MIRROR:
7976       env["OLD_SECONDARY"] = target_node
7977       env["NEW_SECONDARY"] = source_node
7978     else:
7979       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7980
7981     return env
7982
7983   def BuildHooksNodes(self):
7984     """Build hooks nodes.
7985
7986     """
7987     instance = self._migrater.instance
7988     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7989     return (nl, nl + [instance.primary_node])
7990
7991
7992 class LUInstanceMove(LogicalUnit):
7993   """Move an instance by data-copying.
7994
7995   """
7996   HPATH = "instance-move"
7997   HTYPE = constants.HTYPE_INSTANCE
7998   REQ_BGL = False
7999
8000   def ExpandNames(self):
8001     self._ExpandAndLockInstance()
8002     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8003     self.op.target_node = target_node
8004     self.needed_locks[locking.LEVEL_NODE] = [target_node]
8005     self.needed_locks[locking.LEVEL_NODE_RES] = []
8006     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8007
8008   def DeclareLocks(self, level):
8009     if level == locking.LEVEL_NODE:
8010       self._LockInstancesNodes(primary_only=True)
8011     elif level == locking.LEVEL_NODE_RES:
8012       # Copy node locks
8013       self.needed_locks[locking.LEVEL_NODE_RES] = \
8014         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8015
8016   def BuildHooksEnv(self):
8017     """Build hooks env.
8018
8019     This runs on master, primary and secondary nodes of the instance.
8020
8021     """
8022     env = {
8023       "TARGET_NODE": self.op.target_node,
8024       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8025       }
8026     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8027     return env
8028
8029   def BuildHooksNodes(self):
8030     """Build hooks nodes.
8031
8032     """
8033     nl = [
8034       self.cfg.GetMasterNode(),
8035       self.instance.primary_node,
8036       self.op.target_node,
8037       ]
8038     return (nl, nl)
8039
8040   def CheckPrereq(self):
8041     """Check prerequisites.
8042
8043     This checks that the instance is in the cluster.
8044
8045     """
8046     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8047     assert self.instance is not None, \
8048       "Cannot retrieve locked instance %s" % self.op.instance_name
8049
8050     node = self.cfg.GetNodeInfo(self.op.target_node)
8051     assert node is not None, \
8052       "Cannot retrieve locked node %s" % self.op.target_node
8053
8054     self.target_node = target_node = node.name
8055
8056     if target_node == instance.primary_node:
8057       raise errors.OpPrereqError("Instance %s is already on the node %s" %
8058                                  (instance.name, target_node),
8059                                  errors.ECODE_STATE)
8060
8061     bep = self.cfg.GetClusterInfo().FillBE(instance)
8062
8063     for idx, dsk in enumerate(instance.disks):
8064       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8065         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8066                                    " cannot copy" % idx, errors.ECODE_STATE)
8067
8068     _CheckNodeOnline(self, target_node)
8069     _CheckNodeNotDrained(self, target_node)
8070     _CheckNodeVmCapable(self, target_node)
8071     cluster = self.cfg.GetClusterInfo()
8072     group_info = self.cfg.GetNodeGroup(node.group)
8073     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8074     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8075                             ignore=self.op.ignore_ipolicy)
8076
8077     if instance.admin_state == constants.ADMINST_UP:
8078       # check memory requirements on the secondary node
8079       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8080                            instance.name, bep[constants.BE_MAXMEM],
8081                            instance.hypervisor)
8082     else:
8083       self.LogInfo("Not checking memory on the secondary node as"
8084                    " instance will not be started")
8085
8086     # check bridge existance
8087     _CheckInstanceBridgesExist(self, instance, node=target_node)
8088
8089   def Exec(self, feedback_fn):
8090     """Move an instance.
8091
8092     The move is done by shutting it down on its present node, copying
8093     the data over (slow) and starting it on the new node.
8094
8095     """
8096     instance = self.instance
8097
8098     source_node = instance.primary_node
8099     target_node = self.target_node
8100
8101     self.LogInfo("Shutting down instance %s on source node %s",
8102                  instance.name, source_node)
8103
8104     assert (self.owned_locks(locking.LEVEL_NODE) ==
8105             self.owned_locks(locking.LEVEL_NODE_RES))
8106
8107     result = self.rpc.call_instance_shutdown(source_node, instance,
8108                                              self.op.shutdown_timeout)
8109     msg = result.fail_msg
8110     if msg:
8111       if self.op.ignore_consistency:
8112         self.LogWarning("Could not shutdown instance %s on node %s."
8113                         " Proceeding anyway. Please make sure node"
8114                         " %s is down. Error details: %s",
8115                         instance.name, source_node, source_node, msg)
8116       else:
8117         raise errors.OpExecError("Could not shutdown instance %s on"
8118                                  " node %s: %s" %
8119                                  (instance.name, source_node, msg))
8120
8121     # create the target disks
8122     try:
8123       _CreateDisks(self, instance, target_node=target_node)
8124     except errors.OpExecError:
8125       self.LogWarning("Device creation failed, reverting...")
8126       try:
8127         _RemoveDisks(self, instance, target_node=target_node)
8128       finally:
8129         self.cfg.ReleaseDRBDMinors(instance.name)
8130         raise
8131
8132     cluster_name = self.cfg.GetClusterInfo().cluster_name
8133
8134     errs = []
8135     # activate, get path, copy the data over
8136     for idx, disk in enumerate(instance.disks):
8137       self.LogInfo("Copying data for disk %d", idx)
8138       result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8139                                                instance.name, True, idx)
8140       if result.fail_msg:
8141         self.LogWarning("Can't assemble newly created disk %d: %s",
8142                         idx, result.fail_msg)
8143         errs.append(result.fail_msg)
8144         break
8145       dev_path = result.payload
8146       result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8147                                              target_node, dev_path,
8148                                              cluster_name)
8149       if result.fail_msg:
8150         self.LogWarning("Can't copy data over for disk %d: %s",
8151                         idx, result.fail_msg)
8152         errs.append(result.fail_msg)
8153         break
8154
8155     if errs:
8156       self.LogWarning("Some disks failed to copy, aborting")
8157       try:
8158         _RemoveDisks(self, instance, target_node=target_node)
8159       finally:
8160         self.cfg.ReleaseDRBDMinors(instance.name)
8161         raise errors.OpExecError("Errors during disk copy: %s" %
8162                                  (",".join(errs),))
8163
8164     instance.primary_node = target_node
8165     self.cfg.Update(instance, feedback_fn)
8166
8167     self.LogInfo("Removing the disks on the original node")
8168     _RemoveDisks(self, instance, target_node=source_node)
8169
8170     # Only start the instance if it's marked as up
8171     if instance.admin_state == constants.ADMINST_UP:
8172       self.LogInfo("Starting instance %s on node %s",
8173                    instance.name, target_node)
8174
8175       disks_ok, _ = _AssembleInstanceDisks(self, instance,
8176                                            ignore_secondaries=True)
8177       if not disks_ok:
8178         _ShutdownInstanceDisks(self, instance)
8179         raise errors.OpExecError("Can't activate the instance's disks")
8180
8181       result = self.rpc.call_instance_start(target_node,
8182                                             (instance, None, None), False)
8183       msg = result.fail_msg
8184       if msg:
8185         _ShutdownInstanceDisks(self, instance)
8186         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8187                                  (instance.name, target_node, msg))
8188
8189
8190 class LUNodeMigrate(LogicalUnit):
8191   """Migrate all instances from a node.
8192
8193   """
8194   HPATH = "node-migrate"
8195   HTYPE = constants.HTYPE_NODE
8196   REQ_BGL = False
8197
8198   def CheckArguments(self):
8199     pass
8200
8201   def ExpandNames(self):
8202     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8203
8204     self.share_locks = _ShareAll()
8205     self.needed_locks = {
8206       locking.LEVEL_NODE: [self.op.node_name],
8207       }
8208
8209   def BuildHooksEnv(self):
8210     """Build hooks env.
8211
8212     This runs on the master, the primary and all the secondaries.
8213
8214     """
8215     return {
8216       "NODE_NAME": self.op.node_name,
8217       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8218       }
8219
8220   def BuildHooksNodes(self):
8221     """Build hooks nodes.
8222
8223     """
8224     nl = [self.cfg.GetMasterNode()]
8225     return (nl, nl)
8226
8227   def CheckPrereq(self):
8228     pass
8229
8230   def Exec(self, feedback_fn):
8231     # Prepare jobs for migration instances
8232     allow_runtime_changes = self.op.allow_runtime_changes
8233     jobs = [
8234       [opcodes.OpInstanceMigrate(instance_name=inst.name,
8235                                  mode=self.op.mode,
8236                                  live=self.op.live,
8237                                  iallocator=self.op.iallocator,
8238                                  target_node=self.op.target_node,
8239                                  allow_runtime_changes=allow_runtime_changes,
8240                                  ignore_ipolicy=self.op.ignore_ipolicy)]
8241       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8242
8243     # TODO: Run iallocator in this opcode and pass correct placement options to
8244     # OpInstanceMigrate. Since other jobs can modify the cluster between
8245     # running the iallocator and the actual migration, a good consistency model
8246     # will have to be found.
8247
8248     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8249             frozenset([self.op.node_name]))
8250
8251     return ResultWithJobs(jobs)
8252
8253
8254 class TLMigrateInstance(Tasklet):
8255   """Tasklet class for instance migration.
8256
8257   @type live: boolean
8258   @ivar live: whether the migration will be done live or non-live;
8259       this variable is initalized only after CheckPrereq has run
8260   @type cleanup: boolean
8261   @ivar cleanup: Wheater we cleanup from a failed migration
8262   @type iallocator: string
8263   @ivar iallocator: The iallocator used to determine target_node
8264   @type target_node: string
8265   @ivar target_node: If given, the target_node to reallocate the instance to
8266   @type failover: boolean
8267   @ivar failover: Whether operation results in failover or migration
8268   @type fallback: boolean
8269   @ivar fallback: Whether fallback to failover is allowed if migration not
8270                   possible
8271   @type ignore_consistency: boolean
8272   @ivar ignore_consistency: Wheter we should ignore consistency between source
8273                             and target node
8274   @type shutdown_timeout: int
8275   @ivar shutdown_timeout: In case of failover timeout of the shutdown
8276   @type ignore_ipolicy: bool
8277   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8278
8279   """
8280
8281   # Constants
8282   _MIGRATION_POLL_INTERVAL = 1      # seconds
8283   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8284
8285   def __init__(self, lu, instance_name, cleanup, failover, fallback,
8286                ignore_consistency, allow_runtime_changes, shutdown_timeout,
8287                ignore_ipolicy):
8288     """Initializes this class.
8289
8290     """
8291     Tasklet.__init__(self, lu)
8292
8293     # Parameters
8294     self.instance_name = instance_name
8295     self.cleanup = cleanup
8296     self.live = False # will be overridden later
8297     self.failover = failover
8298     self.fallback = fallback
8299     self.ignore_consistency = ignore_consistency
8300     self.shutdown_timeout = shutdown_timeout
8301     self.ignore_ipolicy = ignore_ipolicy
8302     self.allow_runtime_changes = allow_runtime_changes
8303
8304   def CheckPrereq(self):
8305     """Check prerequisites.
8306
8307     This checks that the instance is in the cluster.
8308
8309     """
8310     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8311     instance = self.cfg.GetInstanceInfo(instance_name)
8312     assert instance is not None
8313     self.instance = instance
8314     cluster = self.cfg.GetClusterInfo()
8315
8316     if (not self.cleanup and
8317         not instance.admin_state == constants.ADMINST_UP and
8318         not self.failover and self.fallback):
8319       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8320                       " switching to failover")
8321       self.failover = True
8322
8323     if instance.disk_template not in constants.DTS_MIRRORED:
8324       if self.failover:
8325         text = "failovers"
8326       else:
8327         text = "migrations"
8328       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8329                                  " %s" % (instance.disk_template, text),
8330                                  errors.ECODE_STATE)
8331
8332     if instance.disk_template in constants.DTS_EXT_MIRROR:
8333       assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8334
8335       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8336
8337       if self.lu.op.iallocator:
8338         self._RunAllocator()
8339       else:
8340         # We set set self.target_node as it is required by
8341         # BuildHooksEnv
8342         self.target_node = self.lu.op.target_node
8343
8344       # Check that the target node is correct in terms of instance policy
8345       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8346       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8347       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8348                                                               group_info)
8349       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8350                               ignore=self.ignore_ipolicy)
8351
8352       # self.target_node is already populated, either directly or by the
8353       # iallocator run
8354       target_node = self.target_node
8355       if self.target_node == instance.primary_node:
8356         raise errors.OpPrereqError("Cannot migrate instance %s"
8357                                    " to its primary (%s)" %
8358                                    (instance.name, instance.primary_node),
8359                                    errors.ECODE_STATE)
8360
8361       if len(self.lu.tasklets) == 1:
8362         # It is safe to release locks only when we're the only tasklet
8363         # in the LU
8364         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8365                       keep=[instance.primary_node, self.target_node])
8366         _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8367
8368     else:
8369       assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8370
8371       secondary_nodes = instance.secondary_nodes
8372       if not secondary_nodes:
8373         raise errors.ConfigurationError("No secondary node but using"
8374                                         " %s disk template" %
8375                                         instance.disk_template)
8376       target_node = secondary_nodes[0]
8377       if self.lu.op.iallocator or (self.lu.op.target_node and
8378                                    self.lu.op.target_node != target_node):
8379         if self.failover:
8380           text = "failed over"
8381         else:
8382           text = "migrated"
8383         raise errors.OpPrereqError("Instances with disk template %s cannot"
8384                                    " be %s to arbitrary nodes"
8385                                    " (neither an iallocator nor a target"
8386                                    " node can be passed)" %
8387                                    (instance.disk_template, text),
8388                                    errors.ECODE_INVAL)
8389       nodeinfo = self.cfg.GetNodeInfo(target_node)
8390       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8391       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8392                                                               group_info)
8393       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8394                               ignore=self.ignore_ipolicy)
8395
8396     i_be = cluster.FillBE(instance)
8397
8398     # check memory requirements on the secondary node
8399     if (not self.cleanup and
8400          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8401       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8402                                                "migrating instance %s" %
8403                                                instance.name,
8404                                                i_be[constants.BE_MINMEM],
8405                                                instance.hypervisor)
8406     else:
8407       self.lu.LogInfo("Not checking memory on the secondary node as"
8408                       " instance will not be started")
8409
8410     # check if failover must be forced instead of migration
8411     if (not self.cleanup and not self.failover and
8412         i_be[constants.BE_ALWAYS_FAILOVER]):
8413       self.lu.LogInfo("Instance configured to always failover; fallback"
8414                       " to failover")
8415       self.failover = True
8416
8417     # check bridge existance
8418     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8419
8420     if not self.cleanup:
8421       _CheckNodeNotDrained(self.lu, target_node)
8422       if not self.failover:
8423         result = self.rpc.call_instance_migratable(instance.primary_node,
8424                                                    instance)
8425         if result.fail_msg and self.fallback:
8426           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8427                           " failover")
8428           self.failover = True
8429         else:
8430           result.Raise("Can't migrate, please use failover",
8431                        prereq=True, ecode=errors.ECODE_STATE)
8432
8433     assert not (self.failover and self.cleanup)
8434
8435     if not self.failover:
8436       if self.lu.op.live is not None and self.lu.op.mode is not None:
8437         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8438                                    " parameters are accepted",
8439                                    errors.ECODE_INVAL)
8440       if self.lu.op.live is not None:
8441         if self.lu.op.live:
8442           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8443         else:
8444           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8445         # reset the 'live' parameter to None so that repeated
8446         # invocations of CheckPrereq do not raise an exception
8447         self.lu.op.live = None
8448       elif self.lu.op.mode is None:
8449         # read the default value from the hypervisor
8450         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8451         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8452
8453       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8454     else:
8455       # Failover is never live
8456       self.live = False
8457
8458     if not (self.failover or self.cleanup):
8459       remote_info = self.rpc.call_instance_info(instance.primary_node,
8460                                                 instance.name,
8461                                                 instance.hypervisor)
8462       remote_info.Raise("Error checking instance on node %s" %
8463                         instance.primary_node)
8464       instance_running = bool(remote_info.payload)
8465       if instance_running:
8466         self.current_mem = int(remote_info.payload["memory"])
8467
8468   def _RunAllocator(self):
8469     """Run the allocator based on input opcode.
8470
8471     """
8472     assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8473
8474     # FIXME: add a self.ignore_ipolicy option
8475     req = iallocator.IAReqRelocate(name=self.instance_name,
8476                                    relocate_from=[self.instance.primary_node])
8477     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8478
8479     ial.Run(self.lu.op.iallocator)
8480
8481     if not ial.success:
8482       raise errors.OpPrereqError("Can't compute nodes using"
8483                                  " iallocator '%s': %s" %
8484                                  (self.lu.op.iallocator, ial.info),
8485                                  errors.ECODE_NORES)
8486     self.target_node = ial.result[0]
8487     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8488                     self.instance_name, self.lu.op.iallocator,
8489                     utils.CommaJoin(ial.result))
8490
8491   def _WaitUntilSync(self):
8492     """Poll with custom rpc for disk sync.
8493
8494     This uses our own step-based rpc call.
8495
8496     """
8497     self.feedback_fn("* wait until resync is done")
8498     all_done = False
8499     while not all_done:
8500       all_done = True
8501       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8502                                             self.nodes_ip,
8503                                             (self.instance.disks,
8504                                              self.instance))
8505       min_percent = 100
8506       for node, nres in result.items():
8507         nres.Raise("Cannot resync disks on node %s" % node)
8508         node_done, node_percent = nres.payload
8509         all_done = all_done and node_done
8510         if node_percent is not None:
8511           min_percent = min(min_percent, node_percent)
8512       if not all_done:
8513         if min_percent < 100:
8514           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8515         time.sleep(2)
8516
8517   def _EnsureSecondary(self, node):
8518     """Demote a node to secondary.
8519
8520     """
8521     self.feedback_fn("* switching node %s to secondary mode" % node)
8522
8523     for dev in self.instance.disks:
8524       self.cfg.SetDiskID(dev, node)
8525
8526     result = self.rpc.call_blockdev_close(node, self.instance.name,
8527                                           self.instance.disks)
8528     result.Raise("Cannot change disk to secondary on node %s" % node)
8529
8530   def _GoStandalone(self):
8531     """Disconnect from the network.
8532
8533     """
8534     self.feedback_fn("* changing into standalone mode")
8535     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8536                                                self.instance.disks)
8537     for node, nres in result.items():
8538       nres.Raise("Cannot disconnect disks node %s" % node)
8539
8540   def _GoReconnect(self, multimaster):
8541     """Reconnect to the network.
8542
8543     """
8544     if multimaster:
8545       msg = "dual-master"
8546     else:
8547       msg = "single-master"
8548     self.feedback_fn("* changing disks into %s mode" % msg)
8549     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8550                                            (self.instance.disks, self.instance),
8551                                            self.instance.name, multimaster)
8552     for node, nres in result.items():
8553       nres.Raise("Cannot change disks config on node %s" % node)
8554
8555   def _ExecCleanup(self):
8556     """Try to cleanup after a failed migration.
8557
8558     The cleanup is done by:
8559       - check that the instance is running only on one node
8560         (and update the config if needed)
8561       - change disks on its secondary node to secondary
8562       - wait until disks are fully synchronized
8563       - disconnect from the network
8564       - change disks into single-master mode
8565       - wait again until disks are fully synchronized
8566
8567     """
8568     instance = self.instance
8569     target_node = self.target_node
8570     source_node = self.source_node
8571
8572     # check running on only one node
8573     self.feedback_fn("* checking where the instance actually runs"
8574                      " (if this hangs, the hypervisor might be in"
8575                      " a bad state)")
8576     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8577     for node, result in ins_l.items():
8578       result.Raise("Can't contact node %s" % node)
8579
8580     runningon_source = instance.name in ins_l[source_node].payload
8581     runningon_target = instance.name in ins_l[target_node].payload
8582
8583     if runningon_source and runningon_target:
8584       raise errors.OpExecError("Instance seems to be running on two nodes,"
8585                                " or the hypervisor is confused; you will have"
8586                                " to ensure manually that it runs only on one"
8587                                " and restart this operation")
8588
8589     if not (runningon_source or runningon_target):
8590       raise errors.OpExecError("Instance does not seem to be running at all;"
8591                                " in this case it's safer to repair by"
8592                                " running 'gnt-instance stop' to ensure disk"
8593                                " shutdown, and then restarting it")
8594
8595     if runningon_target:
8596       # the migration has actually succeeded, we need to update the config
8597       self.feedback_fn("* instance running on secondary node (%s),"
8598                        " updating config" % target_node)
8599       instance.primary_node = target_node
8600       self.cfg.Update(instance, self.feedback_fn)
8601       demoted_node = source_node
8602     else:
8603       self.feedback_fn("* instance confirmed to be running on its"
8604                        " primary node (%s)" % source_node)
8605       demoted_node = target_node
8606
8607     if instance.disk_template in constants.DTS_INT_MIRROR:
8608       self._EnsureSecondary(demoted_node)
8609       try:
8610         self._WaitUntilSync()
8611       except errors.OpExecError:
8612         # we ignore here errors, since if the device is standalone, it
8613         # won't be able to sync
8614         pass
8615       self._GoStandalone()
8616       self._GoReconnect(False)
8617       self._WaitUntilSync()
8618
8619     self.feedback_fn("* done")
8620
8621   def _RevertDiskStatus(self):
8622     """Try to revert the disk status after a failed migration.
8623
8624     """
8625     target_node = self.target_node
8626     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8627       return
8628
8629     try:
8630       self._EnsureSecondary(target_node)
8631       self._GoStandalone()
8632       self._GoReconnect(False)
8633       self._WaitUntilSync()
8634     except errors.OpExecError, err:
8635       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8636                          " please try to recover the instance manually;"
8637                          " error '%s'" % str(err))
8638
8639   def _AbortMigration(self):
8640     """Call the hypervisor code to abort a started migration.
8641
8642     """
8643     instance = self.instance
8644     target_node = self.target_node
8645     source_node = self.source_node
8646     migration_info = self.migration_info
8647
8648     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8649                                                                  instance,
8650                                                                  migration_info,
8651                                                                  False)
8652     abort_msg = abort_result.fail_msg
8653     if abort_msg:
8654       logging.error("Aborting migration failed on target node %s: %s",
8655                     target_node, abort_msg)
8656       # Don't raise an exception here, as we stil have to try to revert the
8657       # disk status, even if this step failed.
8658
8659     abort_result = self.rpc.call_instance_finalize_migration_src(
8660       source_node, instance, False, self.live)
8661     abort_msg = abort_result.fail_msg
8662     if abort_msg:
8663       logging.error("Aborting migration failed on source node %s: %s",
8664                     source_node, abort_msg)
8665
8666   def _ExecMigration(self):
8667     """Migrate an instance.
8668
8669     The migrate is done by:
8670       - change the disks into dual-master mode
8671       - wait until disks are fully synchronized again
8672       - migrate the instance
8673       - change disks on the new secondary node (the old primary) to secondary
8674       - wait until disks are fully synchronized
8675       - change disks into single-master mode
8676
8677     """
8678     instance = self.instance
8679     target_node = self.target_node
8680     source_node = self.source_node
8681
8682     # Check for hypervisor version mismatch and warn the user.
8683     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8684                                        None, [self.instance.hypervisor])
8685     for ninfo in nodeinfo.values():
8686       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8687                   ninfo.node)
8688     (_, _, (src_info, )) = nodeinfo[source_node].payload
8689     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8690
8691     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8692         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8693       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8694       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8695       if src_version != dst_version:
8696         self.feedback_fn("* warning: hypervisor version mismatch between"
8697                          " source (%s) and target (%s) node" %
8698                          (src_version, dst_version))
8699
8700     self.feedback_fn("* checking disk consistency between source and target")
8701     for (idx, dev) in enumerate(instance.disks):
8702       if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8703         raise errors.OpExecError("Disk %s is degraded or not fully"
8704                                  " synchronized on target node,"
8705                                  " aborting migration" % idx)
8706
8707     if self.current_mem > self.tgt_free_mem:
8708       if not self.allow_runtime_changes:
8709         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8710                                  " free memory to fit instance %s on target"
8711                                  " node %s (have %dMB, need %dMB)" %
8712                                  (instance.name, target_node,
8713                                   self.tgt_free_mem, self.current_mem))
8714       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8715       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8716                                                      instance,
8717                                                      self.tgt_free_mem)
8718       rpcres.Raise("Cannot modify instance runtime memory")
8719
8720     # First get the migration information from the remote node
8721     result = self.rpc.call_migration_info(source_node, instance)
8722     msg = result.fail_msg
8723     if msg:
8724       log_err = ("Failed fetching source migration information from %s: %s" %
8725                  (source_node, msg))
8726       logging.error(log_err)
8727       raise errors.OpExecError(log_err)
8728
8729     self.migration_info = migration_info = result.payload
8730
8731     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8732       # Then switch the disks to master/master mode
8733       self._EnsureSecondary(target_node)
8734       self._GoStandalone()
8735       self._GoReconnect(True)
8736       self._WaitUntilSync()
8737
8738     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8739     result = self.rpc.call_accept_instance(target_node,
8740                                            instance,
8741                                            migration_info,
8742                                            self.nodes_ip[target_node])
8743
8744     msg = result.fail_msg
8745     if msg:
8746       logging.error("Instance pre-migration failed, trying to revert"
8747                     " disk status: %s", msg)
8748       self.feedback_fn("Pre-migration failed, aborting")
8749       self._AbortMigration()
8750       self._RevertDiskStatus()
8751       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8752                                (instance.name, msg))
8753
8754     self.feedback_fn("* migrating instance to %s" % target_node)
8755     result = self.rpc.call_instance_migrate(source_node, instance,
8756                                             self.nodes_ip[target_node],
8757                                             self.live)
8758     msg = result.fail_msg
8759     if msg:
8760       logging.error("Instance migration failed, trying to revert"
8761                     " disk status: %s", msg)
8762       self.feedback_fn("Migration failed, aborting")
8763       self._AbortMigration()
8764       self._RevertDiskStatus()
8765       raise errors.OpExecError("Could not migrate instance %s: %s" %
8766                                (instance.name, msg))
8767
8768     self.feedback_fn("* starting memory transfer")
8769     last_feedback = time.time()
8770     while True:
8771       result = self.rpc.call_instance_get_migration_status(source_node,
8772                                                            instance)
8773       msg = result.fail_msg
8774       ms = result.payload   # MigrationStatus instance
8775       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8776         logging.error("Instance migration failed, trying to revert"
8777                       " disk status: %s", msg)
8778         self.feedback_fn("Migration failed, aborting")
8779         self._AbortMigration()
8780         self._RevertDiskStatus()
8781         if not msg:
8782           msg = "hypervisor returned failure"
8783         raise errors.OpExecError("Could not migrate instance %s: %s" %
8784                                  (instance.name, msg))
8785
8786       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8787         self.feedback_fn("* memory transfer complete")
8788         break
8789
8790       if (utils.TimeoutExpired(last_feedback,
8791                                self._MIGRATION_FEEDBACK_INTERVAL) and
8792           ms.transferred_ram is not None):
8793         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8794         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8795         last_feedback = time.time()
8796
8797       time.sleep(self._MIGRATION_POLL_INTERVAL)
8798
8799     result = self.rpc.call_instance_finalize_migration_src(source_node,
8800                                                            instance,
8801                                                            True,
8802                                                            self.live)
8803     msg = result.fail_msg
8804     if msg:
8805       logging.error("Instance migration succeeded, but finalization failed"
8806                     " on the source node: %s", msg)
8807       raise errors.OpExecError("Could not finalize instance migration: %s" %
8808                                msg)
8809
8810     instance.primary_node = target_node
8811
8812     # distribute new instance config to the other nodes
8813     self.cfg.Update(instance, self.feedback_fn)
8814
8815     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8816                                                            instance,
8817                                                            migration_info,
8818                                                            True)
8819     msg = result.fail_msg
8820     if msg:
8821       logging.error("Instance migration succeeded, but finalization failed"
8822                     " on the target node: %s", msg)
8823       raise errors.OpExecError("Could not finalize instance migration: %s" %
8824                                msg)
8825
8826     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8827       self._EnsureSecondary(source_node)
8828       self._WaitUntilSync()
8829       self._GoStandalone()
8830       self._GoReconnect(False)
8831       self._WaitUntilSync()
8832
8833     # If the instance's disk template is `rbd' and there was a successful
8834     # migration, unmap the device from the source node.
8835     if self.instance.disk_template == constants.DT_RBD:
8836       disks = _ExpandCheckDisks(instance, instance.disks)
8837       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8838       for disk in disks:
8839         result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8840         msg = result.fail_msg
8841         if msg:
8842           logging.error("Migration was successful, but couldn't unmap the"
8843                         " block device %s on source node %s: %s",
8844                         disk.iv_name, source_node, msg)
8845           logging.error("You need to unmap the device %s manually on %s",
8846                         disk.iv_name, source_node)
8847
8848     self.feedback_fn("* done")
8849
8850   def _ExecFailover(self):
8851     """Failover an instance.
8852
8853     The failover is done by shutting it down on its present node and
8854     starting it on the secondary.
8855
8856     """
8857     instance = self.instance
8858     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8859
8860     source_node = instance.primary_node
8861     target_node = self.target_node
8862
8863     if instance.admin_state == constants.ADMINST_UP:
8864       self.feedback_fn("* checking disk consistency between source and target")
8865       for (idx, dev) in enumerate(instance.disks):
8866         # for drbd, these are drbd over lvm
8867         if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8868                                      False):
8869           if primary_node.offline:
8870             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8871                              " target node %s" %
8872                              (primary_node.name, idx, target_node))
8873           elif not self.ignore_consistency:
8874             raise errors.OpExecError("Disk %s is degraded on target node,"
8875                                      " aborting failover" % idx)
8876     else:
8877       self.feedback_fn("* not checking disk consistency as instance is not"
8878                        " running")
8879
8880     self.feedback_fn("* shutting down instance on source node")
8881     logging.info("Shutting down instance %s on node %s",
8882                  instance.name, source_node)
8883
8884     result = self.rpc.call_instance_shutdown(source_node, instance,
8885                                              self.shutdown_timeout)
8886     msg = result.fail_msg
8887     if msg:
8888       if self.ignore_consistency or primary_node.offline:
8889         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8890                            " proceeding anyway; please make sure node"
8891                            " %s is down; error details: %s",
8892                            instance.name, source_node, source_node, msg)
8893       else:
8894         raise errors.OpExecError("Could not shutdown instance %s on"
8895                                  " node %s: %s" %
8896                                  (instance.name, source_node, msg))
8897
8898     self.feedback_fn("* deactivating the instance's disks on source node")
8899     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8900       raise errors.OpExecError("Can't shut down the instance's disks")
8901
8902     instance.primary_node = target_node
8903     # distribute new instance config to the other nodes
8904     self.cfg.Update(instance, self.feedback_fn)
8905
8906     # Only start the instance if it's marked as up
8907     if instance.admin_state == constants.ADMINST_UP:
8908       self.feedback_fn("* activating the instance's disks on target node %s" %
8909                        target_node)
8910       logging.info("Starting instance %s on node %s",
8911                    instance.name, target_node)
8912
8913       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8914                                            ignore_secondaries=True)
8915       if not disks_ok:
8916         _ShutdownInstanceDisks(self.lu, instance)
8917         raise errors.OpExecError("Can't activate the instance's disks")
8918
8919       self.feedback_fn("* starting the instance on the target node %s" %
8920                        target_node)
8921       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8922                                             False)
8923       msg = result.fail_msg
8924       if msg:
8925         _ShutdownInstanceDisks(self.lu, instance)
8926         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8927                                  (instance.name, target_node, msg))
8928
8929   def Exec(self, feedback_fn):
8930     """Perform the migration.
8931
8932     """
8933     self.feedback_fn = feedback_fn
8934     self.source_node = self.instance.primary_node
8935
8936     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8937     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8938       self.target_node = self.instance.secondary_nodes[0]
8939       # Otherwise self.target_node has been populated either
8940       # directly, or through an iallocator.
8941
8942     self.all_nodes = [self.source_node, self.target_node]
8943     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8944                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8945
8946     if self.failover:
8947       feedback_fn("Failover instance %s" % self.instance.name)
8948       self._ExecFailover()
8949     else:
8950       feedback_fn("Migrating instance %s" % self.instance.name)
8951
8952       if self.cleanup:
8953         return self._ExecCleanup()
8954       else:
8955         return self._ExecMigration()
8956
8957
8958 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8959                     force_open):
8960   """Wrapper around L{_CreateBlockDevInner}.
8961
8962   This method annotates the root device first.
8963
8964   """
8965   (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8966   return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8967                               force_open)
8968
8969
8970 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8971                          info, force_open):
8972   """Create a tree of block devices on a given node.
8973
8974   If this device type has to be created on secondaries, create it and
8975   all its children.
8976
8977   If not, just recurse to children keeping the same 'force' value.
8978
8979   @attention: The device has to be annotated already.
8980
8981   @param lu: the lu on whose behalf we execute
8982   @param node: the node on which to create the device
8983   @type instance: L{objects.Instance}
8984   @param instance: the instance which owns the device
8985   @type device: L{objects.Disk}
8986   @param device: the device to create
8987   @type force_create: boolean
8988   @param force_create: whether to force creation of this device; this
8989       will be change to True whenever we find a device which has
8990       CreateOnSecondary() attribute
8991   @param info: the extra 'metadata' we should attach to the device
8992       (this will be represented as a LVM tag)
8993   @type force_open: boolean
8994   @param force_open: this parameter will be passes to the
8995       L{backend.BlockdevCreate} function where it specifies
8996       whether we run on primary or not, and it affects both
8997       the child assembly and the device own Open() execution
8998
8999   """
9000   if device.CreateOnSecondary():
9001     force_create = True
9002
9003   if device.children:
9004     for child in device.children:
9005       _CreateBlockDevInner(lu, node, instance, child, force_create,
9006                            info, force_open)
9007
9008   if not force_create:
9009     return
9010
9011   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
9012
9013
9014 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
9015   """Create a single block device on a given node.
9016
9017   This will not recurse over children of the device, so they must be
9018   created in advance.
9019
9020   @param lu: the lu on whose behalf we execute
9021   @param node: the node on which to create the device
9022   @type instance: L{objects.Instance}
9023   @param instance: the instance which owns the device
9024   @type device: L{objects.Disk}
9025   @param device: the device to create
9026   @param info: the extra 'metadata' we should attach to the device
9027       (this will be represented as a LVM tag)
9028   @type force_open: boolean
9029   @param force_open: this parameter will be passes to the
9030       L{backend.BlockdevCreate} function where it specifies
9031       whether we run on primary or not, and it affects both
9032       the child assembly and the device own Open() execution
9033
9034   """
9035   lu.cfg.SetDiskID(device, node)
9036   result = lu.rpc.call_blockdev_create(node, device, device.size,
9037                                        instance.name, force_open, info)
9038   result.Raise("Can't create block device %s on"
9039                " node %s for instance %s" % (device, node, instance.name))
9040   if device.physical_id is None:
9041     device.physical_id = result.payload
9042
9043
9044 def _GenerateUniqueNames(lu, exts):
9045   """Generate a suitable LV name.
9046
9047   This will generate a logical volume name for the given instance.
9048
9049   """
9050   results = []
9051   for val in exts:
9052     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9053     results.append("%s%s" % (new_id, val))
9054   return results
9055
9056
9057 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9058                          iv_name, p_minor, s_minor):
9059   """Generate a drbd8 device complete with its children.
9060
9061   """
9062   assert len(vgnames) == len(names) == 2
9063   port = lu.cfg.AllocatePort()
9064   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9065
9066   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9067                           logical_id=(vgnames[0], names[0]),
9068                           params={})
9069   dev_meta = objects.Disk(dev_type=constants.LD_LV,
9070                           size=constants.DRBD_META_SIZE,
9071                           logical_id=(vgnames[1], names[1]),
9072                           params={})
9073   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9074                           logical_id=(primary, secondary, port,
9075                                       p_minor, s_minor,
9076                                       shared_secret),
9077                           children=[dev_data, dev_meta],
9078                           iv_name=iv_name, params={})
9079   return drbd_dev
9080
9081
9082 _DISK_TEMPLATE_NAME_PREFIX = {
9083   constants.DT_PLAIN: "",
9084   constants.DT_RBD: ".rbd",
9085   }
9086
9087
9088 _DISK_TEMPLATE_DEVICE_TYPE = {
9089   constants.DT_PLAIN: constants.LD_LV,
9090   constants.DT_FILE: constants.LD_FILE,
9091   constants.DT_SHARED_FILE: constants.LD_FILE,
9092   constants.DT_BLOCK: constants.LD_BLOCKDEV,
9093   constants.DT_RBD: constants.LD_RBD,
9094   }
9095
9096
9097 def _GenerateDiskTemplate(
9098   lu, template_name, instance_name, primary_node, secondary_nodes,
9099   disk_info, file_storage_dir, file_driver, base_index,
9100   feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9101   _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9102   """Generate the entire disk layout for a given template type.
9103
9104   """
9105   #TODO: compute space requirements
9106
9107   vgname = lu.cfg.GetVGName()
9108   disk_count = len(disk_info)
9109   disks = []
9110
9111   if template_name == constants.DT_DISKLESS:
9112     pass
9113   elif template_name == constants.DT_DRBD8:
9114     if len(secondary_nodes) != 1:
9115       raise errors.ProgrammerError("Wrong template configuration")
9116     remote_node = secondary_nodes[0]
9117     minors = lu.cfg.AllocateDRBDMinor(
9118       [primary_node, remote_node] * len(disk_info), instance_name)
9119
9120     (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9121                                                        full_disk_params)
9122     drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9123
9124     names = []
9125     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9126                                                for i in range(disk_count)]):
9127       names.append(lv_prefix + "_data")
9128       names.append(lv_prefix + "_meta")
9129     for idx, disk in enumerate(disk_info):
9130       disk_index = idx + base_index
9131       data_vg = disk.get(constants.IDISK_VG, vgname)
9132       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9133       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9134                                       disk[constants.IDISK_SIZE],
9135                                       [data_vg, meta_vg],
9136                                       names[idx * 2:idx * 2 + 2],
9137                                       "disk/%d" % disk_index,
9138                                       minors[idx * 2], minors[idx * 2 + 1])
9139       disk_dev.mode = disk[constants.IDISK_MODE]
9140       disks.append(disk_dev)
9141   else:
9142     if secondary_nodes:
9143       raise errors.ProgrammerError("Wrong template configuration")
9144
9145     if template_name == constants.DT_FILE:
9146       _req_file_storage()
9147     elif template_name == constants.DT_SHARED_FILE:
9148       _req_shr_file_storage()
9149
9150     name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9151     if name_prefix is None:
9152       names = None
9153     else:
9154       names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9155                                         (name_prefix, base_index + i)
9156                                         for i in range(disk_count)])
9157
9158     if template_name == constants.DT_PLAIN:
9159
9160       def logical_id_fn(idx, _, disk):
9161         vg = disk.get(constants.IDISK_VG, vgname)
9162         return (vg, names[idx])
9163
9164     elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9165       logical_id_fn = \
9166         lambda _, disk_index, disk: (file_driver,
9167                                      "%s/disk%d" % (file_storage_dir,
9168                                                     disk_index))
9169     elif template_name == constants.DT_BLOCK:
9170       logical_id_fn = \
9171         lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9172                                        disk[constants.IDISK_ADOPT])
9173     elif template_name == constants.DT_RBD:
9174       logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9175     else:
9176       raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9177
9178     dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9179
9180     for idx, disk in enumerate(disk_info):
9181       disk_index = idx + base_index
9182       size = disk[constants.IDISK_SIZE]
9183       feedback_fn("* disk %s, size %s" %
9184                   (disk_index, utils.FormatUnit(size, "h")))
9185       disks.append(objects.Disk(dev_type=dev_type, size=size,
9186                                 logical_id=logical_id_fn(idx, disk_index, disk),
9187                                 iv_name="disk/%d" % disk_index,
9188                                 mode=disk[constants.IDISK_MODE],
9189                                 params={}))
9190
9191   return disks
9192
9193
9194 def _GetInstanceInfoText(instance):
9195   """Compute that text that should be added to the disk's metadata.
9196
9197   """
9198   return "originstname+%s" % instance.name
9199
9200
9201 def _CalcEta(time_taken, written, total_size):
9202   """Calculates the ETA based on size written and total size.
9203
9204   @param time_taken: The time taken so far
9205   @param written: amount written so far
9206   @param total_size: The total size of data to be written
9207   @return: The remaining time in seconds
9208
9209   """
9210   avg_time = time_taken / float(written)
9211   return (total_size - written) * avg_time
9212
9213
9214 def _WipeDisks(lu, instance, disks=None):
9215   """Wipes instance disks.
9216
9217   @type lu: L{LogicalUnit}
9218   @param lu: the logical unit on whose behalf we execute
9219   @type instance: L{objects.Instance}
9220   @param instance: the instance whose disks we should create
9221   @return: the success of the wipe
9222
9223   """
9224   node = instance.primary_node
9225
9226   if disks is None:
9227     disks = [(idx, disk, 0)
9228              for (idx, disk) in enumerate(instance.disks)]
9229
9230   for (_, device, _) in disks:
9231     lu.cfg.SetDiskID(device, node)
9232
9233   logging.info("Pausing synchronization of disks of instance '%s'",
9234                instance.name)
9235   result = lu.rpc.call_blockdev_pause_resume_sync(node,
9236                                                   (map(compat.snd, disks),
9237                                                    instance),
9238                                                   True)
9239   result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9240
9241   for idx, success in enumerate(result.payload):
9242     if not success:
9243       logging.warn("Pausing synchronization of disk %s of instance '%s'"
9244                    " failed", idx, instance.name)
9245
9246   try:
9247     for (idx, device, offset) in disks:
9248       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9249       # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9250       wipe_chunk_size = \
9251         int(min(constants.MAX_WIPE_CHUNK,
9252                 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9253
9254       size = device.size
9255       last_output = 0
9256       start_time = time.time()
9257
9258       if offset == 0:
9259         info_text = ""
9260       else:
9261         info_text = (" (from %s to %s)" %
9262                      (utils.FormatUnit(offset, "h"),
9263                       utils.FormatUnit(size, "h")))
9264
9265       lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9266
9267       logging.info("Wiping disk %d for instance %s on node %s using"
9268                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9269
9270       while offset < size:
9271         wipe_size = min(wipe_chunk_size, size - offset)
9272
9273         logging.debug("Wiping disk %d, offset %s, chunk %s",
9274                       idx, offset, wipe_size)
9275
9276         result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9277                                            wipe_size)
9278         result.Raise("Could not wipe disk %d at offset %d for size %d" %
9279                      (idx, offset, wipe_size))
9280
9281         now = time.time()
9282         offset += wipe_size
9283         if now - last_output >= 60:
9284           eta = _CalcEta(now - start_time, offset, size)
9285           lu.LogInfo(" - done: %.1f%% ETA: %s",
9286                      offset / float(size) * 100, utils.FormatSeconds(eta))
9287           last_output = now
9288   finally:
9289     logging.info("Resuming synchronization of disks for instance '%s'",
9290                  instance.name)
9291
9292     result = lu.rpc.call_blockdev_pause_resume_sync(node,
9293                                                     (map(compat.snd, disks),
9294                                                      instance),
9295                                                     False)
9296
9297     if result.fail_msg:
9298       lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9299                     node, result.fail_msg)
9300     else:
9301       for idx, success in enumerate(result.payload):
9302         if not success:
9303           lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9304                         " failed", idx, instance.name)
9305
9306
9307 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9308   """Create all disks for an instance.
9309
9310   This abstracts away some work from AddInstance.
9311
9312   @type lu: L{LogicalUnit}
9313   @param lu: the logical unit on whose behalf we execute
9314   @type instance: L{objects.Instance}
9315   @param instance: the instance whose disks we should create
9316   @type to_skip: list
9317   @param to_skip: list of indices to skip
9318   @type target_node: string
9319   @param target_node: if passed, overrides the target node for creation
9320   @rtype: boolean
9321   @return: the success of the creation
9322
9323   """
9324   info = _GetInstanceInfoText(instance)
9325   if target_node is None:
9326     pnode = instance.primary_node
9327     all_nodes = instance.all_nodes
9328   else:
9329     pnode = target_node
9330     all_nodes = [pnode]
9331
9332   if instance.disk_template in constants.DTS_FILEBASED:
9333     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9334     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9335
9336     result.Raise("Failed to create directory '%s' on"
9337                  " node %s" % (file_storage_dir, pnode))
9338
9339   # Note: this needs to be kept in sync with adding of disks in
9340   # LUInstanceSetParams
9341   for idx, device in enumerate(instance.disks):
9342     if to_skip and idx in to_skip:
9343       continue
9344     logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9345     #HARDCODE
9346     for node in all_nodes:
9347       f_create = node == pnode
9348       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9349
9350
9351 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9352   """Remove all disks for an instance.
9353
9354   This abstracts away some work from `AddInstance()` and
9355   `RemoveInstance()`. Note that in case some of the devices couldn't
9356   be removed, the removal will continue with the other ones (compare
9357   with `_CreateDisks()`).
9358
9359   @type lu: L{LogicalUnit}
9360   @param lu: the logical unit on whose behalf we execute
9361   @type instance: L{objects.Instance}
9362   @param instance: the instance whose disks we should remove
9363   @type target_node: string
9364   @param target_node: used to override the node on which to remove the disks
9365   @rtype: boolean
9366   @return: the success of the removal
9367
9368   """
9369   logging.info("Removing block devices for instance %s", instance.name)
9370
9371   all_result = True
9372   ports_to_release = set()
9373   anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9374   for (idx, device) in enumerate(anno_disks):
9375     if target_node:
9376       edata = [(target_node, device)]
9377     else:
9378       edata = device.ComputeNodeTree(instance.primary_node)
9379     for node, disk in edata:
9380       lu.cfg.SetDiskID(disk, node)
9381       result = lu.rpc.call_blockdev_remove(node, disk)
9382       if result.fail_msg:
9383         lu.LogWarning("Could not remove disk %s on node %s,"
9384                       " continuing anyway: %s", idx, node, result.fail_msg)
9385         if not (result.offline and node != instance.primary_node):
9386           all_result = False
9387
9388     # if this is a DRBD disk, return its port to the pool
9389     if device.dev_type in constants.LDS_DRBD:
9390       ports_to_release.add(device.logical_id[2])
9391
9392   if all_result or ignore_failures:
9393     for port in ports_to_release:
9394       lu.cfg.AddTcpUdpPort(port)
9395
9396   if instance.disk_template in constants.DTS_FILEBASED:
9397     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9398     if target_node:
9399       tgt = target_node
9400     else:
9401       tgt = instance.primary_node
9402     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9403     if result.fail_msg:
9404       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9405                     file_storage_dir, instance.primary_node, result.fail_msg)
9406       all_result = False
9407
9408   return all_result
9409
9410
9411 def _ComputeDiskSizePerVG(disk_template, disks):
9412   """Compute disk size requirements in the volume group
9413
9414   """
9415   def _compute(disks, payload):
9416     """Universal algorithm.
9417
9418     """
9419     vgs = {}
9420     for disk in disks:
9421       vgs[disk[constants.IDISK_VG]] = \
9422         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9423
9424     return vgs
9425
9426   # Required free disk space as a function of disk and swap space
9427   req_size_dict = {
9428     constants.DT_DISKLESS: {},
9429     constants.DT_PLAIN: _compute(disks, 0),
9430     # 128 MB are added for drbd metadata for each disk
9431     constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9432     constants.DT_FILE: {},
9433     constants.DT_SHARED_FILE: {},
9434   }
9435
9436   if disk_template not in req_size_dict:
9437     raise errors.ProgrammerError("Disk template '%s' size requirement"
9438                                  " is unknown" % disk_template)
9439
9440   return req_size_dict[disk_template]
9441
9442
9443 def _FilterVmNodes(lu, nodenames):
9444   """Filters out non-vm_capable nodes from a list.
9445
9446   @type lu: L{LogicalUnit}
9447   @param lu: the logical unit for which we check
9448   @type nodenames: list
9449   @param nodenames: the list of nodes on which we should check
9450   @rtype: list
9451   @return: the list of vm-capable nodes
9452
9453   """
9454   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9455   return [name for name in nodenames if name not in vm_nodes]
9456
9457
9458 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9459   """Hypervisor parameter validation.
9460
9461   This function abstract the hypervisor parameter validation to be
9462   used in both instance create and instance modify.
9463
9464   @type lu: L{LogicalUnit}
9465   @param lu: the logical unit for which we check
9466   @type nodenames: list
9467   @param nodenames: the list of nodes on which we should check
9468   @type hvname: string
9469   @param hvname: the name of the hypervisor we should use
9470   @type hvparams: dict
9471   @param hvparams: the parameters which we need to check
9472   @raise errors.OpPrereqError: if the parameters are not valid
9473
9474   """
9475   nodenames = _FilterVmNodes(lu, nodenames)
9476
9477   cluster = lu.cfg.GetClusterInfo()
9478   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9479
9480   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9481   for node in nodenames:
9482     info = hvinfo[node]
9483     if info.offline:
9484       continue
9485     info.Raise("Hypervisor parameter validation failed on node %s" % node)
9486
9487
9488 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9489   """OS parameters validation.
9490
9491   @type lu: L{LogicalUnit}
9492   @param lu: the logical unit for which we check
9493   @type required: boolean
9494   @param required: whether the validation should fail if the OS is not
9495       found
9496   @type nodenames: list
9497   @param nodenames: the list of nodes on which we should check
9498   @type osname: string
9499   @param osname: the name of the hypervisor we should use
9500   @type osparams: dict
9501   @param osparams: the parameters which we need to check
9502   @raise errors.OpPrereqError: if the parameters are not valid
9503
9504   """
9505   nodenames = _FilterVmNodes(lu, nodenames)
9506   result = lu.rpc.call_os_validate(nodenames, required, osname,
9507                                    [constants.OS_VALIDATE_PARAMETERS],
9508                                    osparams)
9509   for node, nres in result.items():
9510     # we don't check for offline cases since this should be run only
9511     # against the master node and/or an instance's nodes
9512     nres.Raise("OS Parameters validation failed on node %s" % node)
9513     if not nres.payload:
9514       lu.LogInfo("OS %s not found on node %s, validation skipped",
9515                  osname, node)
9516
9517
9518 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9519   """Wrapper around IAReqInstanceAlloc.
9520
9521   @param op: The instance opcode
9522   @param disks: The computed disks
9523   @param nics: The computed nics
9524   @param beparams: The full filled beparams
9525
9526   @returns: A filled L{iallocator.IAReqInstanceAlloc}
9527
9528   """
9529   spindle_use = beparams[constants.BE_SPINDLE_USE]
9530   return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9531                                        disk_template=op.disk_template,
9532                                        tags=op.tags,
9533                                        os=op.os_type,
9534                                        vcpus=beparams[constants.BE_VCPUS],
9535                                        memory=beparams[constants.BE_MAXMEM],
9536                                        spindle_use=spindle_use,
9537                                        disks=disks,
9538                                        nics=[n.ToDict() for n in nics],
9539                                        hypervisor=op.hypervisor)
9540
9541
9542 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9543   """Computes the nics.
9544
9545   @param op: The instance opcode
9546   @param cluster: Cluster configuration object
9547   @param default_ip: The default ip to assign
9548   @param cfg: An instance of the configuration object
9549   @param ec_id: Execution context ID
9550
9551   @returns: The build up nics
9552
9553   """
9554   nics = []
9555   for nic in op.nics:
9556     nic_mode_req = nic.get(constants.INIC_MODE, None)
9557     nic_mode = nic_mode_req
9558     if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9559       nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9560
9561     net = nic.get(constants.INIC_NETWORK, None)
9562     link = nic.get(constants.NIC_LINK, None)
9563     ip = nic.get(constants.INIC_IP, None)
9564
9565     if net is None or net.lower() == constants.VALUE_NONE:
9566       net = None
9567     else:
9568       if nic_mode_req is not None or link is not None:
9569         raise errors.OpPrereqError("If network is given, no mode or link"
9570                                    " is allowed to be passed",
9571                                    errors.ECODE_INVAL)
9572
9573     # ip validity checks
9574     if ip is None or ip.lower() == constants.VALUE_NONE:
9575       nic_ip = None
9576     elif ip.lower() == constants.VALUE_AUTO:
9577       if not op.name_check:
9578         raise errors.OpPrereqError("IP address set to auto but name checks"
9579                                    " have been skipped",
9580                                    errors.ECODE_INVAL)
9581       nic_ip = default_ip
9582     else:
9583       # We defer pool operations until later, so that the iallocator has
9584       # filled in the instance's node(s) dimara
9585       if ip.lower() == constants.NIC_IP_POOL:
9586         if net is None:
9587           raise errors.OpPrereqError("if ip=pool, parameter network"
9588                                      " must be passed too",
9589                                      errors.ECODE_INVAL)
9590
9591       elif not netutils.IPAddress.IsValid(ip):
9592         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9593                                    errors.ECODE_INVAL)
9594
9595       nic_ip = ip
9596
9597     # TODO: check the ip address for uniqueness
9598     if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9599       raise errors.OpPrereqError("Routed nic mode requires an ip address",
9600                                  errors.ECODE_INVAL)
9601
9602     # MAC address verification
9603     mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9604     if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9605       mac = utils.NormalizeAndValidateMac(mac)
9606
9607       try:
9608         # TODO: We need to factor this out
9609         cfg.ReserveMAC(mac, ec_id)
9610       except errors.ReservationError:
9611         raise errors.OpPrereqError("MAC address %s already in use"
9612                                    " in cluster" % mac,
9613                                    errors.ECODE_NOTUNIQUE)
9614
9615     #  Build nic parameters
9616     nicparams = {}
9617     if nic_mode_req:
9618       nicparams[constants.NIC_MODE] = nic_mode
9619     if link:
9620       nicparams[constants.NIC_LINK] = link
9621
9622     check_params = cluster.SimpleFillNIC(nicparams)
9623     objects.NIC.CheckParameterSyntax(check_params)
9624     nics.append(objects.NIC(mac=mac, ip=nic_ip,
9625                             network=net, nicparams=nicparams))
9626
9627   return nics
9628
9629
9630 def _ComputeDisks(op, default_vg):
9631   """Computes the instance disks.
9632
9633   @param op: The instance opcode
9634   @param default_vg: The default_vg to assume
9635
9636   @return: The computer disks
9637
9638   """
9639   disks = []
9640   for disk in op.disks:
9641     mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9642     if mode not in constants.DISK_ACCESS_SET:
9643       raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9644                                  mode, errors.ECODE_INVAL)
9645     size = disk.get(constants.IDISK_SIZE, None)
9646     if size is None:
9647       raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9648     try:
9649       size = int(size)
9650     except (TypeError, ValueError):
9651       raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9652                                  errors.ECODE_INVAL)
9653
9654     data_vg = disk.get(constants.IDISK_VG, default_vg)
9655     new_disk = {
9656       constants.IDISK_SIZE: size,
9657       constants.IDISK_MODE: mode,
9658       constants.IDISK_VG: data_vg,
9659       }
9660     if constants.IDISK_METAVG in disk:
9661       new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9662     if constants.IDISK_ADOPT in disk:
9663       new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9664     disks.append(new_disk)
9665
9666   return disks
9667
9668
9669 def _ComputeFullBeParams(op, cluster):
9670   """Computes the full beparams.
9671
9672   @param op: The instance opcode
9673   @param cluster: The cluster config object
9674
9675   @return: The fully filled beparams
9676
9677   """
9678   default_beparams = cluster.beparams[constants.PP_DEFAULT]
9679   for param, value in op.beparams.iteritems():
9680     if value == constants.VALUE_AUTO:
9681       op.beparams[param] = default_beparams[param]
9682   objects.UpgradeBeParams(op.beparams)
9683   utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9684   return cluster.SimpleFillBE(op.beparams)
9685
9686
9687 class LUInstanceCreate(LogicalUnit):
9688   """Create an instance.
9689
9690   """
9691   HPATH = "instance-add"
9692   HTYPE = constants.HTYPE_INSTANCE
9693   REQ_BGL = False
9694
9695   def CheckArguments(self):
9696     """Check arguments.
9697
9698     """
9699     # do not require name_check to ease forward/backward compatibility
9700     # for tools
9701     if self.op.no_install and self.op.start:
9702       self.LogInfo("No-installation mode selected, disabling startup")
9703       self.op.start = False
9704     # validate/normalize the instance name
9705     self.op.instance_name = \
9706       netutils.Hostname.GetNormalizedName(self.op.instance_name)
9707
9708     if self.op.ip_check and not self.op.name_check:
9709       # TODO: make the ip check more flexible and not depend on the name check
9710       raise errors.OpPrereqError("Cannot do IP address check without a name"
9711                                  " check", errors.ECODE_INVAL)
9712
9713     # check nics' parameter names
9714     for nic in self.op.nics:
9715       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9716
9717     # check disks. parameter names and consistent adopt/no-adopt strategy
9718     has_adopt = has_no_adopt = False
9719     for disk in self.op.disks:
9720       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9721       if constants.IDISK_ADOPT in disk:
9722         has_adopt = True
9723       else:
9724         has_no_adopt = True
9725     if has_adopt and has_no_adopt:
9726       raise errors.OpPrereqError("Either all disks are adopted or none is",
9727                                  errors.ECODE_INVAL)
9728     if has_adopt:
9729       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9730         raise errors.OpPrereqError("Disk adoption is not supported for the"
9731                                    " '%s' disk template" %
9732                                    self.op.disk_template,
9733                                    errors.ECODE_INVAL)
9734       if self.op.iallocator is not None:
9735         raise errors.OpPrereqError("Disk adoption not allowed with an"
9736                                    " iallocator script", errors.ECODE_INVAL)
9737       if self.op.mode == constants.INSTANCE_IMPORT:
9738         raise errors.OpPrereqError("Disk adoption not allowed for"
9739                                    " instance import", errors.ECODE_INVAL)
9740     else:
9741       if self.op.disk_template in constants.DTS_MUST_ADOPT:
9742         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9743                                    " but no 'adopt' parameter given" %
9744                                    self.op.disk_template,
9745                                    errors.ECODE_INVAL)
9746
9747     self.adopt_disks = has_adopt
9748
9749     # instance name verification
9750     if self.op.name_check:
9751       self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9752       self.op.instance_name = self.hostname1.name
9753       # used in CheckPrereq for ip ping check
9754       self.check_ip = self.hostname1.ip
9755     else:
9756       self.check_ip = None
9757
9758     # file storage checks
9759     if (self.op.file_driver and
9760         not self.op.file_driver in constants.FILE_DRIVER):
9761       raise errors.OpPrereqError("Invalid file driver name '%s'" %
9762                                  self.op.file_driver, errors.ECODE_INVAL)
9763
9764     if self.op.disk_template == constants.DT_FILE:
9765       opcodes.RequireFileStorage()
9766     elif self.op.disk_template == constants.DT_SHARED_FILE:
9767       opcodes.RequireSharedFileStorage()
9768
9769     ### Node/iallocator related checks
9770     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9771
9772     if self.op.pnode is not None:
9773       if self.op.disk_template in constants.DTS_INT_MIRROR:
9774         if self.op.snode is None:
9775           raise errors.OpPrereqError("The networked disk templates need"
9776                                      " a mirror node", errors.ECODE_INVAL)
9777       elif self.op.snode:
9778         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9779                         " template")
9780         self.op.snode = None
9781
9782     self._cds = _GetClusterDomainSecret()
9783
9784     if self.op.mode == constants.INSTANCE_IMPORT:
9785       # On import force_variant must be True, because if we forced it at
9786       # initial install, our only chance when importing it back is that it
9787       # works again!
9788       self.op.force_variant = True
9789
9790       if self.op.no_install:
9791         self.LogInfo("No-installation mode has no effect during import")
9792
9793     elif self.op.mode == constants.INSTANCE_CREATE:
9794       if self.op.os_type is None:
9795         raise errors.OpPrereqError("No guest OS specified",
9796                                    errors.ECODE_INVAL)
9797       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9798         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9799                                    " installation" % self.op.os_type,
9800                                    errors.ECODE_STATE)
9801       if self.op.disk_template is None:
9802         raise errors.OpPrereqError("No disk template specified",
9803                                    errors.ECODE_INVAL)
9804
9805     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9806       # Check handshake to ensure both clusters have the same domain secret
9807       src_handshake = self.op.source_handshake
9808       if not src_handshake:
9809         raise errors.OpPrereqError("Missing source handshake",
9810                                    errors.ECODE_INVAL)
9811
9812       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9813                                                            src_handshake)
9814       if errmsg:
9815         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9816                                    errors.ECODE_INVAL)
9817
9818       # Load and check source CA
9819       self.source_x509_ca_pem = self.op.source_x509_ca
9820       if not self.source_x509_ca_pem:
9821         raise errors.OpPrereqError("Missing source X509 CA",
9822                                    errors.ECODE_INVAL)
9823
9824       try:
9825         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9826                                                     self._cds)
9827       except OpenSSL.crypto.Error, err:
9828         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9829                                    (err, ), errors.ECODE_INVAL)
9830
9831       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9832       if errcode is not None:
9833         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9834                                    errors.ECODE_INVAL)
9835
9836       self.source_x509_ca = cert
9837
9838       src_instance_name = self.op.source_instance_name
9839       if not src_instance_name:
9840         raise errors.OpPrereqError("Missing source instance name",
9841                                    errors.ECODE_INVAL)
9842
9843       self.source_instance_name = \
9844           netutils.GetHostname(name=src_instance_name).name
9845
9846     else:
9847       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9848                                  self.op.mode, errors.ECODE_INVAL)
9849
9850   def ExpandNames(self):
9851     """ExpandNames for CreateInstance.
9852
9853     Figure out the right locks for instance creation.
9854
9855     """
9856     self.needed_locks = {}
9857
9858     instance_name = self.op.instance_name
9859     # this is just a preventive check, but someone might still add this
9860     # instance in the meantime, and creation will fail at lock-add time
9861     if instance_name in self.cfg.GetInstanceList():
9862       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9863                                  instance_name, errors.ECODE_EXISTS)
9864
9865     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9866
9867     if self.op.iallocator:
9868       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9869       # specifying a group on instance creation and then selecting nodes from
9870       # that group
9871       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9872       self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9873     else:
9874       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9875       nodelist = [self.op.pnode]
9876       if self.op.snode is not None:
9877         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9878         nodelist.append(self.op.snode)
9879       self.needed_locks[locking.LEVEL_NODE] = nodelist
9880
9881     # in case of import lock the source node too
9882     if self.op.mode == constants.INSTANCE_IMPORT:
9883       src_node = self.op.src_node
9884       src_path = self.op.src_path
9885
9886       if src_path is None:
9887         self.op.src_path = src_path = self.op.instance_name
9888
9889       if src_node is None:
9890         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9891         self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9892         self.op.src_node = None
9893         if os.path.isabs(src_path):
9894           raise errors.OpPrereqError("Importing an instance from a path"
9895                                      " requires a source node option",
9896                                      errors.ECODE_INVAL)
9897       else:
9898         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9899         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9900           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9901         if not os.path.isabs(src_path):
9902           self.op.src_path = src_path = \
9903             utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9904
9905     self.needed_locks[locking.LEVEL_NODE_RES] = \
9906       _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
9907
9908   def _RunAllocator(self):
9909     """Run the allocator based on input opcode.
9910
9911     """
9912     #TODO Export network to iallocator so that it chooses a pnode
9913     #     in a nodegroup that has the desired network connected to
9914     req = _CreateInstanceAllocRequest(self.op, self.disks,
9915                                       self.nics, self.be_full)
9916     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9917
9918     ial.Run(self.op.iallocator)
9919
9920     if not ial.success:
9921       raise errors.OpPrereqError("Can't compute nodes using"
9922                                  " iallocator '%s': %s" %
9923                                  (self.op.iallocator, ial.info),
9924                                  errors.ECODE_NORES)
9925     self.op.pnode = ial.result[0]
9926     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9927                  self.op.instance_name, self.op.iallocator,
9928                  utils.CommaJoin(ial.result))
9929
9930     assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9931
9932     if req.RequiredNodes() == 2:
9933       self.op.snode = ial.result[1]
9934
9935   def BuildHooksEnv(self):
9936     """Build hooks env.
9937
9938     This runs on master, primary and secondary nodes of the instance.
9939
9940     """
9941     env = {
9942       "ADD_MODE": self.op.mode,
9943       }
9944     if self.op.mode == constants.INSTANCE_IMPORT:
9945       env["SRC_NODE"] = self.op.src_node
9946       env["SRC_PATH"] = self.op.src_path
9947       env["SRC_IMAGES"] = self.src_images
9948
9949     env.update(_BuildInstanceHookEnv(
9950       name=self.op.instance_name,
9951       primary_node=self.op.pnode,
9952       secondary_nodes=self.secondaries,
9953       status=self.op.start,
9954       os_type=self.op.os_type,
9955       minmem=self.be_full[constants.BE_MINMEM],
9956       maxmem=self.be_full[constants.BE_MAXMEM],
9957       vcpus=self.be_full[constants.BE_VCPUS],
9958       nics=_NICListToTuple(self, self.nics),
9959       disk_template=self.op.disk_template,
9960       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9961              for d in self.disks],
9962       bep=self.be_full,
9963       hvp=self.hv_full,
9964       hypervisor_name=self.op.hypervisor,
9965       tags=self.op.tags,
9966     ))
9967
9968     return env
9969
9970   def BuildHooksNodes(self):
9971     """Build hooks nodes.
9972
9973     """
9974     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9975     return nl, nl
9976
9977   def _ReadExportInfo(self):
9978     """Reads the export information from disk.
9979
9980     It will override the opcode source node and path with the actual
9981     information, if these two were not specified before.
9982
9983     @return: the export information
9984
9985     """
9986     assert self.op.mode == constants.INSTANCE_IMPORT
9987
9988     src_node = self.op.src_node
9989     src_path = self.op.src_path
9990
9991     if src_node is None:
9992       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9993       exp_list = self.rpc.call_export_list(locked_nodes)
9994       found = False
9995       for node in exp_list:
9996         if exp_list[node].fail_msg:
9997           continue
9998         if src_path in exp_list[node].payload:
9999           found = True
10000           self.op.src_node = src_node = node
10001           self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10002                                                        src_path)
10003           break
10004       if not found:
10005         raise errors.OpPrereqError("No export found for relative path %s" %
10006                                     src_path, errors.ECODE_INVAL)
10007
10008     _CheckNodeOnline(self, src_node)
10009     result = self.rpc.call_export_info(src_node, src_path)
10010     result.Raise("No export or invalid export found in dir %s" % src_path)
10011
10012     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10013     if not export_info.has_section(constants.INISECT_EXP):
10014       raise errors.ProgrammerError("Corrupted export config",
10015                                    errors.ECODE_ENVIRON)
10016
10017     ei_version = export_info.get(constants.INISECT_EXP, "version")
10018     if (int(ei_version) != constants.EXPORT_VERSION):
10019       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10020                                  (ei_version, constants.EXPORT_VERSION),
10021                                  errors.ECODE_ENVIRON)
10022     return export_info
10023
10024   def _ReadExportParams(self, einfo):
10025     """Use export parameters as defaults.
10026
10027     In case the opcode doesn't specify (as in override) some instance
10028     parameters, then try to use them from the export information, if
10029     that declares them.
10030
10031     """
10032     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10033
10034     if self.op.disk_template is None:
10035       if einfo.has_option(constants.INISECT_INS, "disk_template"):
10036         self.op.disk_template = einfo.get(constants.INISECT_INS,
10037                                           "disk_template")
10038         if self.op.disk_template not in constants.DISK_TEMPLATES:
10039           raise errors.OpPrereqError("Disk template specified in configuration"
10040                                      " file is not one of the allowed values:"
10041                                      " %s" %
10042                                      " ".join(constants.DISK_TEMPLATES),
10043                                      errors.ECODE_INVAL)
10044       else:
10045         raise errors.OpPrereqError("No disk template specified and the export"
10046                                    " is missing the disk_template information",
10047                                    errors.ECODE_INVAL)
10048
10049     if not self.op.disks:
10050       disks = []
10051       # TODO: import the disk iv_name too
10052       for idx in range(constants.MAX_DISKS):
10053         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10054           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10055           disks.append({constants.IDISK_SIZE: disk_sz})
10056       self.op.disks = disks
10057       if not disks and self.op.disk_template != constants.DT_DISKLESS:
10058         raise errors.OpPrereqError("No disk info specified and the export"
10059                                    " is missing the disk information",
10060                                    errors.ECODE_INVAL)
10061
10062     if not self.op.nics:
10063       nics = []
10064       for idx in range(constants.MAX_NICS):
10065         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10066           ndict = {}
10067           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10068             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10069             ndict[name] = v
10070           nics.append(ndict)
10071         else:
10072           break
10073       self.op.nics = nics
10074
10075     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10076       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10077
10078     if (self.op.hypervisor is None and
10079         einfo.has_option(constants.INISECT_INS, "hypervisor")):
10080       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10081
10082     if einfo.has_section(constants.INISECT_HYP):
10083       # use the export parameters but do not override the ones
10084       # specified by the user
10085       for name, value in einfo.items(constants.INISECT_HYP):
10086         if name not in self.op.hvparams:
10087           self.op.hvparams[name] = value
10088
10089     if einfo.has_section(constants.INISECT_BEP):
10090       # use the parameters, without overriding
10091       for name, value in einfo.items(constants.INISECT_BEP):
10092         if name not in self.op.beparams:
10093           self.op.beparams[name] = value
10094         # Compatibility for the old "memory" be param
10095         if name == constants.BE_MEMORY:
10096           if constants.BE_MAXMEM not in self.op.beparams:
10097             self.op.beparams[constants.BE_MAXMEM] = value
10098           if constants.BE_MINMEM not in self.op.beparams:
10099             self.op.beparams[constants.BE_MINMEM] = value
10100     else:
10101       # try to read the parameters old style, from the main section
10102       for name in constants.BES_PARAMETERS:
10103         if (name not in self.op.beparams and
10104             einfo.has_option(constants.INISECT_INS, name)):
10105           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10106
10107     if einfo.has_section(constants.INISECT_OSP):
10108       # use the parameters, without overriding
10109       for name, value in einfo.items(constants.INISECT_OSP):
10110         if name not in self.op.osparams:
10111           self.op.osparams[name] = value
10112
10113   def _RevertToDefaults(self, cluster):
10114     """Revert the instance parameters to the default values.
10115
10116     """
10117     # hvparams
10118     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10119     for name in self.op.hvparams.keys():
10120       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10121         del self.op.hvparams[name]
10122     # beparams
10123     be_defs = cluster.SimpleFillBE({})
10124     for name in self.op.beparams.keys():
10125       if name in be_defs and be_defs[name] == self.op.beparams[name]:
10126         del self.op.beparams[name]
10127     # nic params
10128     nic_defs = cluster.SimpleFillNIC({})
10129     for nic in self.op.nics:
10130       for name in constants.NICS_PARAMETERS:
10131         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10132           del nic[name]
10133     # osparams
10134     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10135     for name in self.op.osparams.keys():
10136       if name in os_defs and os_defs[name] == self.op.osparams[name]:
10137         del self.op.osparams[name]
10138
10139   def _CalculateFileStorageDir(self):
10140     """Calculate final instance file storage dir.
10141
10142     """
10143     # file storage dir calculation/check
10144     self.instance_file_storage_dir = None
10145     if self.op.disk_template in constants.DTS_FILEBASED:
10146       # build the full file storage dir path
10147       joinargs = []
10148
10149       if self.op.disk_template == constants.DT_SHARED_FILE:
10150         get_fsd_fn = self.cfg.GetSharedFileStorageDir
10151       else:
10152         get_fsd_fn = self.cfg.GetFileStorageDir
10153
10154       cfg_storagedir = get_fsd_fn()
10155       if not cfg_storagedir:
10156         raise errors.OpPrereqError("Cluster file storage dir not defined",
10157                                    errors.ECODE_STATE)
10158       joinargs.append(cfg_storagedir)
10159
10160       if self.op.file_storage_dir is not None:
10161         joinargs.append(self.op.file_storage_dir)
10162
10163       joinargs.append(self.op.instance_name)
10164
10165       # pylint: disable=W0142
10166       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10167
10168   def CheckPrereq(self): # pylint: disable=R0914
10169     """Check prerequisites.
10170
10171     """
10172     self._CalculateFileStorageDir()
10173
10174     if self.op.mode == constants.INSTANCE_IMPORT:
10175       export_info = self._ReadExportInfo()
10176       self._ReadExportParams(export_info)
10177       self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10178     else:
10179       self._old_instance_name = None
10180
10181     if (not self.cfg.GetVGName() and
10182         self.op.disk_template not in constants.DTS_NOT_LVM):
10183       raise errors.OpPrereqError("Cluster does not support lvm-based"
10184                                  " instances", errors.ECODE_STATE)
10185
10186     if (self.op.hypervisor is None or
10187         self.op.hypervisor == constants.VALUE_AUTO):
10188       self.op.hypervisor = self.cfg.GetHypervisorType()
10189
10190     cluster = self.cfg.GetClusterInfo()
10191     enabled_hvs = cluster.enabled_hypervisors
10192     if self.op.hypervisor not in enabled_hvs:
10193       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10194                                  " cluster (%s)" %
10195                                  (self.op.hypervisor, ",".join(enabled_hvs)),
10196                                  errors.ECODE_STATE)
10197
10198     # Check tag validity
10199     for tag in self.op.tags:
10200       objects.TaggableObject.ValidateTag(tag)
10201
10202     # check hypervisor parameter syntax (locally)
10203     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10204     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10205                                       self.op.hvparams)
10206     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10207     hv_type.CheckParameterSyntax(filled_hvp)
10208     self.hv_full = filled_hvp
10209     # check that we don't specify global parameters on an instance
10210     _CheckGlobalHvParams(self.op.hvparams)
10211
10212     # fill and remember the beparams dict
10213     self.be_full = _ComputeFullBeParams(self.op, cluster)
10214
10215     # build os parameters
10216     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10217
10218     # now that hvp/bep are in final format, let's reset to defaults,
10219     # if told to do so
10220     if self.op.identify_defaults:
10221       self._RevertToDefaults(cluster)
10222
10223     # NIC buildup
10224     self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10225                              self.proc.GetECId())
10226
10227     # disk checks/pre-build
10228     default_vg = self.cfg.GetVGName()
10229     self.disks = _ComputeDisks(self.op, default_vg)
10230
10231     if self.op.mode == constants.INSTANCE_IMPORT:
10232       disk_images = []
10233       for idx in range(len(self.disks)):
10234         option = "disk%d_dump" % idx
10235         if export_info.has_option(constants.INISECT_INS, option):
10236           # FIXME: are the old os-es, disk sizes, etc. useful?
10237           export_name = export_info.get(constants.INISECT_INS, option)
10238           image = utils.PathJoin(self.op.src_path, export_name)
10239           disk_images.append(image)
10240         else:
10241           disk_images.append(False)
10242
10243       self.src_images = disk_images
10244
10245       if self.op.instance_name == self._old_instance_name:
10246         for idx, nic in enumerate(self.nics):
10247           if nic.mac == constants.VALUE_AUTO:
10248             nic_mac_ini = "nic%d_mac" % idx
10249             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10250
10251     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10252
10253     # ip ping checks (we use the same ip that was resolved in ExpandNames)
10254     if self.op.ip_check:
10255       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10256         raise errors.OpPrereqError("IP %s of instance %s already in use" %
10257                                    (self.check_ip, self.op.instance_name),
10258                                    errors.ECODE_NOTUNIQUE)
10259
10260     #### mac address generation
10261     # By generating here the mac address both the allocator and the hooks get
10262     # the real final mac address rather than the 'auto' or 'generate' value.
10263     # There is a race condition between the generation and the instance object
10264     # creation, which means that we know the mac is valid now, but we're not
10265     # sure it will be when we actually add the instance. If things go bad
10266     # adding the instance will abort because of a duplicate mac, and the
10267     # creation job will fail.
10268     for nic in self.nics:
10269       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10270         nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10271
10272     #### allocator run
10273
10274     if self.op.iallocator is not None:
10275       self._RunAllocator()
10276
10277     # Release all unneeded node locks
10278     keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10279     _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10280     _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10281     _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10282
10283     assert (self.owned_locks(locking.LEVEL_NODE) ==
10284             self.owned_locks(locking.LEVEL_NODE_RES)), \
10285       "Node locks differ from node resource locks"
10286
10287     #### node related checks
10288
10289     # check primary node
10290     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10291     assert self.pnode is not None, \
10292       "Cannot retrieve locked node %s" % self.op.pnode
10293     if pnode.offline:
10294       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10295                                  pnode.name, errors.ECODE_STATE)
10296     if pnode.drained:
10297       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10298                                  pnode.name, errors.ECODE_STATE)
10299     if not pnode.vm_capable:
10300       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10301                                  " '%s'" % pnode.name, errors.ECODE_STATE)
10302
10303     self.secondaries = []
10304
10305     # Fill in any IPs from IP pools. This must happen here, because we need to
10306     # know the nic's primary node, as specified by the iallocator
10307     for idx, nic in enumerate(self.nics):
10308       net = nic.network
10309       if net is not None:
10310         netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10311         if netparams is None:
10312           raise errors.OpPrereqError("No netparams found for network"
10313                                      " %s. Propably not connected to"
10314                                      " node's %s nodegroup" %
10315                                      (net, self.pnode.name),
10316                                      errors.ECODE_INVAL)
10317         self.LogInfo("NIC/%d inherits netparams %s" %
10318                      (idx, netparams.values()))
10319         nic.nicparams = dict(netparams)
10320         if nic.ip is not None:
10321           if nic.ip.lower() == constants.NIC_IP_POOL:
10322             try:
10323               nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10324             except errors.ReservationError:
10325               raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10326                                          " from the address pool" % idx,
10327                                          errors.ECODE_STATE)
10328             self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10329           else:
10330             try:
10331               self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10332             except errors.ReservationError:
10333               raise errors.OpPrereqError("IP address %s already in use"
10334                                          " or does not belong to network %s" %
10335                                          (nic.ip, net),
10336                                          errors.ECODE_NOTUNIQUE)
10337       else:
10338         # net is None, ip None or given
10339         if self.op.conflicts_check:
10340           _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10341
10342     # mirror node verification
10343     if self.op.disk_template in constants.DTS_INT_MIRROR:
10344       if self.op.snode == pnode.name:
10345         raise errors.OpPrereqError("The secondary node cannot be the"
10346                                    " primary node", errors.ECODE_INVAL)
10347       _CheckNodeOnline(self, self.op.snode)
10348       _CheckNodeNotDrained(self, self.op.snode)
10349       _CheckNodeVmCapable(self, self.op.snode)
10350       self.secondaries.append(self.op.snode)
10351
10352       snode = self.cfg.GetNodeInfo(self.op.snode)
10353       if pnode.group != snode.group:
10354         self.LogWarning("The primary and secondary nodes are in two"
10355                         " different node groups; the disk parameters"
10356                         " from the first disk's node group will be"
10357                         " used")
10358
10359     nodenames = [pnode.name] + self.secondaries
10360
10361     # Verify instance specs
10362     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10363     ispec = {
10364       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10365       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10366       constants.ISPEC_DISK_COUNT: len(self.disks),
10367       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10368       constants.ISPEC_NIC_COUNT: len(self.nics),
10369       constants.ISPEC_SPINDLE_USE: spindle_use,
10370       }
10371
10372     group_info = self.cfg.GetNodeGroup(pnode.group)
10373     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10374     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10375     if not self.op.ignore_ipolicy and res:
10376       msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10377              (pnode.group, group_info.name, utils.CommaJoin(res)))
10378       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10379
10380     if not self.adopt_disks:
10381       if self.op.disk_template == constants.DT_RBD:
10382         # _CheckRADOSFreeSpace() is just a placeholder.
10383         # Any function that checks prerequisites can be placed here.
10384         # Check if there is enough space on the RADOS cluster.
10385         _CheckRADOSFreeSpace()
10386       else:
10387         # Check lv size requirements, if not adopting
10388         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10389         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10390
10391     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10392       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10393                                 disk[constants.IDISK_ADOPT])
10394                      for disk in self.disks])
10395       if len(all_lvs) != len(self.disks):
10396         raise errors.OpPrereqError("Duplicate volume names given for adoption",
10397                                    errors.ECODE_INVAL)
10398       for lv_name in all_lvs:
10399         try:
10400           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10401           # to ReserveLV uses the same syntax
10402           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10403         except errors.ReservationError:
10404           raise errors.OpPrereqError("LV named %s used by another instance" %
10405                                      lv_name, errors.ECODE_NOTUNIQUE)
10406
10407       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10408       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10409
10410       node_lvs = self.rpc.call_lv_list([pnode.name],
10411                                        vg_names.payload.keys())[pnode.name]
10412       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10413       node_lvs = node_lvs.payload
10414
10415       delta = all_lvs.difference(node_lvs.keys())
10416       if delta:
10417         raise errors.OpPrereqError("Missing logical volume(s): %s" %
10418                                    utils.CommaJoin(delta),
10419                                    errors.ECODE_INVAL)
10420       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10421       if online_lvs:
10422         raise errors.OpPrereqError("Online logical volumes found, cannot"
10423                                    " adopt: %s" % utils.CommaJoin(online_lvs),
10424                                    errors.ECODE_STATE)
10425       # update the size of disk based on what is found
10426       for dsk in self.disks:
10427         dsk[constants.IDISK_SIZE] = \
10428           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10429                                         dsk[constants.IDISK_ADOPT])][0]))
10430
10431     elif self.op.disk_template == constants.DT_BLOCK:
10432       # Normalize and de-duplicate device paths
10433       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10434                        for disk in self.disks])
10435       if len(all_disks) != len(self.disks):
10436         raise errors.OpPrereqError("Duplicate disk names given for adoption",
10437                                    errors.ECODE_INVAL)
10438       baddisks = [d for d in all_disks
10439                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10440       if baddisks:
10441         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10442                                    " cannot be adopted" %
10443                                    (utils.CommaJoin(baddisks),
10444                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
10445                                    errors.ECODE_INVAL)
10446
10447       node_disks = self.rpc.call_bdev_sizes([pnode.name],
10448                                             list(all_disks))[pnode.name]
10449       node_disks.Raise("Cannot get block device information from node %s" %
10450                        pnode.name)
10451       node_disks = node_disks.payload
10452       delta = all_disks.difference(node_disks.keys())
10453       if delta:
10454         raise errors.OpPrereqError("Missing block device(s): %s" %
10455                                    utils.CommaJoin(delta),
10456                                    errors.ECODE_INVAL)
10457       for dsk in self.disks:
10458         dsk[constants.IDISK_SIZE] = \
10459           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10460
10461     # Verify instance specs
10462     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10463     ispec = {
10464       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10465       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10466       constants.ISPEC_DISK_COUNT: len(self.disks),
10467       constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10468                                   for disk in self.disks],
10469       constants.ISPEC_NIC_COUNT: len(self.nics),
10470       constants.ISPEC_SPINDLE_USE: spindle_use,
10471       }
10472
10473     group_info = self.cfg.GetNodeGroup(pnode.group)
10474     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10475     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10476     if not self.op.ignore_ipolicy and res:
10477       raise errors.OpPrereqError(("Instance allocation to group %s violates"
10478                                   " policy: %s") % (pnode.group,
10479                                                     utils.CommaJoin(res)),
10480                                   errors.ECODE_INVAL)
10481
10482     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10483
10484     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10485     # check OS parameters (remotely)
10486     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10487
10488     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10489
10490     # memory check on primary node
10491     #TODO(dynmem): use MINMEM for checking
10492     if self.op.start:
10493       _CheckNodeFreeMemory(self, self.pnode.name,
10494                            "creating instance %s" % self.op.instance_name,
10495                            self.be_full[constants.BE_MAXMEM],
10496                            self.op.hypervisor)
10497
10498     self.dry_run_result = list(nodenames)
10499
10500   def Exec(self, feedback_fn):
10501     """Create and add the instance to the cluster.
10502
10503     """
10504     instance = self.op.instance_name
10505     pnode_name = self.pnode.name
10506
10507     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10508                 self.owned_locks(locking.LEVEL_NODE)), \
10509       "Node locks differ from node resource locks"
10510     assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10511
10512     ht_kind = self.op.hypervisor
10513     if ht_kind in constants.HTS_REQ_PORT:
10514       network_port = self.cfg.AllocatePort()
10515     else:
10516       network_port = None
10517
10518     # This is ugly but we got a chicken-egg problem here
10519     # We can only take the group disk parameters, as the instance
10520     # has no disks yet (we are generating them right here).
10521     node = self.cfg.GetNodeInfo(pnode_name)
10522     nodegroup = self.cfg.GetNodeGroup(node.group)
10523     disks = _GenerateDiskTemplate(self,
10524                                   self.op.disk_template,
10525                                   instance, pnode_name,
10526                                   self.secondaries,
10527                                   self.disks,
10528                                   self.instance_file_storage_dir,
10529                                   self.op.file_driver,
10530                                   0,
10531                                   feedback_fn,
10532                                   self.cfg.GetGroupDiskParams(nodegroup))
10533
10534     iobj = objects.Instance(name=instance, os=self.op.os_type,
10535                             primary_node=pnode_name,
10536                             nics=self.nics, disks=disks,
10537                             disk_template=self.op.disk_template,
10538                             admin_state=constants.ADMINST_DOWN,
10539                             network_port=network_port,
10540                             beparams=self.op.beparams,
10541                             hvparams=self.op.hvparams,
10542                             hypervisor=self.op.hypervisor,
10543                             osparams=self.op.osparams,
10544                             )
10545
10546     if self.op.tags:
10547       for tag in self.op.tags:
10548         iobj.AddTag(tag)
10549
10550     if self.adopt_disks:
10551       if self.op.disk_template == constants.DT_PLAIN:
10552         # rename LVs to the newly-generated names; we need to construct
10553         # 'fake' LV disks with the old data, plus the new unique_id
10554         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10555         rename_to = []
10556         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10557           rename_to.append(t_dsk.logical_id)
10558           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10559           self.cfg.SetDiskID(t_dsk, pnode_name)
10560         result = self.rpc.call_blockdev_rename(pnode_name,
10561                                                zip(tmp_disks, rename_to))
10562         result.Raise("Failed to rename adoped LVs")
10563     else:
10564       feedback_fn("* creating instance disks...")
10565       try:
10566         _CreateDisks(self, iobj)
10567       except errors.OpExecError:
10568         self.LogWarning("Device creation failed, reverting...")
10569         try:
10570           _RemoveDisks(self, iobj)
10571         finally:
10572           self.cfg.ReleaseDRBDMinors(instance)
10573           raise
10574
10575     feedback_fn("adding instance %s to cluster config" % instance)
10576
10577     self.cfg.AddInstance(iobj, self.proc.GetECId())
10578
10579     # Declare that we don't want to remove the instance lock anymore, as we've
10580     # added the instance to the config
10581     del self.remove_locks[locking.LEVEL_INSTANCE]
10582
10583     if self.op.mode == constants.INSTANCE_IMPORT:
10584       # Release unused nodes
10585       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10586     else:
10587       # Release all nodes
10588       _ReleaseLocks(self, locking.LEVEL_NODE)
10589
10590     disk_abort = False
10591     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10592       feedback_fn("* wiping instance disks...")
10593       try:
10594         _WipeDisks(self, iobj)
10595       except errors.OpExecError, err:
10596         logging.exception("Wiping disks failed")
10597         self.LogWarning("Wiping instance disks failed (%s)", err)
10598         disk_abort = True
10599
10600     if disk_abort:
10601       # Something is already wrong with the disks, don't do anything else
10602       pass
10603     elif self.op.wait_for_sync:
10604       disk_abort = not _WaitForSync(self, iobj)
10605     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10606       # make sure the disks are not degraded (still sync-ing is ok)
10607       feedback_fn("* checking mirrors status")
10608       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10609     else:
10610       disk_abort = False
10611
10612     if disk_abort:
10613       _RemoveDisks(self, iobj)
10614       self.cfg.RemoveInstance(iobj.name)
10615       # Make sure the instance lock gets removed
10616       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10617       raise errors.OpExecError("There are some degraded disks for"
10618                                " this instance")
10619
10620     # Release all node resource locks
10621     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10622
10623     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10624       # we need to set the disks ID to the primary node, since the
10625       # preceding code might or might have not done it, depending on
10626       # disk template and other options
10627       for disk in iobj.disks:
10628         self.cfg.SetDiskID(disk, pnode_name)
10629       if self.op.mode == constants.INSTANCE_CREATE:
10630         if not self.op.no_install:
10631           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10632                         not self.op.wait_for_sync)
10633           if pause_sync:
10634             feedback_fn("* pausing disk sync to install instance OS")
10635             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10636                                                               (iobj.disks,
10637                                                                iobj), True)
10638             for idx, success in enumerate(result.payload):
10639               if not success:
10640                 logging.warn("pause-sync of instance %s for disk %d failed",
10641                              instance, idx)
10642
10643           feedback_fn("* running the instance OS create scripts...")
10644           # FIXME: pass debug option from opcode to backend
10645           os_add_result = \
10646             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10647                                           self.op.debug_level)
10648           if pause_sync:
10649             feedback_fn("* resuming disk sync")
10650             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10651                                                               (iobj.disks,
10652                                                                iobj), False)
10653             for idx, success in enumerate(result.payload):
10654               if not success:
10655                 logging.warn("resume-sync of instance %s for disk %d failed",
10656                              instance, idx)
10657
10658           os_add_result.Raise("Could not add os for instance %s"
10659                               " on node %s" % (instance, pnode_name))
10660
10661       else:
10662         if self.op.mode == constants.INSTANCE_IMPORT:
10663           feedback_fn("* running the instance OS import scripts...")
10664
10665           transfers = []
10666
10667           for idx, image in enumerate(self.src_images):
10668             if not image:
10669               continue
10670
10671             # FIXME: pass debug option from opcode to backend
10672             dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10673                                                constants.IEIO_FILE, (image, ),
10674                                                constants.IEIO_SCRIPT,
10675                                                (iobj.disks[idx], idx),
10676                                                None)
10677             transfers.append(dt)
10678
10679           import_result = \
10680             masterd.instance.TransferInstanceData(self, feedback_fn,
10681                                                   self.op.src_node, pnode_name,
10682                                                   self.pnode.secondary_ip,
10683                                                   iobj, transfers)
10684           if not compat.all(import_result):
10685             self.LogWarning("Some disks for instance %s on node %s were not"
10686                             " imported successfully" % (instance, pnode_name))
10687
10688           rename_from = self._old_instance_name
10689
10690         elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10691           feedback_fn("* preparing remote import...")
10692           # The source cluster will stop the instance before attempting to make
10693           # a connection. In some cases stopping an instance can take a long
10694           # time, hence the shutdown timeout is added to the connection
10695           # timeout.
10696           connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10697                              self.op.source_shutdown_timeout)
10698           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10699
10700           assert iobj.primary_node == self.pnode.name
10701           disk_results = \
10702             masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10703                                           self.source_x509_ca,
10704                                           self._cds, timeouts)
10705           if not compat.all(disk_results):
10706             # TODO: Should the instance still be started, even if some disks
10707             # failed to import (valid for local imports, too)?
10708             self.LogWarning("Some disks for instance %s on node %s were not"
10709                             " imported successfully" % (instance, pnode_name))
10710
10711           rename_from = self.source_instance_name
10712
10713         else:
10714           # also checked in the prereq part
10715           raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10716                                        % self.op.mode)
10717
10718         # Run rename script on newly imported instance
10719         assert iobj.name == instance
10720         feedback_fn("Running rename script for %s" % instance)
10721         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10722                                                    rename_from,
10723                                                    self.op.debug_level)
10724         if result.fail_msg:
10725           self.LogWarning("Failed to run rename script for %s on node"
10726                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10727
10728     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10729
10730     if self.op.start:
10731       iobj.admin_state = constants.ADMINST_UP
10732       self.cfg.Update(iobj, feedback_fn)
10733       logging.info("Starting instance %s on node %s", instance, pnode_name)
10734       feedback_fn("* starting instance...")
10735       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10736                                             False)
10737       result.Raise("Could not start instance")
10738
10739     return list(iobj.all_nodes)
10740
10741
10742 class LUInstanceMultiAlloc(NoHooksLU):
10743   """Allocates multiple instances at the same time.
10744
10745   """
10746   REQ_BGL = False
10747
10748   def CheckArguments(self):
10749     """Check arguments.
10750
10751     """
10752     nodes = []
10753     for inst in self.op.instances:
10754       if inst.iallocator is not None:
10755         raise errors.OpPrereqError("iallocator are not allowed to be set on"
10756                                    " instance objects", errors.ECODE_INVAL)
10757       nodes.append(bool(inst.pnode))
10758       if inst.disk_template in constants.DTS_INT_MIRROR:
10759         nodes.append(bool(inst.snode))
10760
10761     has_nodes = compat.any(nodes)
10762     if compat.all(nodes) ^ has_nodes:
10763       raise errors.OpPrereqError("There are instance objects providing"
10764                                  " pnode/snode while others do not",
10765                                  errors.ECODE_INVAL)
10766
10767     if self.op.iallocator is None:
10768       default_iallocator = self.cfg.GetDefaultIAllocator()
10769       if default_iallocator and has_nodes:
10770         self.op.iallocator = default_iallocator
10771       else:
10772         raise errors.OpPrereqError("No iallocator or nodes on the instances"
10773                                    " given and no cluster-wide default"
10774                                    " iallocator found; please specify either"
10775                                    " an iallocator or nodes on the instances"
10776                                    " or set a cluster-wide default iallocator",
10777                                    errors.ECODE_INVAL)
10778
10779     dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10780     if dups:
10781       raise errors.OpPrereqError("There are duplicate instance names: %s" %
10782                                  utils.CommaJoin(dups), errors.ECODE_INVAL)
10783
10784   def ExpandNames(self):
10785     """Calculate the locks.
10786
10787     """
10788     self.share_locks = _ShareAll()
10789     self.needed_locks = {}
10790
10791     if self.op.iallocator:
10792       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10793       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10794     else:
10795       nodeslist = []
10796       for inst in self.op.instances:
10797         inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10798         nodeslist.append(inst.pnode)
10799         if inst.snode is not None:
10800           inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10801           nodeslist.append(inst.snode)
10802
10803       self.needed_locks[locking.LEVEL_NODE] = nodeslist
10804       # Lock resources of instance's primary and secondary nodes (copy to
10805       # prevent accidential modification)
10806       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10807
10808   def CheckPrereq(self):
10809     """Check prerequisite.
10810
10811     """
10812     cluster = self.cfg.GetClusterInfo()
10813     default_vg = self.cfg.GetVGName()
10814     ec_id = self.proc.GetECId()
10815
10816     insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10817                                          _ComputeNics(op, cluster, None,
10818                                                       self.cfg, ec_id),
10819                                          _ComputeFullBeParams(op, cluster))
10820              for op in self.op.instances]
10821
10822     req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10823     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10824
10825     ial.Run(self.op.iallocator)
10826
10827     if not ial.success:
10828       raise errors.OpPrereqError("Can't compute nodes using"
10829                                  " iallocator '%s': %s" %
10830                                  (self.op.iallocator, ial.info),
10831                                  errors.ECODE_NORES)
10832
10833     self.ia_result = ial.result
10834
10835     if self.op.dry_run:
10836       self.dry_run_rsult = objects.FillDict(self._ConstructPartialResult(), {
10837         constants.JOB_IDS_KEY: [],
10838         })
10839
10840   def _ConstructPartialResult(self):
10841     """Contructs the partial result.
10842
10843     """
10844     (allocatable, failed) = self.ia_result
10845     return {
10846       opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10847         map(compat.fst, allocatable),
10848       opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10849       }
10850
10851   def Exec(self, feedback_fn):
10852     """Executes the opcode.
10853
10854     """
10855     op2inst = dict((op.instance_name, op) for op in self.op.instances)
10856     (allocatable, failed) = self.ia_result
10857
10858     jobs = []
10859     for (name, nodes) in allocatable:
10860       op = op2inst.pop(name)
10861
10862       if len(nodes) > 1:
10863         (op.pnode, op.snode) = nodes
10864       else:
10865         (op.pnode,) = nodes
10866
10867       jobs.append([op])
10868
10869     missing = set(op2inst.keys()) - set(failed)
10870     assert not missing, \
10871       "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10872
10873     return ResultWithJobs(jobs, **self._ConstructPartialResult())
10874
10875
10876 def _CheckRADOSFreeSpace():
10877   """Compute disk size requirements inside the RADOS cluster.
10878
10879   """
10880   # For the RADOS cluster we assume there is always enough space.
10881   pass
10882
10883
10884 class LUInstanceConsole(NoHooksLU):
10885   """Connect to an instance's console.
10886
10887   This is somewhat special in that it returns the command line that
10888   you need to run on the master node in order to connect to the
10889   console.
10890
10891   """
10892   REQ_BGL = False
10893
10894   def ExpandNames(self):
10895     self.share_locks = _ShareAll()
10896     self._ExpandAndLockInstance()
10897
10898   def CheckPrereq(self):
10899     """Check prerequisites.
10900
10901     This checks that the instance is in the cluster.
10902
10903     """
10904     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10905     assert self.instance is not None, \
10906       "Cannot retrieve locked instance %s" % self.op.instance_name
10907     _CheckNodeOnline(self, self.instance.primary_node)
10908
10909   def Exec(self, feedback_fn):
10910     """Connect to the console of an instance
10911
10912     """
10913     instance = self.instance
10914     node = instance.primary_node
10915
10916     node_insts = self.rpc.call_instance_list([node],
10917                                              [instance.hypervisor])[node]
10918     node_insts.Raise("Can't get node information from %s" % node)
10919
10920     if instance.name not in node_insts.payload:
10921       if instance.admin_state == constants.ADMINST_UP:
10922         state = constants.INSTST_ERRORDOWN
10923       elif instance.admin_state == constants.ADMINST_DOWN:
10924         state = constants.INSTST_ADMINDOWN
10925       else:
10926         state = constants.INSTST_ADMINOFFLINE
10927       raise errors.OpExecError("Instance %s is not running (state %s)" %
10928                                (instance.name, state))
10929
10930     logging.debug("Connecting to console of %s on %s", instance.name, node)
10931
10932     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10933
10934
10935 def _GetInstanceConsole(cluster, instance):
10936   """Returns console information for an instance.
10937
10938   @type cluster: L{objects.Cluster}
10939   @type instance: L{objects.Instance}
10940   @rtype: dict
10941
10942   """
10943   hyper = hypervisor.GetHypervisor(instance.hypervisor)
10944   # beparams and hvparams are passed separately, to avoid editing the
10945   # instance and then saving the defaults in the instance itself.
10946   hvparams = cluster.FillHV(instance)
10947   beparams = cluster.FillBE(instance)
10948   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10949
10950   assert console.instance == instance.name
10951   assert console.Validate()
10952
10953   return console.ToDict()
10954
10955
10956 class LUInstanceReplaceDisks(LogicalUnit):
10957   """Replace the disks of an instance.
10958
10959   """
10960   HPATH = "mirrors-replace"
10961   HTYPE = constants.HTYPE_INSTANCE
10962   REQ_BGL = False
10963
10964   def CheckArguments(self):
10965     """Check arguments.
10966
10967     """
10968     remote_node = self.op.remote_node
10969     ialloc = self.op.iallocator
10970     if self.op.mode == constants.REPLACE_DISK_CHG:
10971       if remote_node is None and ialloc is None:
10972         raise errors.OpPrereqError("When changing the secondary either an"
10973                                    " iallocator script must be used or the"
10974                                    " new node given", errors.ECODE_INVAL)
10975       else:
10976         _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10977
10978     elif remote_node is not None or ialloc is not None:
10979       # Not replacing the secondary
10980       raise errors.OpPrereqError("The iallocator and new node options can"
10981                                  " only be used when changing the"
10982                                  " secondary node", errors.ECODE_INVAL)
10983
10984   def ExpandNames(self):
10985     self._ExpandAndLockInstance()
10986
10987     assert locking.LEVEL_NODE not in self.needed_locks
10988     assert locking.LEVEL_NODE_RES not in self.needed_locks
10989     assert locking.LEVEL_NODEGROUP not in self.needed_locks
10990
10991     assert self.op.iallocator is None or self.op.remote_node is None, \
10992       "Conflicting options"
10993
10994     if self.op.remote_node is not None:
10995       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10996
10997       # Warning: do not remove the locking of the new secondary here
10998       # unless DRBD8.AddChildren is changed to work in parallel;
10999       # currently it doesn't since parallel invocations of
11000       # FindUnusedMinor will conflict
11001       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11002       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11003     else:
11004       self.needed_locks[locking.LEVEL_NODE] = []
11005       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11006
11007       if self.op.iallocator is not None:
11008         # iallocator will select a new node in the same group
11009         self.needed_locks[locking.LEVEL_NODEGROUP] = []
11010         self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11011
11012     self.needed_locks[locking.LEVEL_NODE_RES] = []
11013
11014     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11015                                    self.op.iallocator, self.op.remote_node,
11016                                    self.op.disks, self.op.early_release,
11017                                    self.op.ignore_ipolicy)
11018
11019     self.tasklets = [self.replacer]
11020
11021   def DeclareLocks(self, level):
11022     if level == locking.LEVEL_NODEGROUP:
11023       assert self.op.remote_node is None
11024       assert self.op.iallocator is not None
11025       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11026
11027       self.share_locks[locking.LEVEL_NODEGROUP] = 1
11028       # Lock all groups used by instance optimistically; this requires going
11029       # via the node before it's locked, requiring verification later on
11030       self.needed_locks[locking.LEVEL_NODEGROUP] = \
11031         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11032
11033     elif level == locking.LEVEL_NODE:
11034       if self.op.iallocator is not None:
11035         assert self.op.remote_node is None
11036         assert not self.needed_locks[locking.LEVEL_NODE]
11037         assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11038
11039         # Lock member nodes of all locked groups
11040         self.needed_locks[locking.LEVEL_NODE] = \
11041             [node_name
11042              for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11043              for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11044       else:
11045         assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11046
11047         self._LockInstancesNodes()
11048
11049     elif level == locking.LEVEL_NODE_RES:
11050       # Reuse node locks
11051       self.needed_locks[locking.LEVEL_NODE_RES] = \
11052         self.needed_locks[locking.LEVEL_NODE]
11053
11054   def BuildHooksEnv(self):
11055     """Build hooks env.
11056
11057     This runs on the master, the primary and all the secondaries.
11058
11059     """
11060     instance = self.replacer.instance
11061     env = {
11062       "MODE": self.op.mode,
11063       "NEW_SECONDARY": self.op.remote_node,
11064       "OLD_SECONDARY": instance.secondary_nodes[0],
11065       }
11066     env.update(_BuildInstanceHookEnvByObject(self, instance))
11067     return env
11068
11069   def BuildHooksNodes(self):
11070     """Build hooks nodes.
11071
11072     """
11073     instance = self.replacer.instance
11074     nl = [
11075       self.cfg.GetMasterNode(),
11076       instance.primary_node,
11077       ]
11078     if self.op.remote_node is not None:
11079       nl.append(self.op.remote_node)
11080     return nl, nl
11081
11082   def CheckPrereq(self):
11083     """Check prerequisites.
11084
11085     """
11086     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11087             self.op.iallocator is None)
11088
11089     # Verify if node group locks are still correct
11090     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11091     if owned_groups:
11092       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11093
11094     return LogicalUnit.CheckPrereq(self)
11095
11096
11097 class TLReplaceDisks(Tasklet):
11098   """Replaces disks for an instance.
11099
11100   Note: Locking is not within the scope of this class.
11101
11102   """
11103   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11104                disks, early_release, ignore_ipolicy):
11105     """Initializes this class.
11106
11107     """
11108     Tasklet.__init__(self, lu)
11109
11110     # Parameters
11111     self.instance_name = instance_name
11112     self.mode = mode
11113     self.iallocator_name = iallocator_name
11114     self.remote_node = remote_node
11115     self.disks = disks
11116     self.early_release = early_release
11117     self.ignore_ipolicy = ignore_ipolicy
11118
11119     # Runtime data
11120     self.instance = None
11121     self.new_node = None
11122     self.target_node = None
11123     self.other_node = None
11124     self.remote_node_info = None
11125     self.node_secondary_ip = None
11126
11127   @staticmethod
11128   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11129     """Compute a new secondary node using an IAllocator.
11130
11131     """
11132     req = iallocator.IAReqRelocate(name=instance_name,
11133                                    relocate_from=list(relocate_from))
11134     ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11135
11136     ial.Run(iallocator_name)
11137
11138     if not ial.success:
11139       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11140                                  " %s" % (iallocator_name, ial.info),
11141                                  errors.ECODE_NORES)
11142
11143     remote_node_name = ial.result[0]
11144
11145     lu.LogInfo("Selected new secondary for instance '%s': %s",
11146                instance_name, remote_node_name)
11147
11148     return remote_node_name
11149
11150   def _FindFaultyDisks(self, node_name):
11151     """Wrapper for L{_FindFaultyInstanceDisks}.
11152
11153     """
11154     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11155                                     node_name, True)
11156
11157   def _CheckDisksActivated(self, instance):
11158     """Checks if the instance disks are activated.
11159
11160     @param instance: The instance to check disks
11161     @return: True if they are activated, False otherwise
11162
11163     """
11164     nodes = instance.all_nodes
11165
11166     for idx, dev in enumerate(instance.disks):
11167       for node in nodes:
11168         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11169         self.cfg.SetDiskID(dev, node)
11170
11171         result = _BlockdevFind(self, node, dev, instance)
11172
11173         if result.offline:
11174           continue
11175         elif result.fail_msg or not result.payload:
11176           return False
11177
11178     return True
11179
11180   def CheckPrereq(self):
11181     """Check prerequisites.
11182
11183     This checks that the instance is in the cluster.
11184
11185     """
11186     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11187     assert instance is not None, \
11188       "Cannot retrieve locked instance %s" % self.instance_name
11189
11190     if instance.disk_template != constants.DT_DRBD8:
11191       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11192                                  " instances", errors.ECODE_INVAL)
11193
11194     if len(instance.secondary_nodes) != 1:
11195       raise errors.OpPrereqError("The instance has a strange layout,"
11196                                  " expected one secondary but found %d" %
11197                                  len(instance.secondary_nodes),
11198                                  errors.ECODE_FAULT)
11199
11200     instance = self.instance
11201     secondary_node = instance.secondary_nodes[0]
11202
11203     if self.iallocator_name is None:
11204       remote_node = self.remote_node
11205     else:
11206       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11207                                        instance.name, instance.secondary_nodes)
11208
11209     if remote_node is None:
11210       self.remote_node_info = None
11211     else:
11212       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11213              "Remote node '%s' is not locked" % remote_node
11214
11215       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11216       assert self.remote_node_info is not None, \
11217         "Cannot retrieve locked node %s" % remote_node
11218
11219     if remote_node == self.instance.primary_node:
11220       raise errors.OpPrereqError("The specified node is the primary node of"
11221                                  " the instance", errors.ECODE_INVAL)
11222
11223     if remote_node == secondary_node:
11224       raise errors.OpPrereqError("The specified node is already the"
11225                                  " secondary node of the instance",
11226                                  errors.ECODE_INVAL)
11227
11228     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11229                                     constants.REPLACE_DISK_CHG):
11230       raise errors.OpPrereqError("Cannot specify disks to be replaced",
11231                                  errors.ECODE_INVAL)
11232
11233     if self.mode == constants.REPLACE_DISK_AUTO:
11234       if not self._CheckDisksActivated(instance):
11235         raise errors.OpPrereqError("Please run activate-disks on instance %s"
11236                                    " first" % self.instance_name,
11237                                    errors.ECODE_STATE)
11238       faulty_primary = self._FindFaultyDisks(instance.primary_node)
11239       faulty_secondary = self._FindFaultyDisks(secondary_node)
11240
11241       if faulty_primary and faulty_secondary:
11242         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11243                                    " one node and can not be repaired"
11244                                    " automatically" % self.instance_name,
11245                                    errors.ECODE_STATE)
11246
11247       if faulty_primary:
11248         self.disks = faulty_primary
11249         self.target_node = instance.primary_node
11250         self.other_node = secondary_node
11251         check_nodes = [self.target_node, self.other_node]
11252       elif faulty_secondary:
11253         self.disks = faulty_secondary
11254         self.target_node = secondary_node
11255         self.other_node = instance.primary_node
11256         check_nodes = [self.target_node, self.other_node]
11257       else:
11258         self.disks = []
11259         check_nodes = []
11260
11261     else:
11262       # Non-automatic modes
11263       if self.mode == constants.REPLACE_DISK_PRI:
11264         self.target_node = instance.primary_node
11265         self.other_node = secondary_node
11266         check_nodes = [self.target_node, self.other_node]
11267
11268       elif self.mode == constants.REPLACE_DISK_SEC:
11269         self.target_node = secondary_node
11270         self.other_node = instance.primary_node
11271         check_nodes = [self.target_node, self.other_node]
11272
11273       elif self.mode == constants.REPLACE_DISK_CHG:
11274         self.new_node = remote_node
11275         self.other_node = instance.primary_node
11276         self.target_node = secondary_node
11277         check_nodes = [self.new_node, self.other_node]
11278
11279         _CheckNodeNotDrained(self.lu, remote_node)
11280         _CheckNodeVmCapable(self.lu, remote_node)
11281
11282         old_node_info = self.cfg.GetNodeInfo(secondary_node)
11283         assert old_node_info is not None
11284         if old_node_info.offline and not self.early_release:
11285           # doesn't make sense to delay the release
11286           self.early_release = True
11287           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11288                           " early-release mode", secondary_node)
11289
11290       else:
11291         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11292                                      self.mode)
11293
11294       # If not specified all disks should be replaced
11295       if not self.disks:
11296         self.disks = range(len(self.instance.disks))
11297
11298     # TODO: This is ugly, but right now we can't distinguish between internal
11299     # submitted opcode and external one. We should fix that.
11300     if self.remote_node_info:
11301       # We change the node, lets verify it still meets instance policy
11302       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11303       cluster = self.cfg.GetClusterInfo()
11304       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11305                                                               new_group_info)
11306       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11307                               ignore=self.ignore_ipolicy)
11308
11309     for node in check_nodes:
11310       _CheckNodeOnline(self.lu, node)
11311
11312     touched_nodes = frozenset(node_name for node_name in [self.new_node,
11313                                                           self.other_node,
11314                                                           self.target_node]
11315                               if node_name is not None)
11316
11317     # Release unneeded node and node resource locks
11318     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11319     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11320     _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11321
11322     # Release any owned node group
11323     _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11324
11325     # Check whether disks are valid
11326     for disk_idx in self.disks:
11327       instance.FindDisk(disk_idx)
11328
11329     # Get secondary node IP addresses
11330     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11331                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
11332
11333   def Exec(self, feedback_fn):
11334     """Execute disk replacement.
11335
11336     This dispatches the disk replacement to the appropriate handler.
11337
11338     """
11339     if __debug__:
11340       # Verify owned locks before starting operation
11341       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11342       assert set(owned_nodes) == set(self.node_secondary_ip), \
11343           ("Incorrect node locks, owning %s, expected %s" %
11344            (owned_nodes, self.node_secondary_ip.keys()))
11345       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11346               self.lu.owned_locks(locking.LEVEL_NODE_RES))
11347       assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11348
11349       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11350       assert list(owned_instances) == [self.instance_name], \
11351           "Instance '%s' not locked" % self.instance_name
11352
11353       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11354           "Should not own any node group lock at this point"
11355
11356     if not self.disks:
11357       feedback_fn("No disks need replacement for instance '%s'" %
11358                   self.instance.name)
11359       return
11360
11361     feedback_fn("Replacing disk(s) %s for instance '%s'" %
11362                 (utils.CommaJoin(self.disks), self.instance.name))
11363     feedback_fn("Current primary node: %s", self.instance.primary_node)
11364     feedback_fn("Current seconary node: %s",
11365                 utils.CommaJoin(self.instance.secondary_nodes))
11366
11367     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11368
11369     # Activate the instance disks if we're replacing them on a down instance
11370     if activate_disks:
11371       _StartInstanceDisks(self.lu, self.instance, True)
11372
11373     try:
11374       # Should we replace the secondary node?
11375       if self.new_node is not None:
11376         fn = self._ExecDrbd8Secondary
11377       else:
11378         fn = self._ExecDrbd8DiskOnly
11379
11380       result = fn(feedback_fn)
11381     finally:
11382       # Deactivate the instance disks if we're replacing them on a
11383       # down instance
11384       if activate_disks:
11385         _SafeShutdownInstanceDisks(self.lu, self.instance)
11386
11387     assert not self.lu.owned_locks(locking.LEVEL_NODE)
11388
11389     if __debug__:
11390       # Verify owned locks
11391       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11392       nodes = frozenset(self.node_secondary_ip)
11393       assert ((self.early_release and not owned_nodes) or
11394               (not self.early_release and not (set(owned_nodes) - nodes))), \
11395         ("Not owning the correct locks, early_release=%s, owned=%r,"
11396          " nodes=%r" % (self.early_release, owned_nodes, nodes))
11397
11398     return result
11399
11400   def _CheckVolumeGroup(self, nodes):
11401     self.lu.LogInfo("Checking volume groups")
11402
11403     vgname = self.cfg.GetVGName()
11404
11405     # Make sure volume group exists on all involved nodes
11406     results = self.rpc.call_vg_list(nodes)
11407     if not results:
11408       raise errors.OpExecError("Can't list volume groups on the nodes")
11409
11410     for node in nodes:
11411       res = results[node]
11412       res.Raise("Error checking node %s" % node)
11413       if vgname not in res.payload:
11414         raise errors.OpExecError("Volume group '%s' not found on node %s" %
11415                                  (vgname, node))
11416
11417   def _CheckDisksExistence(self, nodes):
11418     # Check disk existence
11419     for idx, dev in enumerate(self.instance.disks):
11420       if idx not in self.disks:
11421         continue
11422
11423       for node in nodes:
11424         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11425         self.cfg.SetDiskID(dev, node)
11426
11427         result = _BlockdevFind(self, node, dev, self.instance)
11428
11429         msg = result.fail_msg
11430         if msg or not result.payload:
11431           if not msg:
11432             msg = "disk not found"
11433           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11434                                    (idx, node, msg))
11435
11436   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11437     for idx, dev in enumerate(self.instance.disks):
11438       if idx not in self.disks:
11439         continue
11440
11441       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11442                       (idx, node_name))
11443
11444       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11445                                    on_primary, ldisk=ldisk):
11446         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11447                                  " replace disks for instance %s" %
11448                                  (node_name, self.instance.name))
11449
11450   def _CreateNewStorage(self, node_name):
11451     """Create new storage on the primary or secondary node.
11452
11453     This is only used for same-node replaces, not for changing the
11454     secondary node, hence we don't want to modify the existing disk.
11455
11456     """
11457     iv_names = {}
11458
11459     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11460     for idx, dev in enumerate(disks):
11461       if idx not in self.disks:
11462         continue
11463
11464       self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11465
11466       self.cfg.SetDiskID(dev, node_name)
11467
11468       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11469       names = _GenerateUniqueNames(self.lu, lv_names)
11470
11471       (data_disk, meta_disk) = dev.children
11472       vg_data = data_disk.logical_id[0]
11473       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11474                              logical_id=(vg_data, names[0]),
11475                              params=data_disk.params)
11476       vg_meta = meta_disk.logical_id[0]
11477       lv_meta = objects.Disk(dev_type=constants.LD_LV,
11478                              size=constants.DRBD_META_SIZE,
11479                              logical_id=(vg_meta, names[1]),
11480                              params=meta_disk.params)
11481
11482       new_lvs = [lv_data, lv_meta]
11483       old_lvs = [child.Copy() for child in dev.children]
11484       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11485
11486       # we pass force_create=True to force the LVM creation
11487       for new_lv in new_lvs:
11488         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11489                              _GetInstanceInfoText(self.instance), False)
11490
11491     return iv_names
11492
11493   def _CheckDevices(self, node_name, iv_names):
11494     for name, (dev, _, _) in iv_names.iteritems():
11495       self.cfg.SetDiskID(dev, node_name)
11496
11497       result = _BlockdevFind(self, node_name, dev, self.instance)
11498
11499       msg = result.fail_msg
11500       if msg or not result.payload:
11501         if not msg:
11502           msg = "disk not found"
11503         raise errors.OpExecError("Can't find DRBD device %s: %s" %
11504                                  (name, msg))
11505
11506       if result.payload.is_degraded:
11507         raise errors.OpExecError("DRBD device %s is degraded!" % name)
11508
11509   def _RemoveOldStorage(self, node_name, iv_names):
11510     for name, (_, old_lvs, _) in iv_names.iteritems():
11511       self.lu.LogInfo("Remove logical volumes for %s", name)
11512
11513       for lv in old_lvs:
11514         self.cfg.SetDiskID(lv, node_name)
11515
11516         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11517         if msg:
11518           self.lu.LogWarning("Can't remove old LV: %s", msg,
11519                              hint="remove unused LVs manually")
11520
11521   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11522     """Replace a disk on the primary or secondary for DRBD 8.
11523
11524     The algorithm for replace is quite complicated:
11525
11526       1. for each disk to be replaced:
11527
11528         1. create new LVs on the target node with unique names
11529         1. detach old LVs from the drbd device
11530         1. rename old LVs to name_replaced.<time_t>
11531         1. rename new LVs to old LVs
11532         1. attach the new LVs (with the old names now) to the drbd device
11533
11534       1. wait for sync across all devices
11535
11536       1. for each modified disk:
11537
11538         1. remove old LVs (which have the name name_replaces.<time_t>)
11539
11540     Failures are not very well handled.
11541
11542     """
11543     steps_total = 6
11544
11545     # Step: check device activation
11546     self.lu.LogStep(1, steps_total, "Check device existence")
11547     self._CheckDisksExistence([self.other_node, self.target_node])
11548     self._CheckVolumeGroup([self.target_node, self.other_node])
11549
11550     # Step: check other node consistency
11551     self.lu.LogStep(2, steps_total, "Check peer consistency")
11552     self._CheckDisksConsistency(self.other_node,
11553                                 self.other_node == self.instance.primary_node,
11554                                 False)
11555
11556     # Step: create new storage
11557     self.lu.LogStep(3, steps_total, "Allocate new storage")
11558     iv_names = self._CreateNewStorage(self.target_node)
11559
11560     # Step: for each lv, detach+rename*2+attach
11561     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11562     for dev, old_lvs, new_lvs in iv_names.itervalues():
11563       self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11564
11565       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11566                                                      old_lvs)
11567       result.Raise("Can't detach drbd from local storage on node"
11568                    " %s for device %s" % (self.target_node, dev.iv_name))
11569       #dev.children = []
11570       #cfg.Update(instance)
11571
11572       # ok, we created the new LVs, so now we know we have the needed
11573       # storage; as such, we proceed on the target node to rename
11574       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11575       # using the assumption that logical_id == physical_id (which in
11576       # turn is the unique_id on that node)
11577
11578       # FIXME(iustin): use a better name for the replaced LVs
11579       temp_suffix = int(time.time())
11580       ren_fn = lambda d, suff: (d.physical_id[0],
11581                                 d.physical_id[1] + "_replaced-%s" % suff)
11582
11583       # Build the rename list based on what LVs exist on the node
11584       rename_old_to_new = []
11585       for to_ren in old_lvs:
11586         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11587         if not result.fail_msg and result.payload:
11588           # device exists
11589           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11590
11591       self.lu.LogInfo("Renaming the old LVs on the target node")
11592       result = self.rpc.call_blockdev_rename(self.target_node,
11593                                              rename_old_to_new)
11594       result.Raise("Can't rename old LVs on node %s" % self.target_node)
11595
11596       # Now we rename the new LVs to the old LVs
11597       self.lu.LogInfo("Renaming the new LVs on the target node")
11598       rename_new_to_old = [(new, old.physical_id)
11599                            for old, new in zip(old_lvs, new_lvs)]
11600       result = self.rpc.call_blockdev_rename(self.target_node,
11601                                              rename_new_to_old)
11602       result.Raise("Can't rename new LVs on node %s" % self.target_node)
11603
11604       # Intermediate steps of in memory modifications
11605       for old, new in zip(old_lvs, new_lvs):
11606         new.logical_id = old.logical_id
11607         self.cfg.SetDiskID(new, self.target_node)
11608
11609       # We need to modify old_lvs so that removal later removes the
11610       # right LVs, not the newly added ones; note that old_lvs is a
11611       # copy here
11612       for disk in old_lvs:
11613         disk.logical_id = ren_fn(disk, temp_suffix)
11614         self.cfg.SetDiskID(disk, self.target_node)
11615
11616       # Now that the new lvs have the old name, we can add them to the device
11617       self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11618       result = self.rpc.call_blockdev_addchildren(self.target_node,
11619                                                   (dev, self.instance), new_lvs)
11620       msg = result.fail_msg
11621       if msg:
11622         for new_lv in new_lvs:
11623           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11624                                                new_lv).fail_msg
11625           if msg2:
11626             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11627                                hint=("cleanup manually the unused logical"
11628                                      "volumes"))
11629         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11630
11631     cstep = itertools.count(5)
11632
11633     if self.early_release:
11634       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11635       self._RemoveOldStorage(self.target_node, iv_names)
11636       # TODO: Check if releasing locks early still makes sense
11637       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11638     else:
11639       # Release all resource locks except those used by the instance
11640       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11641                     keep=self.node_secondary_ip.keys())
11642
11643     # Release all node locks while waiting for sync
11644     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11645
11646     # TODO: Can the instance lock be downgraded here? Take the optional disk
11647     # shutdown in the caller into consideration.
11648
11649     # Wait for sync
11650     # This can fail as the old devices are degraded and _WaitForSync
11651     # does a combined result over all disks, so we don't check its return value
11652     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11653     _WaitForSync(self.lu, self.instance)
11654
11655     # Check all devices manually
11656     self._CheckDevices(self.instance.primary_node, iv_names)
11657
11658     # Step: remove old storage
11659     if not self.early_release:
11660       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11661       self._RemoveOldStorage(self.target_node, iv_names)
11662
11663   def _ExecDrbd8Secondary(self, feedback_fn):
11664     """Replace the secondary node for DRBD 8.
11665
11666     The algorithm for replace is quite complicated:
11667       - for all disks of the instance:
11668         - create new LVs on the new node with same names
11669         - shutdown the drbd device on the old secondary
11670         - disconnect the drbd network on the primary
11671         - create the drbd device on the new secondary
11672         - network attach the drbd on the primary, using an artifice:
11673           the drbd code for Attach() will connect to the network if it
11674           finds a device which is connected to the good local disks but
11675           not network enabled
11676       - wait for sync across all devices
11677       - remove all disks from the old secondary
11678
11679     Failures are not very well handled.
11680
11681     """
11682     steps_total = 6
11683
11684     pnode = self.instance.primary_node
11685
11686     # Step: check device activation
11687     self.lu.LogStep(1, steps_total, "Check device existence")
11688     self._CheckDisksExistence([self.instance.primary_node])
11689     self._CheckVolumeGroup([self.instance.primary_node])
11690
11691     # Step: check other node consistency
11692     self.lu.LogStep(2, steps_total, "Check peer consistency")
11693     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11694
11695     # Step: create new storage
11696     self.lu.LogStep(3, steps_total, "Allocate new storage")
11697     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11698     for idx, dev in enumerate(disks):
11699       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11700                       (self.new_node, idx))
11701       # we pass force_create=True to force LVM creation
11702       for new_lv in dev.children:
11703         _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11704                              True, _GetInstanceInfoText(self.instance), False)
11705
11706     # Step 4: dbrd minors and drbd setups changes
11707     # after this, we must manually remove the drbd minors on both the
11708     # error and the success paths
11709     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11710     minors = self.cfg.AllocateDRBDMinor([self.new_node
11711                                          for dev in self.instance.disks],
11712                                         self.instance.name)
11713     logging.debug("Allocated minors %r", minors)
11714
11715     iv_names = {}
11716     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11717       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11718                       (self.new_node, idx))
11719       # create new devices on new_node; note that we create two IDs:
11720       # one without port, so the drbd will be activated without
11721       # networking information on the new node at this stage, and one
11722       # with network, for the latter activation in step 4
11723       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11724       if self.instance.primary_node == o_node1:
11725         p_minor = o_minor1
11726       else:
11727         assert self.instance.primary_node == o_node2, "Three-node instance?"
11728         p_minor = o_minor2
11729
11730       new_alone_id = (self.instance.primary_node, self.new_node, None,
11731                       p_minor, new_minor, o_secret)
11732       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11733                     p_minor, new_minor, o_secret)
11734
11735       iv_names[idx] = (dev, dev.children, new_net_id)
11736       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11737                     new_net_id)
11738       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11739                               logical_id=new_alone_id,
11740                               children=dev.children,
11741                               size=dev.size,
11742                               params={})
11743       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11744                                              self.cfg)
11745       try:
11746         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11747                               anno_new_drbd,
11748                               _GetInstanceInfoText(self.instance), False)
11749       except errors.GenericError:
11750         self.cfg.ReleaseDRBDMinors(self.instance.name)
11751         raise
11752
11753     # We have new devices, shutdown the drbd on the old secondary
11754     for idx, dev in enumerate(self.instance.disks):
11755       self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
11756       self.cfg.SetDiskID(dev, self.target_node)
11757       msg = self.rpc.call_blockdev_shutdown(self.target_node,
11758                                             (dev, self.instance)).fail_msg
11759       if msg:
11760         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11761                            "node: %s" % (idx, msg),
11762                            hint=("Please cleanup this device manually as"
11763                                  " soon as possible"))
11764
11765     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11766     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11767                                                self.instance.disks)[pnode]
11768
11769     msg = result.fail_msg
11770     if msg:
11771       # detaches didn't succeed (unlikely)
11772       self.cfg.ReleaseDRBDMinors(self.instance.name)
11773       raise errors.OpExecError("Can't detach the disks from the network on"
11774                                " old node: %s" % (msg,))
11775
11776     # if we managed to detach at least one, we update all the disks of
11777     # the instance to point to the new secondary
11778     self.lu.LogInfo("Updating instance configuration")
11779     for dev, _, new_logical_id in iv_names.itervalues():
11780       dev.logical_id = new_logical_id
11781       self.cfg.SetDiskID(dev, self.instance.primary_node)
11782
11783     self.cfg.Update(self.instance, feedback_fn)
11784
11785     # Release all node locks (the configuration has been updated)
11786     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11787
11788     # and now perform the drbd attach
11789     self.lu.LogInfo("Attaching primary drbds to new secondary"
11790                     " (standalone => connected)")
11791     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11792                                             self.new_node],
11793                                            self.node_secondary_ip,
11794                                            (self.instance.disks, self.instance),
11795                                            self.instance.name,
11796                                            False)
11797     for to_node, to_result in result.items():
11798       msg = to_result.fail_msg
11799       if msg:
11800         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11801                            to_node, msg,
11802                            hint=("please do a gnt-instance info to see the"
11803                                  " status of disks"))
11804
11805     cstep = itertools.count(5)
11806
11807     if self.early_release:
11808       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11809       self._RemoveOldStorage(self.target_node, iv_names)
11810       # TODO: Check if releasing locks early still makes sense
11811       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11812     else:
11813       # Release all resource locks except those used by the instance
11814       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11815                     keep=self.node_secondary_ip.keys())
11816
11817     # TODO: Can the instance lock be downgraded here? Take the optional disk
11818     # shutdown in the caller into consideration.
11819
11820     # Wait for sync
11821     # This can fail as the old devices are degraded and _WaitForSync
11822     # does a combined result over all disks, so we don't check its return value
11823     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11824     _WaitForSync(self.lu, self.instance)
11825
11826     # Check all devices manually
11827     self._CheckDevices(self.instance.primary_node, iv_names)
11828
11829     # Step: remove old storage
11830     if not self.early_release:
11831       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11832       self._RemoveOldStorage(self.target_node, iv_names)
11833
11834
11835 class LURepairNodeStorage(NoHooksLU):
11836   """Repairs the volume group on a node.
11837
11838   """
11839   REQ_BGL = False
11840
11841   def CheckArguments(self):
11842     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11843
11844     storage_type = self.op.storage_type
11845
11846     if (constants.SO_FIX_CONSISTENCY not in
11847         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11848       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11849                                  " repaired" % storage_type,
11850                                  errors.ECODE_INVAL)
11851
11852   def ExpandNames(self):
11853     self.needed_locks = {
11854       locking.LEVEL_NODE: [self.op.node_name],
11855       }
11856
11857   def _CheckFaultyDisks(self, instance, node_name):
11858     """Ensure faulty disks abort the opcode or at least warn."""
11859     try:
11860       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11861                                   node_name, True):
11862         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11863                                    " node '%s'" % (instance.name, node_name),
11864                                    errors.ECODE_STATE)
11865     except errors.OpPrereqError, err:
11866       if self.op.ignore_consistency:
11867         self.LogWarning(str(err.args[0]))
11868       else:
11869         raise
11870
11871   def CheckPrereq(self):
11872     """Check prerequisites.
11873
11874     """
11875     # Check whether any instance on this node has faulty disks
11876     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11877       if inst.admin_state != constants.ADMINST_UP:
11878         continue
11879       check_nodes = set(inst.all_nodes)
11880       check_nodes.discard(self.op.node_name)
11881       for inst_node_name in check_nodes:
11882         self._CheckFaultyDisks(inst, inst_node_name)
11883
11884   def Exec(self, feedback_fn):
11885     feedback_fn("Repairing storage unit '%s' on %s ..." %
11886                 (self.op.name, self.op.node_name))
11887
11888     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11889     result = self.rpc.call_storage_execute(self.op.node_name,
11890                                            self.op.storage_type, st_args,
11891                                            self.op.name,
11892                                            constants.SO_FIX_CONSISTENCY)
11893     result.Raise("Failed to repair storage unit '%s' on %s" %
11894                  (self.op.name, self.op.node_name))
11895
11896
11897 class LUNodeEvacuate(NoHooksLU):
11898   """Evacuates instances off a list of nodes.
11899
11900   """
11901   REQ_BGL = False
11902
11903   _MODE2IALLOCATOR = {
11904     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11905     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11906     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11907     }
11908   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11909   assert (frozenset(_MODE2IALLOCATOR.values()) ==
11910           constants.IALLOCATOR_NEVAC_MODES)
11911
11912   def CheckArguments(self):
11913     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11914
11915   def ExpandNames(self):
11916     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11917
11918     if self.op.remote_node is not None:
11919       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11920       assert self.op.remote_node
11921
11922       if self.op.remote_node == self.op.node_name:
11923         raise errors.OpPrereqError("Can not use evacuated node as a new"
11924                                    " secondary node", errors.ECODE_INVAL)
11925
11926       if self.op.mode != constants.NODE_EVAC_SEC:
11927         raise errors.OpPrereqError("Without the use of an iallocator only"
11928                                    " secondary instances can be evacuated",
11929                                    errors.ECODE_INVAL)
11930
11931     # Declare locks
11932     self.share_locks = _ShareAll()
11933     self.needed_locks = {
11934       locking.LEVEL_INSTANCE: [],
11935       locking.LEVEL_NODEGROUP: [],
11936       locking.LEVEL_NODE: [],
11937       }
11938
11939     # Determine nodes (via group) optimistically, needs verification once locks
11940     # have been acquired
11941     self.lock_nodes = self._DetermineNodes()
11942
11943   def _DetermineNodes(self):
11944     """Gets the list of nodes to operate on.
11945
11946     """
11947     if self.op.remote_node is None:
11948       # Iallocator will choose any node(s) in the same group
11949       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11950     else:
11951       group_nodes = frozenset([self.op.remote_node])
11952
11953     # Determine nodes to be locked
11954     return set([self.op.node_name]) | group_nodes
11955
11956   def _DetermineInstances(self):
11957     """Builds list of instances to operate on.
11958
11959     """
11960     assert self.op.mode in constants.NODE_EVAC_MODES
11961
11962     if self.op.mode == constants.NODE_EVAC_PRI:
11963       # Primary instances only
11964       inst_fn = _GetNodePrimaryInstances
11965       assert self.op.remote_node is None, \
11966         "Evacuating primary instances requires iallocator"
11967     elif self.op.mode == constants.NODE_EVAC_SEC:
11968       # Secondary instances only
11969       inst_fn = _GetNodeSecondaryInstances
11970     else:
11971       # All instances
11972       assert self.op.mode == constants.NODE_EVAC_ALL
11973       inst_fn = _GetNodeInstances
11974       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11975       # per instance
11976       raise errors.OpPrereqError("Due to an issue with the iallocator"
11977                                  " interface it is not possible to evacuate"
11978                                  " all instances at once; specify explicitly"
11979                                  " whether to evacuate primary or secondary"
11980                                  " instances",
11981                                  errors.ECODE_INVAL)
11982
11983     return inst_fn(self.cfg, self.op.node_name)
11984
11985   def DeclareLocks(self, level):
11986     if level == locking.LEVEL_INSTANCE:
11987       # Lock instances optimistically, needs verification once node and group
11988       # locks have been acquired
11989       self.needed_locks[locking.LEVEL_INSTANCE] = \
11990         set(i.name for i in self._DetermineInstances())
11991
11992     elif level == locking.LEVEL_NODEGROUP:
11993       # Lock node groups for all potential target nodes optimistically, needs
11994       # verification once nodes have been acquired
11995       self.needed_locks[locking.LEVEL_NODEGROUP] = \
11996         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11997
11998     elif level == locking.LEVEL_NODE:
11999       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12000
12001   def CheckPrereq(self):
12002     # Verify locks
12003     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12004     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12005     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12006
12007     need_nodes = self._DetermineNodes()
12008
12009     if not owned_nodes.issuperset(need_nodes):
12010       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12011                                  " locks were acquired, current nodes are"
12012                                  " are '%s', used to be '%s'; retry the"
12013                                  " operation" %
12014                                  (self.op.node_name,
12015                                   utils.CommaJoin(need_nodes),
12016                                   utils.CommaJoin(owned_nodes)),
12017                                  errors.ECODE_STATE)
12018
12019     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12020     if owned_groups != wanted_groups:
12021       raise errors.OpExecError("Node groups changed since locks were acquired,"
12022                                " current groups are '%s', used to be '%s';"
12023                                " retry the operation" %
12024                                (utils.CommaJoin(wanted_groups),
12025                                 utils.CommaJoin(owned_groups)))
12026
12027     # Determine affected instances
12028     self.instances = self._DetermineInstances()
12029     self.instance_names = [i.name for i in self.instances]
12030
12031     if set(self.instance_names) != owned_instances:
12032       raise errors.OpExecError("Instances on node '%s' changed since locks"
12033                                " were acquired, current instances are '%s',"
12034                                " used to be '%s'; retry the operation" %
12035                                (self.op.node_name,
12036                                 utils.CommaJoin(self.instance_names),
12037                                 utils.CommaJoin(owned_instances)))
12038
12039     if self.instance_names:
12040       self.LogInfo("Evacuating instances from node '%s': %s",
12041                    self.op.node_name,
12042                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
12043     else:
12044       self.LogInfo("No instances to evacuate from node '%s'",
12045                    self.op.node_name)
12046
12047     if self.op.remote_node is not None:
12048       for i in self.instances:
12049         if i.primary_node == self.op.remote_node:
12050           raise errors.OpPrereqError("Node %s is the primary node of"
12051                                      " instance %s, cannot use it as"
12052                                      " secondary" %
12053                                      (self.op.remote_node, i.name),
12054                                      errors.ECODE_INVAL)
12055
12056   def Exec(self, feedback_fn):
12057     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12058
12059     if not self.instance_names:
12060       # No instances to evacuate
12061       jobs = []
12062
12063     elif self.op.iallocator is not None:
12064       # TODO: Implement relocation to other group
12065       evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12066       req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12067                                      instances=list(self.instance_names))
12068       ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12069
12070       ial.Run(self.op.iallocator)
12071
12072       if not ial.success:
12073         raise errors.OpPrereqError("Can't compute node evacuation using"
12074                                    " iallocator '%s': %s" %
12075                                    (self.op.iallocator, ial.info),
12076                                    errors.ECODE_NORES)
12077
12078       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12079
12080     elif self.op.remote_node is not None:
12081       assert self.op.mode == constants.NODE_EVAC_SEC
12082       jobs = [
12083         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12084                                         remote_node=self.op.remote_node,
12085                                         disks=[],
12086                                         mode=constants.REPLACE_DISK_CHG,
12087                                         early_release=self.op.early_release)]
12088         for instance_name in self.instance_names]
12089
12090     else:
12091       raise errors.ProgrammerError("No iallocator or remote node")
12092
12093     return ResultWithJobs(jobs)
12094
12095
12096 def _SetOpEarlyRelease(early_release, op):
12097   """Sets C{early_release} flag on opcodes if available.
12098
12099   """
12100   try:
12101     op.early_release = early_release
12102   except AttributeError:
12103     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12104
12105   return op
12106
12107
12108 def _NodeEvacDest(use_nodes, group, nodes):
12109   """Returns group or nodes depending on caller's choice.
12110
12111   """
12112   if use_nodes:
12113     return utils.CommaJoin(nodes)
12114   else:
12115     return group
12116
12117
12118 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12119   """Unpacks the result of change-group and node-evacuate iallocator requests.
12120
12121   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12122   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12123
12124   @type lu: L{LogicalUnit}
12125   @param lu: Logical unit instance
12126   @type alloc_result: tuple/list
12127   @param alloc_result: Result from iallocator
12128   @type early_release: bool
12129   @param early_release: Whether to release locks early if possible
12130   @type use_nodes: bool
12131   @param use_nodes: Whether to display node names instead of groups
12132
12133   """
12134   (moved, failed, jobs) = alloc_result
12135
12136   if failed:
12137     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12138                                  for (name, reason) in failed)
12139     lu.LogWarning("Unable to evacuate instances %s", failreason)
12140     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12141
12142   if moved:
12143     lu.LogInfo("Instances to be moved: %s",
12144                utils.CommaJoin("%s (to %s)" %
12145                                (name, _NodeEvacDest(use_nodes, group, nodes))
12146                                for (name, group, nodes) in moved))
12147
12148   return [map(compat.partial(_SetOpEarlyRelease, early_release),
12149               map(opcodes.OpCode.LoadOpCode, ops))
12150           for ops in jobs]
12151
12152
12153 def _DiskSizeInBytesToMebibytes(lu, size):
12154   """Converts a disk size in bytes to mebibytes.
12155
12156   Warns and rounds up if the size isn't an even multiple of 1 MiB.
12157
12158   """
12159   (mib, remainder) = divmod(size, 1024 * 1024)
12160
12161   if remainder != 0:
12162     lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12163                   " to not overwrite existing data (%s bytes will not be"
12164                   " wiped)", (1024 * 1024) - remainder)
12165     mib += 1
12166
12167   return mib
12168
12169
12170 class LUInstanceGrowDisk(LogicalUnit):
12171   """Grow a disk of an instance.
12172
12173   """
12174   HPATH = "disk-grow"
12175   HTYPE = constants.HTYPE_INSTANCE
12176   REQ_BGL = False
12177
12178   def ExpandNames(self):
12179     self._ExpandAndLockInstance()
12180     self.needed_locks[locking.LEVEL_NODE] = []
12181     self.needed_locks[locking.LEVEL_NODE_RES] = []
12182     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12183     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12184
12185   def DeclareLocks(self, level):
12186     if level == locking.LEVEL_NODE:
12187       self._LockInstancesNodes()
12188     elif level == locking.LEVEL_NODE_RES:
12189       # Copy node locks
12190       self.needed_locks[locking.LEVEL_NODE_RES] = \
12191         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12192
12193   def BuildHooksEnv(self):
12194     """Build hooks env.
12195
12196     This runs on the master, the primary and all the secondaries.
12197
12198     """
12199     env = {
12200       "DISK": self.op.disk,
12201       "AMOUNT": self.op.amount,
12202       "ABSOLUTE": self.op.absolute,
12203       }
12204     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12205     return env
12206
12207   def BuildHooksNodes(self):
12208     """Build hooks nodes.
12209
12210     """
12211     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12212     return (nl, nl)
12213
12214   def CheckPrereq(self):
12215     """Check prerequisites.
12216
12217     This checks that the instance is in the cluster.
12218
12219     """
12220     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12221     assert instance is not None, \
12222       "Cannot retrieve locked instance %s" % self.op.instance_name
12223     nodenames = list(instance.all_nodes)
12224     for node in nodenames:
12225       _CheckNodeOnline(self, node)
12226
12227     self.instance = instance
12228
12229     if instance.disk_template not in constants.DTS_GROWABLE:
12230       raise errors.OpPrereqError("Instance's disk layout does not support"
12231                                  " growing", errors.ECODE_INVAL)
12232
12233     self.disk = instance.FindDisk(self.op.disk)
12234
12235     if self.op.absolute:
12236       self.target = self.op.amount
12237       self.delta = self.target - self.disk.size
12238       if self.delta < 0:
12239         raise errors.OpPrereqError("Requested size (%s) is smaller than "
12240                                    "current disk size (%s)" %
12241                                    (utils.FormatUnit(self.target, "h"),
12242                                     utils.FormatUnit(self.disk.size, "h")),
12243                                    errors.ECODE_STATE)
12244     else:
12245       self.delta = self.op.amount
12246       self.target = self.disk.size + self.delta
12247       if self.delta < 0:
12248         raise errors.OpPrereqError("Requested increment (%s) is negative" %
12249                                    utils.FormatUnit(self.delta, "h"),
12250                                    errors.ECODE_INVAL)
12251
12252     if instance.disk_template not in (constants.DT_FILE,
12253                                       constants.DT_SHARED_FILE,
12254                                       constants.DT_RBD):
12255       # TODO: check the free disk space for file, when that feature will be
12256       # supported
12257       _CheckNodesFreeDiskPerVG(self, nodenames,
12258                                self.disk.ComputeGrowth(self.delta))
12259
12260   def Exec(self, feedback_fn):
12261     """Execute disk grow.
12262
12263     """
12264     instance = self.instance
12265     disk = self.disk
12266
12267     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12268     assert (self.owned_locks(locking.LEVEL_NODE) ==
12269             self.owned_locks(locking.LEVEL_NODE_RES))
12270
12271     wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12272
12273     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12274     if not disks_ok:
12275       raise errors.OpExecError("Cannot activate block device to grow")
12276
12277     feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12278                 (self.op.disk, instance.name,
12279                  utils.FormatUnit(self.delta, "h"),
12280                  utils.FormatUnit(self.target, "h")))
12281
12282     # First run all grow ops in dry-run mode
12283     for node in instance.all_nodes:
12284       self.cfg.SetDiskID(disk, node)
12285       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12286                                            True, True)
12287       result.Raise("Dry-run grow request failed to node %s" % node)
12288
12289     if wipe_disks:
12290       # Get disk size from primary node for wiping
12291       result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12292       result.Raise("Failed to retrieve disk size from node '%s'" %
12293                    instance.primary_node)
12294
12295       (disk_size_in_bytes, ) = result.payload
12296
12297       if disk_size_in_bytes is None:
12298         raise errors.OpExecError("Failed to retrieve disk size from primary"
12299                                  " node '%s'" % instance.primary_node)
12300
12301       old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12302
12303       assert old_disk_size >= disk.size, \
12304         ("Retrieved disk size too small (got %s, should be at least %s)" %
12305          (old_disk_size, disk.size))
12306     else:
12307       old_disk_size = None
12308
12309     # We know that (as far as we can test) operations across different
12310     # nodes will succeed, time to run it for real on the backing storage
12311     for node in instance.all_nodes:
12312       self.cfg.SetDiskID(disk, node)
12313       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12314                                            False, True)
12315       result.Raise("Grow request failed to node %s" % node)
12316
12317     # And now execute it for logical storage, on the primary node
12318     node = instance.primary_node
12319     self.cfg.SetDiskID(disk, node)
12320     result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12321                                          False, False)
12322     result.Raise("Grow request failed to node %s" % node)
12323
12324     disk.RecordGrow(self.delta)
12325     self.cfg.Update(instance, feedback_fn)
12326
12327     # Changes have been recorded, release node lock
12328     _ReleaseLocks(self, locking.LEVEL_NODE)
12329
12330     # Downgrade lock while waiting for sync
12331     self.glm.downgrade(locking.LEVEL_INSTANCE)
12332
12333     assert wipe_disks ^ (old_disk_size is None)
12334
12335     if wipe_disks:
12336       assert instance.disks[self.op.disk] == disk
12337
12338       # Wipe newly added disk space
12339       _WipeDisks(self, instance,
12340                  disks=[(self.op.disk, disk, old_disk_size)])
12341
12342     if self.op.wait_for_sync:
12343       disk_abort = not _WaitForSync(self, instance, disks=[disk])
12344       if disk_abort:
12345         self.LogWarning("Disk syncing has not returned a good status; check"
12346                         " the instance")
12347       if instance.admin_state != constants.ADMINST_UP:
12348         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12349     elif instance.admin_state != constants.ADMINST_UP:
12350       self.LogWarning("Not shutting down the disk even if the instance is"
12351                       " not supposed to be running because no wait for"
12352                       " sync mode was requested")
12353
12354     assert self.owned_locks(locking.LEVEL_NODE_RES)
12355     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12356
12357
12358 class LUInstanceQueryData(NoHooksLU):
12359   """Query runtime instance data.
12360
12361   """
12362   REQ_BGL = False
12363
12364   def ExpandNames(self):
12365     self.needed_locks = {}
12366
12367     # Use locking if requested or when non-static information is wanted
12368     if not (self.op.static or self.op.use_locking):
12369       self.LogWarning("Non-static data requested, locks need to be acquired")
12370       self.op.use_locking = True
12371
12372     if self.op.instances or not self.op.use_locking:
12373       # Expand instance names right here
12374       self.wanted_names = _GetWantedInstances(self, self.op.instances)
12375     else:
12376       # Will use acquired locks
12377       self.wanted_names = None
12378
12379     if self.op.use_locking:
12380       self.share_locks = _ShareAll()
12381
12382       if self.wanted_names is None:
12383         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12384       else:
12385         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12386
12387       self.needed_locks[locking.LEVEL_NODEGROUP] = []
12388       self.needed_locks[locking.LEVEL_NODE] = []
12389       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12390
12391   def DeclareLocks(self, level):
12392     if self.op.use_locking:
12393       if level == locking.LEVEL_NODEGROUP:
12394         owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12395
12396         # Lock all groups used by instances optimistically; this requires going
12397         # via the node before it's locked, requiring verification later on
12398         self.needed_locks[locking.LEVEL_NODEGROUP] = \
12399           frozenset(group_uuid
12400                     for instance_name in owned_instances
12401                     for group_uuid in
12402                       self.cfg.GetInstanceNodeGroups(instance_name))
12403
12404       elif level == locking.LEVEL_NODE:
12405         self._LockInstancesNodes()
12406
12407   def CheckPrereq(self):
12408     """Check prerequisites.
12409
12410     This only checks the optional instance list against the existing names.
12411
12412     """
12413     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12414     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12415     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12416
12417     if self.wanted_names is None:
12418       assert self.op.use_locking, "Locking was not used"
12419       self.wanted_names = owned_instances
12420
12421     instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12422
12423     if self.op.use_locking:
12424       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12425                                 None)
12426     else:
12427       assert not (owned_instances or owned_groups or owned_nodes)
12428
12429     self.wanted_instances = instances.values()
12430
12431   def _ComputeBlockdevStatus(self, node, instance, dev):
12432     """Returns the status of a block device
12433
12434     """
12435     if self.op.static or not node:
12436       return None
12437
12438     self.cfg.SetDiskID(dev, node)
12439
12440     result = self.rpc.call_blockdev_find(node, dev)
12441     if result.offline:
12442       return None
12443
12444     result.Raise("Can't compute disk status for %s" % instance.name)
12445
12446     status = result.payload
12447     if status is None:
12448       return None
12449
12450     return (status.dev_path, status.major, status.minor,
12451             status.sync_percent, status.estimated_time,
12452             status.is_degraded, status.ldisk_status)
12453
12454   def _ComputeDiskStatus(self, instance, snode, dev):
12455     """Compute block device status.
12456
12457     """
12458     (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12459
12460     return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12461
12462   def _ComputeDiskStatusInner(self, instance, snode, dev):
12463     """Compute block device status.
12464
12465     @attention: The device has to be annotated already.
12466
12467     """
12468     if dev.dev_type in constants.LDS_DRBD:
12469       # we change the snode then (otherwise we use the one passed in)
12470       if dev.logical_id[0] == instance.primary_node:
12471         snode = dev.logical_id[1]
12472       else:
12473         snode = dev.logical_id[0]
12474
12475     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12476                                               instance, dev)
12477     dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12478
12479     if dev.children:
12480       dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12481                                         instance, snode),
12482                          dev.children)
12483     else:
12484       dev_children = []
12485
12486     return {
12487       "iv_name": dev.iv_name,
12488       "dev_type": dev.dev_type,
12489       "logical_id": dev.logical_id,
12490       "physical_id": dev.physical_id,
12491       "pstatus": dev_pstatus,
12492       "sstatus": dev_sstatus,
12493       "children": dev_children,
12494       "mode": dev.mode,
12495       "size": dev.size,
12496       }
12497
12498   def Exec(self, feedback_fn):
12499     """Gather and return data"""
12500     result = {}
12501
12502     cluster = self.cfg.GetClusterInfo()
12503
12504     node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12505     nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12506
12507     groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12508                                                  for node in nodes.values()))
12509
12510     group2name_fn = lambda uuid: groups[uuid].name
12511
12512     for instance in self.wanted_instances:
12513       pnode = nodes[instance.primary_node]
12514
12515       if self.op.static or pnode.offline:
12516         remote_state = None
12517         if pnode.offline:
12518           self.LogWarning("Primary node %s is marked offline, returning static"
12519                           " information only for instance %s" %
12520                           (pnode.name, instance.name))
12521       else:
12522         remote_info = self.rpc.call_instance_info(instance.primary_node,
12523                                                   instance.name,
12524                                                   instance.hypervisor)
12525         remote_info.Raise("Error checking node %s" % instance.primary_node)
12526         remote_info = remote_info.payload
12527         if remote_info and "state" in remote_info:
12528           remote_state = "up"
12529         else:
12530           if instance.admin_state == constants.ADMINST_UP:
12531             remote_state = "down"
12532           else:
12533             remote_state = instance.admin_state
12534
12535       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12536                   instance.disks)
12537
12538       snodes_group_uuids = [nodes[snode_name].group
12539                             for snode_name in instance.secondary_nodes]
12540
12541       result[instance.name] = {
12542         "name": instance.name,
12543         "config_state": instance.admin_state,
12544         "run_state": remote_state,
12545         "pnode": instance.primary_node,
12546         "pnode_group_uuid": pnode.group,
12547         "pnode_group_name": group2name_fn(pnode.group),
12548         "snodes": instance.secondary_nodes,
12549         "snodes_group_uuids": snodes_group_uuids,
12550         "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12551         "os": instance.os,
12552         # this happens to be the same format used for hooks
12553         "nics": _NICListToTuple(self, instance.nics),
12554         "disk_template": instance.disk_template,
12555         "disks": disks,
12556         "hypervisor": instance.hypervisor,
12557         "network_port": instance.network_port,
12558         "hv_instance": instance.hvparams,
12559         "hv_actual": cluster.FillHV(instance, skip_globals=True),
12560         "be_instance": instance.beparams,
12561         "be_actual": cluster.FillBE(instance),
12562         "os_instance": instance.osparams,
12563         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12564         "serial_no": instance.serial_no,
12565         "mtime": instance.mtime,
12566         "ctime": instance.ctime,
12567         "uuid": instance.uuid,
12568         }
12569
12570     return result
12571
12572
12573 def PrepareContainerMods(mods, private_fn):
12574   """Prepares a list of container modifications by adding a private data field.
12575
12576   @type mods: list of tuples; (operation, index, parameters)
12577   @param mods: List of modifications
12578   @type private_fn: callable or None
12579   @param private_fn: Callable for constructing a private data field for a
12580     modification
12581   @rtype: list
12582
12583   """
12584   if private_fn is None:
12585     fn = lambda: None
12586   else:
12587     fn = private_fn
12588
12589   return [(op, idx, params, fn()) for (op, idx, params) in mods]
12590
12591
12592 #: Type description for changes as returned by L{ApplyContainerMods}'s
12593 #: callbacks
12594 _TApplyContModsCbChanges = \
12595   ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12596     ht.TNonEmptyString,
12597     ht.TAny,
12598     ])))
12599
12600
12601 def ApplyContainerMods(kind, container, chgdesc, mods,
12602                        create_fn, modify_fn, remove_fn):
12603   """Applies descriptions in C{mods} to C{container}.
12604
12605   @type kind: string
12606   @param kind: One-word item description
12607   @type container: list
12608   @param container: Container to modify
12609   @type chgdesc: None or list
12610   @param chgdesc: List of applied changes
12611   @type mods: list
12612   @param mods: Modifications as returned by L{PrepareContainerMods}
12613   @type create_fn: callable
12614   @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12615     receives absolute item index, parameters and private data object as added
12616     by L{PrepareContainerMods}, returns tuple containing new item and changes
12617     as list
12618   @type modify_fn: callable
12619   @param modify_fn: Callback for modifying an existing item
12620     (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12621     and private data object as added by L{PrepareContainerMods}, returns
12622     changes as list
12623   @type remove_fn: callable
12624   @param remove_fn: Callback on removing item; receives absolute item index,
12625     item and private data object as added by L{PrepareContainerMods}
12626
12627   """
12628   for (op, idx, params, private) in mods:
12629     if idx == -1:
12630       # Append
12631       absidx = len(container) - 1
12632     elif idx < 0:
12633       raise IndexError("Not accepting negative indices other than -1")
12634     elif idx > len(container):
12635       raise IndexError("Got %s index %s, but there are only %s" %
12636                        (kind, idx, len(container)))
12637     else:
12638       absidx = idx
12639
12640     changes = None
12641
12642     if op == constants.DDM_ADD:
12643       # Calculate where item will be added
12644       if idx == -1:
12645         addidx = len(container)
12646       else:
12647         addidx = idx
12648
12649       if create_fn is None:
12650         item = params
12651       else:
12652         (item, changes) = create_fn(addidx, params, private)
12653
12654       if idx == -1:
12655         container.append(item)
12656       else:
12657         assert idx >= 0
12658         assert idx <= len(container)
12659         # list.insert does so before the specified index
12660         container.insert(idx, item)
12661     else:
12662       # Retrieve existing item
12663       try:
12664         item = container[absidx]
12665       except IndexError:
12666         raise IndexError("Invalid %s index %s" % (kind, idx))
12667
12668       if op == constants.DDM_REMOVE:
12669         assert not params
12670
12671         if remove_fn is not None:
12672           remove_fn(absidx, item, private)
12673
12674         changes = [("%s/%s" % (kind, absidx), "remove")]
12675
12676         assert container[absidx] == item
12677         del container[absidx]
12678       elif op == constants.DDM_MODIFY:
12679         if modify_fn is not None:
12680           changes = modify_fn(absidx, item, params, private)
12681       else:
12682         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12683
12684     assert _TApplyContModsCbChanges(changes)
12685
12686     if not (chgdesc is None or changes is None):
12687       chgdesc.extend(changes)
12688
12689
12690 def _UpdateIvNames(base_index, disks):
12691   """Updates the C{iv_name} attribute of disks.
12692
12693   @type disks: list of L{objects.Disk}
12694
12695   """
12696   for (idx, disk) in enumerate(disks):
12697     disk.iv_name = "disk/%s" % (base_index + idx, )
12698
12699
12700 class _InstNicModPrivate:
12701   """Data structure for network interface modifications.
12702
12703   Used by L{LUInstanceSetParams}.
12704
12705   """
12706   def __init__(self):
12707     self.params = None
12708     self.filled = None
12709
12710
12711 class LUInstanceSetParams(LogicalUnit):
12712   """Modifies an instances's parameters.
12713
12714   """
12715   HPATH = "instance-modify"
12716   HTYPE = constants.HTYPE_INSTANCE
12717   REQ_BGL = False
12718
12719   @staticmethod
12720   def _UpgradeDiskNicMods(kind, mods, verify_fn):
12721     assert ht.TList(mods)
12722     assert not mods or len(mods[0]) in (2, 3)
12723
12724     if mods and len(mods[0]) == 2:
12725       result = []
12726
12727       addremove = 0
12728       for op, params in mods:
12729         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12730           result.append((op, -1, params))
12731           addremove += 1
12732
12733           if addremove > 1:
12734             raise errors.OpPrereqError("Only one %s add or remove operation is"
12735                                        " supported at a time" % kind,
12736                                        errors.ECODE_INVAL)
12737         else:
12738           result.append((constants.DDM_MODIFY, op, params))
12739
12740       assert verify_fn(result)
12741     else:
12742       result = mods
12743
12744     return result
12745
12746   @staticmethod
12747   def _CheckMods(kind, mods, key_types, item_fn):
12748     """Ensures requested disk/NIC modifications are valid.
12749
12750     """
12751     for (op, _, params) in mods:
12752       assert ht.TDict(params)
12753
12754       utils.ForceDictType(params, key_types)
12755
12756       if op == constants.DDM_REMOVE:
12757         if params:
12758           raise errors.OpPrereqError("No settings should be passed when"
12759                                      " removing a %s" % kind,
12760                                      errors.ECODE_INVAL)
12761       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12762         item_fn(op, params)
12763       else:
12764         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12765
12766   @staticmethod
12767   def _VerifyDiskModification(op, params):
12768     """Verifies a disk modification.
12769
12770     """
12771     if op == constants.DDM_ADD:
12772       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12773       if mode not in constants.DISK_ACCESS_SET:
12774         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12775                                    errors.ECODE_INVAL)
12776
12777       size = params.get(constants.IDISK_SIZE, None)
12778       if size is None:
12779         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12780                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12781
12782       try:
12783         size = int(size)
12784       except (TypeError, ValueError), err:
12785         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12786                                    errors.ECODE_INVAL)
12787
12788       params[constants.IDISK_SIZE] = size
12789
12790     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12791       raise errors.OpPrereqError("Disk size change not possible, use"
12792                                  " grow-disk", errors.ECODE_INVAL)
12793
12794   @staticmethod
12795   def _VerifyNicModification(op, params):
12796     """Verifies a network interface modification.
12797
12798     """
12799     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12800       ip = params.get(constants.INIC_IP, None)
12801       req_net = params.get(constants.INIC_NETWORK, None)
12802       link = params.get(constants.NIC_LINK, None)
12803       mode = params.get(constants.NIC_MODE, None)
12804       if req_net is not None:
12805         if req_net.lower() == constants.VALUE_NONE:
12806           params[constants.INIC_NETWORK] = None
12807           req_net = None
12808         elif link is not None or mode is not None:
12809           raise errors.OpPrereqError("If network is given"
12810                                      " mode or link should not",
12811                                      errors.ECODE_INVAL)
12812
12813       if op == constants.DDM_ADD:
12814         macaddr = params.get(constants.INIC_MAC, None)
12815         if macaddr is None:
12816           params[constants.INIC_MAC] = constants.VALUE_AUTO
12817
12818       if ip is not None:
12819         if ip.lower() == constants.VALUE_NONE:
12820           params[constants.INIC_IP] = None
12821         else:
12822           if ip.lower() == constants.NIC_IP_POOL:
12823             if op == constants.DDM_ADD and req_net is None:
12824               raise errors.OpPrereqError("If ip=pool, parameter network"
12825                                          " cannot be none",
12826                                          errors.ECODE_INVAL)
12827           else:
12828             if not netutils.IPAddress.IsValid(ip):
12829               raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12830                                          errors.ECODE_INVAL)
12831
12832       if constants.INIC_MAC in params:
12833         macaddr = params[constants.INIC_MAC]
12834         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12835           macaddr = utils.NormalizeAndValidateMac(macaddr)
12836
12837         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12838           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12839                                      " modifying an existing NIC",
12840                                      errors.ECODE_INVAL)
12841
12842   def CheckArguments(self):
12843     if not (self.op.nics or self.op.disks or self.op.disk_template or
12844             self.op.hvparams or self.op.beparams or self.op.os_name or
12845             self.op.offline is not None or self.op.runtime_mem):
12846       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12847
12848     if self.op.hvparams:
12849       _CheckGlobalHvParams(self.op.hvparams)
12850
12851     self.op.disks = self._UpgradeDiskNicMods(
12852       "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12853     self.op.nics = self._UpgradeDiskNicMods(
12854       "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12855
12856     # Check disk modifications
12857     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12858                     self._VerifyDiskModification)
12859
12860     if self.op.disks and self.op.disk_template is not None:
12861       raise errors.OpPrereqError("Disk template conversion and other disk"
12862                                  " changes not supported at the same time",
12863                                  errors.ECODE_INVAL)
12864
12865     if (self.op.disk_template and
12866         self.op.disk_template in constants.DTS_INT_MIRROR and
12867         self.op.remote_node is None):
12868       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12869                                  " one requires specifying a secondary node",
12870                                  errors.ECODE_INVAL)
12871
12872     # Check NIC modifications
12873     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12874                     self._VerifyNicModification)
12875
12876   def ExpandNames(self):
12877     self._ExpandAndLockInstance()
12878     self.needed_locks[locking.LEVEL_NODEGROUP] = []
12879     # Can't even acquire node locks in shared mode as upcoming changes in
12880     # Ganeti 2.6 will start to modify the node object on disk conversion
12881     self.needed_locks[locking.LEVEL_NODE] = []
12882     self.needed_locks[locking.LEVEL_NODE_RES] = []
12883     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12884     # Look node group to look up the ipolicy
12885     self.share_locks[locking.LEVEL_NODEGROUP] = 1
12886
12887   def DeclareLocks(self, level):
12888     if level == locking.LEVEL_NODEGROUP:
12889       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12890       # Acquire locks for the instance's nodegroups optimistically. Needs
12891       # to be verified in CheckPrereq
12892       self.needed_locks[locking.LEVEL_NODEGROUP] = \
12893         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12894     elif level == locking.LEVEL_NODE:
12895       self._LockInstancesNodes()
12896       if self.op.disk_template and self.op.remote_node:
12897         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12898         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12899     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12900       # Copy node locks
12901       self.needed_locks[locking.LEVEL_NODE_RES] = \
12902         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12903
12904   def BuildHooksEnv(self):
12905     """Build hooks env.
12906
12907     This runs on the master, primary and secondaries.
12908
12909     """
12910     args = {}
12911     if constants.BE_MINMEM in self.be_new:
12912       args["minmem"] = self.be_new[constants.BE_MINMEM]
12913     if constants.BE_MAXMEM in self.be_new:
12914       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12915     if constants.BE_VCPUS in self.be_new:
12916       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12917     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12918     # information at all.
12919
12920     if self._new_nics is not None:
12921       nics = []
12922
12923       for nic in self._new_nics:
12924         n = copy.deepcopy(nic)
12925         nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12926         n.nicparams = nicparams
12927         nics.append(_NICToTuple(self, n))
12928
12929       args["nics"] = nics
12930
12931     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12932     if self.op.disk_template:
12933       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12934     if self.op.runtime_mem:
12935       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12936
12937     return env
12938
12939   def BuildHooksNodes(self):
12940     """Build hooks nodes.
12941
12942     """
12943     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12944     return (nl, nl)
12945
12946   def _PrepareNicModification(self, params, private, old_ip, old_net,
12947                               old_params, cluster, pnode):
12948
12949     update_params_dict = dict([(key, params[key])
12950                                for key in constants.NICS_PARAMETERS
12951                                if key in params])
12952
12953     req_link = update_params_dict.get(constants.NIC_LINK, None)
12954     req_mode = update_params_dict.get(constants.NIC_MODE, None)
12955
12956     new_net = params.get(constants.INIC_NETWORK, old_net)
12957     if new_net is not None:
12958       netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12959       if netparams is None:
12960         raise errors.OpPrereqError("No netparams found for the network"
12961                                    " %s, probably not connected" % new_net,
12962                                    errors.ECODE_INVAL)
12963       new_params = dict(netparams)
12964     else:
12965       new_params = _GetUpdatedParams(old_params, update_params_dict)
12966
12967     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12968
12969     new_filled_params = cluster.SimpleFillNIC(new_params)
12970     objects.NIC.CheckParameterSyntax(new_filled_params)
12971
12972     new_mode = new_filled_params[constants.NIC_MODE]
12973     if new_mode == constants.NIC_MODE_BRIDGED:
12974       bridge = new_filled_params[constants.NIC_LINK]
12975       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12976       if msg:
12977         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12978         if self.op.force:
12979           self.warn.append(msg)
12980         else:
12981           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12982
12983     elif new_mode == constants.NIC_MODE_ROUTED:
12984       ip = params.get(constants.INIC_IP, old_ip)
12985       if ip is None:
12986         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12987                                    " on a routed NIC", errors.ECODE_INVAL)
12988
12989     if constants.INIC_MAC in params:
12990       mac = params[constants.INIC_MAC]
12991       if mac is None:
12992         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12993                                    errors.ECODE_INVAL)
12994       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12995         # otherwise generate the MAC address
12996         params[constants.INIC_MAC] = \
12997           self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12998       else:
12999         # or validate/reserve the current one
13000         try:
13001           self.cfg.ReserveMAC(mac, self.proc.GetECId())
13002         except errors.ReservationError:
13003           raise errors.OpPrereqError("MAC address '%s' already in use"
13004                                      " in cluster" % mac,
13005                                      errors.ECODE_NOTUNIQUE)
13006     elif new_net != old_net:
13007
13008       def get_net_prefix(net):
13009         if net:
13010           uuid = self.cfg.LookupNetwork(net)
13011           if uuid:
13012             nobj = self.cfg.GetNetwork(uuid)
13013             return nobj.mac_prefix
13014         return None
13015
13016       new_prefix = get_net_prefix(new_net)
13017       old_prefix = get_net_prefix(old_net)
13018       if old_prefix != new_prefix:
13019         params[constants.INIC_MAC] = \
13020           self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13021
13022     #if there is a change in nic-network configuration
13023     new_ip = params.get(constants.INIC_IP, old_ip)
13024     if (new_ip, new_net) != (old_ip, old_net):
13025       if new_ip:
13026         if new_net:
13027           if new_ip.lower() == constants.NIC_IP_POOL:
13028             try:
13029               new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
13030             except errors.ReservationError:
13031               raise errors.OpPrereqError("Unable to get a free IP"
13032                                          " from the address pool",
13033                                          errors.ECODE_STATE)
13034             self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
13035             params[constants.INIC_IP] = new_ip
13036           elif new_ip != old_ip or new_net != old_net:
13037             try:
13038               self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
13039               self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
13040             except errors.ReservationError:
13041               raise errors.OpPrereqError("IP %s not available in network %s" %
13042                                          (new_ip, new_net),
13043                                          errors.ECODE_NOTUNIQUE)
13044         elif new_ip.lower() == constants.NIC_IP_POOL:
13045           raise errors.OpPrereqError("ip=pool, but no network found",
13046                                      errors.ECODE_INVAL)
13047         else:
13048           # new net is None
13049           if self.op.conflicts_check:
13050             _CheckForConflictingIp(self, new_ip, pnode)
13051
13052       if old_ip:
13053         if old_net:
13054           try:
13055             self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13056           except errors.AddressPoolError:
13057             logging.warning("Release IP %s not contained in network %s",
13058                             old_ip, old_net)
13059
13060     # there are no changes in (net, ip) tuple
13061     elif (old_net is not None and
13062           (req_link is not None or req_mode is not None)):
13063       raise errors.OpPrereqError("Not allowed to change link or mode of"
13064                                  " a NIC that is connected to a network",
13065                                  errors.ECODE_INVAL)
13066
13067     private.params = new_params
13068     private.filled = new_filled_params
13069
13070   def CheckPrereq(self):
13071     """Check prerequisites.
13072
13073     This only checks the instance list against the existing names.
13074
13075     """
13076     assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13077     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13078
13079     cluster = self.cluster = self.cfg.GetClusterInfo()
13080     assert self.instance is not None, \
13081       "Cannot retrieve locked instance %s" % self.op.instance_name
13082
13083     pnode = instance.primary_node
13084     assert pnode in self.owned_locks(locking.LEVEL_NODE)
13085     nodelist = list(instance.all_nodes)
13086     pnode_info = self.cfg.GetNodeInfo(pnode)
13087     self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13088
13089     #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13090     assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13091     group_info = self.cfg.GetNodeGroup(pnode_info.group)
13092
13093     # dictionary with instance information after the modification
13094     ispec = {}
13095
13096     # Prepare disk/NIC modifications
13097     self.diskmod = PrepareContainerMods(self.op.disks, None)
13098     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13099
13100     # OS change
13101     if self.op.os_name and not self.op.force:
13102       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13103                       self.op.force_variant)
13104       instance_os = self.op.os_name
13105     else:
13106       instance_os = instance.os
13107
13108     assert not (self.op.disk_template and self.op.disks), \
13109       "Can't modify disk template and apply disk changes at the same time"
13110
13111     if self.op.disk_template:
13112       if instance.disk_template == self.op.disk_template:
13113         raise errors.OpPrereqError("Instance already has disk template %s" %
13114                                    instance.disk_template, errors.ECODE_INVAL)
13115
13116       if (instance.disk_template,
13117           self.op.disk_template) not in self._DISK_CONVERSIONS:
13118         raise errors.OpPrereqError("Unsupported disk template conversion from"
13119                                    " %s to %s" % (instance.disk_template,
13120                                                   self.op.disk_template),
13121                                    errors.ECODE_INVAL)
13122       _CheckInstanceState(self, instance, INSTANCE_DOWN,
13123                           msg="cannot change disk template")
13124       if self.op.disk_template in constants.DTS_INT_MIRROR:
13125         if self.op.remote_node == pnode:
13126           raise errors.OpPrereqError("Given new secondary node %s is the same"
13127                                      " as the primary node of the instance" %
13128                                      self.op.remote_node, errors.ECODE_STATE)
13129         _CheckNodeOnline(self, self.op.remote_node)
13130         _CheckNodeNotDrained(self, self.op.remote_node)
13131         # FIXME: here we assume that the old instance type is DT_PLAIN
13132         assert instance.disk_template == constants.DT_PLAIN
13133         disks = [{constants.IDISK_SIZE: d.size,
13134                   constants.IDISK_VG: d.logical_id[0]}
13135                  for d in instance.disks]
13136         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13137         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13138
13139         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13140         snode_group = self.cfg.GetNodeGroup(snode_info.group)
13141         ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13142                                                                 snode_group)
13143         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13144                                 ignore=self.op.ignore_ipolicy)
13145         if pnode_info.group != snode_info.group:
13146           self.LogWarning("The primary and secondary nodes are in two"
13147                           " different node groups; the disk parameters"
13148                           " from the first disk's node group will be"
13149                           " used")
13150
13151     # hvparams processing
13152     if self.op.hvparams:
13153       hv_type = instance.hypervisor
13154       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13155       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13156       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13157
13158       # local check
13159       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
13160       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13161       self.hv_proposed = self.hv_new = hv_new # the new actual values
13162       self.hv_inst = i_hvdict # the new dict (without defaults)
13163     else:
13164       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13165                                               instance.hvparams)
13166       self.hv_new = self.hv_inst = {}
13167
13168     # beparams processing
13169     if self.op.beparams:
13170       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13171                                    use_none=True)
13172       objects.UpgradeBeParams(i_bedict)
13173       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13174       be_new = cluster.SimpleFillBE(i_bedict)
13175       self.be_proposed = self.be_new = be_new # the new actual values
13176       self.be_inst = i_bedict # the new dict (without defaults)
13177     else:
13178       self.be_new = self.be_inst = {}
13179       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13180     be_old = cluster.FillBE(instance)
13181
13182     # CPU param validation -- checking every time a parameter is
13183     # changed to cover all cases where either CPU mask or vcpus have
13184     # changed
13185     if (constants.BE_VCPUS in self.be_proposed and
13186         constants.HV_CPU_MASK in self.hv_proposed):
13187       cpu_list = \
13188         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13189       # Verify mask is consistent with number of vCPUs. Can skip this
13190       # test if only 1 entry in the CPU mask, which means same mask
13191       # is applied to all vCPUs.
13192       if (len(cpu_list) > 1 and
13193           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13194         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13195                                    " CPU mask [%s]" %
13196                                    (self.be_proposed[constants.BE_VCPUS],
13197                                     self.hv_proposed[constants.HV_CPU_MASK]),
13198                                    errors.ECODE_INVAL)
13199
13200       # Only perform this test if a new CPU mask is given
13201       if constants.HV_CPU_MASK in self.hv_new:
13202         # Calculate the largest CPU number requested
13203         max_requested_cpu = max(map(max, cpu_list))
13204         # Check that all of the instance's nodes have enough physical CPUs to
13205         # satisfy the requested CPU mask
13206         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13207                                 max_requested_cpu + 1, instance.hypervisor)
13208
13209     # osparams processing
13210     if self.op.osparams:
13211       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13212       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13213       self.os_inst = i_osdict # the new dict (without defaults)
13214     else:
13215       self.os_inst = {}
13216
13217     self.warn = []
13218
13219     #TODO(dynmem): do the appropriate check involving MINMEM
13220     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13221         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13222       mem_check_list = [pnode]
13223       if be_new[constants.BE_AUTO_BALANCE]:
13224         # either we changed auto_balance to yes or it was from before
13225         mem_check_list.extend(instance.secondary_nodes)
13226       instance_info = self.rpc.call_instance_info(pnode, instance.name,
13227                                                   instance.hypervisor)
13228       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13229                                          [instance.hypervisor])
13230       pninfo = nodeinfo[pnode]
13231       msg = pninfo.fail_msg
13232       if msg:
13233         # Assume the primary node is unreachable and go ahead
13234         self.warn.append("Can't get info from primary node %s: %s" %
13235                          (pnode, msg))
13236       else:
13237         (_, _, (pnhvinfo, )) = pninfo.payload
13238         if not isinstance(pnhvinfo.get("memory_free", None), int):
13239           self.warn.append("Node data from primary node %s doesn't contain"
13240                            " free memory information" % pnode)
13241         elif instance_info.fail_msg:
13242           self.warn.append("Can't get instance runtime information: %s" %
13243                            instance_info.fail_msg)
13244         else:
13245           if instance_info.payload:
13246             current_mem = int(instance_info.payload["memory"])
13247           else:
13248             # Assume instance not running
13249             # (there is a slight race condition here, but it's not very
13250             # probable, and we have no other way to check)
13251             # TODO: Describe race condition
13252             current_mem = 0
13253           #TODO(dynmem): do the appropriate check involving MINMEM
13254           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13255                       pnhvinfo["memory_free"])
13256           if miss_mem > 0:
13257             raise errors.OpPrereqError("This change will prevent the instance"
13258                                        " from starting, due to %d MB of memory"
13259                                        " missing on its primary node" %
13260                                        miss_mem, errors.ECODE_NORES)
13261
13262       if be_new[constants.BE_AUTO_BALANCE]:
13263         for node, nres in nodeinfo.items():
13264           if node not in instance.secondary_nodes:
13265             continue
13266           nres.Raise("Can't get info from secondary node %s" % node,
13267                      prereq=True, ecode=errors.ECODE_STATE)
13268           (_, _, (nhvinfo, )) = nres.payload
13269           if not isinstance(nhvinfo.get("memory_free", None), int):
13270             raise errors.OpPrereqError("Secondary node %s didn't return free"
13271                                        " memory information" % node,
13272                                        errors.ECODE_STATE)
13273           #TODO(dynmem): do the appropriate check involving MINMEM
13274           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13275             raise errors.OpPrereqError("This change will prevent the instance"
13276                                        " from failover to its secondary node"
13277                                        " %s, due to not enough memory" % node,
13278                                        errors.ECODE_STATE)
13279
13280     if self.op.runtime_mem:
13281       remote_info = self.rpc.call_instance_info(instance.primary_node,
13282                                                 instance.name,
13283                                                 instance.hypervisor)
13284       remote_info.Raise("Error checking node %s" % instance.primary_node)
13285       if not remote_info.payload: # not running already
13286         raise errors.OpPrereqError("Instance %s is not running" %
13287                                    instance.name, errors.ECODE_STATE)
13288
13289       current_memory = remote_info.payload["memory"]
13290       if (not self.op.force and
13291            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13292             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13293         raise errors.OpPrereqError("Instance %s must have memory between %d"
13294                                    " and %d MB of memory unless --force is"
13295                                    " given" %
13296                                    (instance.name,
13297                                     self.be_proposed[constants.BE_MINMEM],
13298                                     self.be_proposed[constants.BE_MAXMEM]),
13299                                    errors.ECODE_INVAL)
13300
13301       delta = self.op.runtime_mem - current_memory
13302       if delta > 0:
13303         _CheckNodeFreeMemory(self, instance.primary_node,
13304                              "ballooning memory for instance %s" %
13305                              instance.name, delta, instance.hypervisor)
13306
13307     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13308       raise errors.OpPrereqError("Disk operations not supported for"
13309                                  " diskless instances", errors.ECODE_INVAL)
13310
13311     def _PrepareNicCreate(_, params, private):
13312       self._PrepareNicModification(params, private, None, None,
13313                                    {}, cluster, pnode)
13314       return (None, None)
13315
13316     def _PrepareNicMod(_, nic, params, private):
13317       self._PrepareNicModification(params, private, nic.ip, nic.network,
13318                                    nic.nicparams, cluster, pnode)
13319       return None
13320
13321     def _PrepareNicRemove(_, params, __):
13322       ip = params.ip
13323       net = params.network
13324       if net is not None and ip is not None:
13325         self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13326
13327     # Verify NIC changes (operating on copy)
13328     nics = instance.nics[:]
13329     ApplyContainerMods("NIC", nics, None, self.nicmod,
13330                        _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13331     if len(nics) > constants.MAX_NICS:
13332       raise errors.OpPrereqError("Instance has too many network interfaces"
13333                                  " (%d), cannot add more" % constants.MAX_NICS,
13334                                  errors.ECODE_STATE)
13335
13336     # Verify disk changes (operating on a copy)
13337     disks = instance.disks[:]
13338     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13339     if len(disks) > constants.MAX_DISKS:
13340       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13341                                  " more" % constants.MAX_DISKS,
13342                                  errors.ECODE_STATE)
13343     disk_sizes = [disk.size for disk in instance.disks]
13344     disk_sizes.extend(params["size"] for (op, idx, params, private) in
13345                       self.diskmod if op == constants.DDM_ADD)
13346     ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13347     ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13348
13349     if self.op.offline is not None:
13350       if self.op.offline:
13351         msg = "can't change to offline"
13352       else:
13353         msg = "can't change to online"
13354       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13355
13356     # Pre-compute NIC changes (necessary to use result in hooks)
13357     self._nic_chgdesc = []
13358     if self.nicmod:
13359       # Operate on copies as this is still in prereq
13360       nics = [nic.Copy() for nic in instance.nics]
13361       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13362                          self._CreateNewNic, self._ApplyNicMods, None)
13363       self._new_nics = nics
13364       ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13365     else:
13366       self._new_nics = None
13367       ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13368
13369     if not self.op.ignore_ipolicy:
13370       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13371                                                               group_info)
13372
13373       # Fill ispec with backend parameters
13374       ispec[constants.ISPEC_SPINDLE_USE] = \
13375         self.be_new.get(constants.BE_SPINDLE_USE, None)
13376       ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13377                                                          None)
13378
13379       # Copy ispec to verify parameters with min/max values separately
13380       ispec_max = ispec.copy()
13381       ispec_max[constants.ISPEC_MEM_SIZE] = \
13382         self.be_new.get(constants.BE_MAXMEM, None)
13383       res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13384       ispec_min = ispec.copy()
13385       ispec_min[constants.ISPEC_MEM_SIZE] = \
13386         self.be_new.get(constants.BE_MINMEM, None)
13387       res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13388
13389       if (res_max or res_min):
13390         # FIXME: Improve error message by including information about whether
13391         # the upper or lower limit of the parameter fails the ipolicy.
13392         msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13393                (group_info, group_info.name,
13394                 utils.CommaJoin(set(res_max + res_min))))
13395         raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13396
13397   def _ConvertPlainToDrbd(self, feedback_fn):
13398     """Converts an instance from plain to drbd.
13399
13400     """
13401     feedback_fn("Converting template to drbd")
13402     instance = self.instance
13403     pnode = instance.primary_node
13404     snode = self.op.remote_node
13405
13406     assert instance.disk_template == constants.DT_PLAIN
13407
13408     # create a fake disk info for _GenerateDiskTemplate
13409     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13410                   constants.IDISK_VG: d.logical_id[0]}
13411                  for d in instance.disks]
13412     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13413                                       instance.name, pnode, [snode],
13414                                       disk_info, None, None, 0, feedback_fn,
13415                                       self.diskparams)
13416     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13417                                         self.diskparams)
13418     info = _GetInstanceInfoText(instance)
13419     feedback_fn("Creating additional volumes...")
13420     # first, create the missing data and meta devices
13421     for disk in anno_disks:
13422       # unfortunately this is... not too nice
13423       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13424                             info, True)
13425       for child in disk.children:
13426         _CreateSingleBlockDev(self, snode, instance, child, info, True)
13427     # at this stage, all new LVs have been created, we can rename the
13428     # old ones
13429     feedback_fn("Renaming original volumes...")
13430     rename_list = [(o, n.children[0].logical_id)
13431                    for (o, n) in zip(instance.disks, new_disks)]
13432     result = self.rpc.call_blockdev_rename(pnode, rename_list)
13433     result.Raise("Failed to rename original LVs")
13434
13435     feedback_fn("Initializing DRBD devices...")
13436     # all child devices are in place, we can now create the DRBD devices
13437     for disk in anno_disks:
13438       for node in [pnode, snode]:
13439         f_create = node == pnode
13440         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13441
13442     # at this point, the instance has been modified
13443     instance.disk_template = constants.DT_DRBD8
13444     instance.disks = new_disks
13445     self.cfg.Update(instance, feedback_fn)
13446
13447     # Release node locks while waiting for sync
13448     _ReleaseLocks(self, locking.LEVEL_NODE)
13449
13450     # disks are created, waiting for sync
13451     disk_abort = not _WaitForSync(self, instance,
13452                                   oneshot=not self.op.wait_for_sync)
13453     if disk_abort:
13454       raise errors.OpExecError("There are some degraded disks for"
13455                                " this instance, please cleanup manually")
13456
13457     # Node resource locks will be released by caller
13458
13459   def _ConvertDrbdToPlain(self, feedback_fn):
13460     """Converts an instance from drbd to plain.
13461
13462     """
13463     instance = self.instance
13464
13465     assert len(instance.secondary_nodes) == 1
13466     assert instance.disk_template == constants.DT_DRBD8
13467
13468     pnode = instance.primary_node
13469     snode = instance.secondary_nodes[0]
13470     feedback_fn("Converting template to plain")
13471
13472     old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13473     new_disks = [d.children[0] for d in instance.disks]
13474
13475     # copy over size and mode
13476     for parent, child in zip(old_disks, new_disks):
13477       child.size = parent.size
13478       child.mode = parent.mode
13479
13480     # this is a DRBD disk, return its port to the pool
13481     # NOTE: this must be done right before the call to cfg.Update!
13482     for disk in old_disks:
13483       tcp_port = disk.logical_id[2]
13484       self.cfg.AddTcpUdpPort(tcp_port)
13485
13486     # update instance structure
13487     instance.disks = new_disks
13488     instance.disk_template = constants.DT_PLAIN
13489     self.cfg.Update(instance, feedback_fn)
13490
13491     # Release locks in case removing disks takes a while
13492     _ReleaseLocks(self, locking.LEVEL_NODE)
13493
13494     feedback_fn("Removing volumes on the secondary node...")
13495     for disk in old_disks:
13496       self.cfg.SetDiskID(disk, snode)
13497       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13498       if msg:
13499         self.LogWarning("Could not remove block device %s on node %s,"
13500                         " continuing anyway: %s", disk.iv_name, snode, msg)
13501
13502     feedback_fn("Removing unneeded volumes on the primary node...")
13503     for idx, disk in enumerate(old_disks):
13504       meta = disk.children[1]
13505       self.cfg.SetDiskID(meta, pnode)
13506       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13507       if msg:
13508         self.LogWarning("Could not remove metadata for disk %d on node %s,"
13509                         " continuing anyway: %s", idx, pnode, msg)
13510
13511   def _CreateNewDisk(self, idx, params, _):
13512     """Creates a new disk.
13513
13514     """
13515     instance = self.instance
13516
13517     # add a new disk
13518     if instance.disk_template in constants.DTS_FILEBASED:
13519       (file_driver, file_path) = instance.disks[0].logical_id
13520       file_path = os.path.dirname(file_path)
13521     else:
13522       file_driver = file_path = None
13523
13524     disk = \
13525       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13526                             instance.primary_node, instance.secondary_nodes,
13527                             [params], file_path, file_driver, idx,
13528                             self.Log, self.diskparams)[0]
13529
13530     info = _GetInstanceInfoText(instance)
13531
13532     logging.info("Creating volume %s for instance %s",
13533                  disk.iv_name, instance.name)
13534     # Note: this needs to be kept in sync with _CreateDisks
13535     #HARDCODE
13536     for node in instance.all_nodes:
13537       f_create = (node == instance.primary_node)
13538       try:
13539         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13540       except errors.OpExecError, err:
13541         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13542                         disk.iv_name, disk, node, err)
13543
13544     return (disk, [
13545       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13546       ])
13547
13548   @staticmethod
13549   def _ModifyDisk(idx, disk, params, _):
13550     """Modifies a disk.
13551
13552     """
13553     disk.mode = params[constants.IDISK_MODE]
13554
13555     return [
13556       ("disk.mode/%d" % idx, disk.mode),
13557       ]
13558
13559   def _RemoveDisk(self, idx, root, _):
13560     """Removes a disk.
13561
13562     """
13563     (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13564     for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13565       self.cfg.SetDiskID(disk, node)
13566       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13567       if msg:
13568         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13569                         " continuing anyway", idx, node, msg)
13570
13571     # if this is a DRBD disk, return its port to the pool
13572     if root.dev_type in constants.LDS_DRBD:
13573       self.cfg.AddTcpUdpPort(root.logical_id[2])
13574
13575   @staticmethod
13576   def _CreateNewNic(idx, params, private):
13577     """Creates data structure for a new network interface.
13578
13579     """
13580     mac = params[constants.INIC_MAC]
13581     ip = params.get(constants.INIC_IP, None)
13582     net = params.get(constants.INIC_NETWORK, None)
13583     #TODO: not private.filled?? can a nic have no nicparams??
13584     nicparams = private.filled
13585
13586     return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13587       ("nic.%d" % idx,
13588        "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13589        (mac, ip, private.filled[constants.NIC_MODE],
13590        private.filled[constants.NIC_LINK],
13591        net)),
13592       ])
13593
13594   @staticmethod
13595   def _ApplyNicMods(idx, nic, params, private):
13596     """Modifies a network interface.
13597
13598     """
13599     changes = []
13600
13601     for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13602       if key in params:
13603         changes.append(("nic.%s/%d" % (key, idx), params[key]))
13604         setattr(nic, key, params[key])
13605
13606     if private.filled:
13607       nic.nicparams = private.filled
13608
13609       for (key, val) in nic.nicparams.items():
13610         changes.append(("nic.%s/%d" % (key, idx), val))
13611
13612     return changes
13613
13614   def Exec(self, feedback_fn):
13615     """Modifies an instance.
13616
13617     All parameters take effect only at the next restart of the instance.
13618
13619     """
13620     # Process here the warnings from CheckPrereq, as we don't have a
13621     # feedback_fn there.
13622     # TODO: Replace with self.LogWarning
13623     for warn in self.warn:
13624       feedback_fn("WARNING: %s" % warn)
13625
13626     assert ((self.op.disk_template is None) ^
13627             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13628       "Not owning any node resource locks"
13629
13630     result = []
13631     instance = self.instance
13632
13633     # runtime memory
13634     if self.op.runtime_mem:
13635       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13636                                                      instance,
13637                                                      self.op.runtime_mem)
13638       rpcres.Raise("Cannot modify instance runtime memory")
13639       result.append(("runtime_memory", self.op.runtime_mem))
13640
13641     # Apply disk changes
13642     ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13643                        self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13644     _UpdateIvNames(0, instance.disks)
13645
13646     if self.op.disk_template:
13647       if __debug__:
13648         check_nodes = set(instance.all_nodes)
13649         if self.op.remote_node:
13650           check_nodes.add(self.op.remote_node)
13651         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13652           owned = self.owned_locks(level)
13653           assert not (check_nodes - owned), \
13654             ("Not owning the correct locks, owning %r, expected at least %r" %
13655              (owned, check_nodes))
13656
13657       r_shut = _ShutdownInstanceDisks(self, instance)
13658       if not r_shut:
13659         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13660                                  " proceed with disk template conversion")
13661       mode = (instance.disk_template, self.op.disk_template)
13662       try:
13663         self._DISK_CONVERSIONS[mode](self, feedback_fn)
13664       except:
13665         self.cfg.ReleaseDRBDMinors(instance.name)
13666         raise
13667       result.append(("disk_template", self.op.disk_template))
13668
13669       assert instance.disk_template == self.op.disk_template, \
13670         ("Expected disk template '%s', found '%s'" %
13671          (self.op.disk_template, instance.disk_template))
13672
13673     # Release node and resource locks if there are any (they might already have
13674     # been released during disk conversion)
13675     _ReleaseLocks(self, locking.LEVEL_NODE)
13676     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13677
13678     # Apply NIC changes
13679     if self._new_nics is not None:
13680       instance.nics = self._new_nics
13681       result.extend(self._nic_chgdesc)
13682
13683     # hvparams changes
13684     if self.op.hvparams:
13685       instance.hvparams = self.hv_inst
13686       for key, val in self.op.hvparams.iteritems():
13687         result.append(("hv/%s" % key, val))
13688
13689     # beparams changes
13690     if self.op.beparams:
13691       instance.beparams = self.be_inst
13692       for key, val in self.op.beparams.iteritems():
13693         result.append(("be/%s" % key, val))
13694
13695     # OS change
13696     if self.op.os_name:
13697       instance.os = self.op.os_name
13698
13699     # osparams changes
13700     if self.op.osparams:
13701       instance.osparams = self.os_inst
13702       for key, val in self.op.osparams.iteritems():
13703         result.append(("os/%s" % key, val))
13704
13705     if self.op.offline is None:
13706       # Ignore
13707       pass
13708     elif self.op.offline:
13709       # Mark instance as offline
13710       self.cfg.MarkInstanceOffline(instance.name)
13711       result.append(("admin_state", constants.ADMINST_OFFLINE))
13712     else:
13713       # Mark instance as online, but stopped
13714       self.cfg.MarkInstanceDown(instance.name)
13715       result.append(("admin_state", constants.ADMINST_DOWN))
13716
13717     self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13718
13719     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13720                 self.owned_locks(locking.LEVEL_NODE)), \
13721       "All node locks should have been released by now"
13722
13723     return result
13724
13725   _DISK_CONVERSIONS = {
13726     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13727     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13728     }
13729
13730
13731 class LUInstanceChangeGroup(LogicalUnit):
13732   HPATH = "instance-change-group"
13733   HTYPE = constants.HTYPE_INSTANCE
13734   REQ_BGL = False
13735
13736   def ExpandNames(self):
13737     self.share_locks = _ShareAll()
13738
13739     self.needed_locks = {
13740       locking.LEVEL_NODEGROUP: [],
13741       locking.LEVEL_NODE: [],
13742       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
13743       }
13744
13745     self._ExpandAndLockInstance()
13746
13747     if self.op.target_groups:
13748       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13749                                   self.op.target_groups)
13750     else:
13751       self.req_target_uuids = None
13752
13753     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13754
13755   def DeclareLocks(self, level):
13756     if level == locking.LEVEL_NODEGROUP:
13757       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13758
13759       if self.req_target_uuids:
13760         lock_groups = set(self.req_target_uuids)
13761
13762         # Lock all groups used by instance optimistically; this requires going
13763         # via the node before it's locked, requiring verification later on
13764         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13765         lock_groups.update(instance_groups)
13766       else:
13767         # No target groups, need to lock all of them
13768         lock_groups = locking.ALL_SET
13769
13770       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13771
13772     elif level == locking.LEVEL_NODE:
13773       if self.req_target_uuids:
13774         # Lock all nodes used by instances
13775         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13776         self._LockInstancesNodes()
13777
13778         # Lock all nodes in all potential target groups
13779         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13780                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13781         member_nodes = [node_name
13782                         for group in lock_groups
13783                         for node_name in self.cfg.GetNodeGroup(group).members]
13784         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13785       else:
13786         # Lock all nodes as all groups are potential targets
13787         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13788
13789   def CheckPrereq(self):
13790     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13791     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13792     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13793
13794     assert (self.req_target_uuids is None or
13795             owned_groups.issuperset(self.req_target_uuids))
13796     assert owned_instances == set([self.op.instance_name])
13797
13798     # Get instance information
13799     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13800
13801     # Check if node groups for locked instance are still correct
13802     assert owned_nodes.issuperset(self.instance.all_nodes), \
13803       ("Instance %s's nodes changed while we kept the lock" %
13804        self.op.instance_name)
13805
13806     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13807                                            owned_groups)
13808
13809     if self.req_target_uuids:
13810       # User requested specific target groups
13811       self.target_uuids = frozenset(self.req_target_uuids)
13812     else:
13813       # All groups except those used by the instance are potential targets
13814       self.target_uuids = owned_groups - inst_groups
13815
13816     conflicting_groups = self.target_uuids & inst_groups
13817     if conflicting_groups:
13818       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13819                                  " used by the instance '%s'" %
13820                                  (utils.CommaJoin(conflicting_groups),
13821                                   self.op.instance_name),
13822                                  errors.ECODE_INVAL)
13823
13824     if not self.target_uuids:
13825       raise errors.OpPrereqError("There are no possible target groups",
13826                                  errors.ECODE_INVAL)
13827
13828   def BuildHooksEnv(self):
13829     """Build hooks env.
13830
13831     """
13832     assert self.target_uuids
13833
13834     env = {
13835       "TARGET_GROUPS": " ".join(self.target_uuids),
13836       }
13837
13838     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13839
13840     return env
13841
13842   def BuildHooksNodes(self):
13843     """Build hooks nodes.
13844
13845     """
13846     mn = self.cfg.GetMasterNode()
13847     return ([mn], [mn])
13848
13849   def Exec(self, feedback_fn):
13850     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13851
13852     assert instances == [self.op.instance_name], "Instance not locked"
13853
13854     req = iallocator.IAReqGroupChange(instances=instances,
13855                                       target_groups=list(self.target_uuids))
13856     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13857
13858     ial.Run(self.op.iallocator)
13859
13860     if not ial.success:
13861       raise errors.OpPrereqError("Can't compute solution for changing group of"
13862                                  " instance '%s' using iallocator '%s': %s" %
13863                                  (self.op.instance_name, self.op.iallocator,
13864                                   ial.info), errors.ECODE_NORES)
13865
13866     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13867
13868     self.LogInfo("Iallocator returned %s job(s) for changing group of"
13869                  " instance '%s'", len(jobs), self.op.instance_name)
13870
13871     return ResultWithJobs(jobs)
13872
13873
13874 class LUBackupQuery(NoHooksLU):
13875   """Query the exports list
13876
13877   """
13878   REQ_BGL = False
13879
13880   def CheckArguments(self):
13881     self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13882                              ["node", "export"], self.op.use_locking)
13883
13884   def ExpandNames(self):
13885     self.expq.ExpandNames(self)
13886
13887   def DeclareLocks(self, level):
13888     self.expq.DeclareLocks(self, level)
13889
13890   def Exec(self, feedback_fn):
13891     result = {}
13892
13893     for (node, expname) in self.expq.OldStyleQuery(self):
13894       if expname is None:
13895         result[node] = False
13896       else:
13897         result.setdefault(node, []).append(expname)
13898
13899     return result
13900
13901
13902 class _ExportQuery(_QueryBase):
13903   FIELDS = query.EXPORT_FIELDS
13904
13905   #: The node name is not a unique key for this query
13906   SORT_FIELD = "node"
13907
13908   def ExpandNames(self, lu):
13909     lu.needed_locks = {}
13910
13911     # The following variables interact with _QueryBase._GetNames
13912     if self.names:
13913       self.wanted = _GetWantedNodes(lu, self.names)
13914     else:
13915       self.wanted = locking.ALL_SET
13916
13917     self.do_locking = self.use_locking
13918
13919     if self.do_locking:
13920       lu.share_locks = _ShareAll()
13921       lu.needed_locks = {
13922         locking.LEVEL_NODE: self.wanted,
13923         }
13924
13925   def DeclareLocks(self, lu, level):
13926     pass
13927
13928   def _GetQueryData(self, lu):
13929     """Computes the list of nodes and their attributes.
13930
13931     """
13932     # Locking is not used
13933     # TODO
13934     assert not (compat.any(lu.glm.is_owned(level)
13935                            for level in locking.LEVELS
13936                            if level != locking.LEVEL_CLUSTER) or
13937                 self.do_locking or self.use_locking)
13938
13939     nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13940
13941     result = []
13942
13943     for (node, nres) in lu.rpc.call_export_list(nodes).items():
13944       if nres.fail_msg:
13945         result.append((node, None))
13946       else:
13947         result.extend((node, expname) for expname in nres.payload)
13948
13949     return result
13950
13951
13952 class LUBackupPrepare(NoHooksLU):
13953   """Prepares an instance for an export and returns useful information.
13954
13955   """
13956   REQ_BGL = False
13957
13958   def ExpandNames(self):
13959     self._ExpandAndLockInstance()
13960
13961   def CheckPrereq(self):
13962     """Check prerequisites.
13963
13964     """
13965     instance_name = self.op.instance_name
13966
13967     self.instance = self.cfg.GetInstanceInfo(instance_name)
13968     assert self.instance is not None, \
13969           "Cannot retrieve locked instance %s" % self.op.instance_name
13970     _CheckNodeOnline(self, self.instance.primary_node)
13971
13972     self._cds = _GetClusterDomainSecret()
13973
13974   def Exec(self, feedback_fn):
13975     """Prepares an instance for an export.
13976
13977     """
13978     instance = self.instance
13979
13980     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13981       salt = utils.GenerateSecret(8)
13982
13983       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13984       result = self.rpc.call_x509_cert_create(instance.primary_node,
13985                                               constants.RIE_CERT_VALIDITY)
13986       result.Raise("Can't create X509 key and certificate on %s" % result.node)
13987
13988       (name, cert_pem) = result.payload
13989
13990       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13991                                              cert_pem)
13992
13993       return {
13994         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13995         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13996                           salt),
13997         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13998         }
13999
14000     return None
14001
14002
14003 class LUBackupExport(LogicalUnit):
14004   """Export an instance to an image in the cluster.
14005
14006   """
14007   HPATH = "instance-export"
14008   HTYPE = constants.HTYPE_INSTANCE
14009   REQ_BGL = False
14010
14011   def CheckArguments(self):
14012     """Check the arguments.
14013
14014     """
14015     self.x509_key_name = self.op.x509_key_name
14016     self.dest_x509_ca_pem = self.op.destination_x509_ca
14017
14018     if self.op.mode == constants.EXPORT_MODE_REMOTE:
14019       if not self.x509_key_name:
14020         raise errors.OpPrereqError("Missing X509 key name for encryption",
14021                                    errors.ECODE_INVAL)
14022
14023       if not self.dest_x509_ca_pem:
14024         raise errors.OpPrereqError("Missing destination X509 CA",
14025                                    errors.ECODE_INVAL)
14026
14027   def ExpandNames(self):
14028     self._ExpandAndLockInstance()
14029
14030     # Lock all nodes for local exports
14031     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14032       # FIXME: lock only instance primary and destination node
14033       #
14034       # Sad but true, for now we have do lock all nodes, as we don't know where
14035       # the previous export might be, and in this LU we search for it and
14036       # remove it from its current node. In the future we could fix this by:
14037       #  - making a tasklet to search (share-lock all), then create the
14038       #    new one, then one to remove, after
14039       #  - removing the removal operation altogether
14040       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14041
14042   def DeclareLocks(self, level):
14043     """Last minute lock declaration."""
14044     # All nodes are locked anyway, so nothing to do here.
14045
14046   def BuildHooksEnv(self):
14047     """Build hooks env.
14048
14049     This will run on the master, primary node and target node.
14050
14051     """
14052     env = {
14053       "EXPORT_MODE": self.op.mode,
14054       "EXPORT_NODE": self.op.target_node,
14055       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14056       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14057       # TODO: Generic function for boolean env variables
14058       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14059       }
14060
14061     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14062
14063     return env
14064
14065   def BuildHooksNodes(self):
14066     """Build hooks nodes.
14067
14068     """
14069     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14070
14071     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14072       nl.append(self.op.target_node)
14073
14074     return (nl, nl)
14075
14076   def CheckPrereq(self):
14077     """Check prerequisites.
14078
14079     This checks that the instance and node names are valid.
14080
14081     """
14082     instance_name = self.op.instance_name
14083
14084     self.instance = self.cfg.GetInstanceInfo(instance_name)
14085     assert self.instance is not None, \
14086           "Cannot retrieve locked instance %s" % self.op.instance_name
14087     _CheckNodeOnline(self, self.instance.primary_node)
14088
14089     if (self.op.remove_instance and
14090         self.instance.admin_state == constants.ADMINST_UP and
14091         not self.op.shutdown):
14092       raise errors.OpPrereqError("Can not remove instance without shutting it"
14093                                  " down before", errors.ECODE_STATE)
14094
14095     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14096       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14097       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14098       assert self.dst_node is not None
14099
14100       _CheckNodeOnline(self, self.dst_node.name)
14101       _CheckNodeNotDrained(self, self.dst_node.name)
14102
14103       self._cds = None
14104       self.dest_disk_info = None
14105       self.dest_x509_ca = None
14106
14107     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14108       self.dst_node = None
14109
14110       if len(self.op.target_node) != len(self.instance.disks):
14111         raise errors.OpPrereqError(("Received destination information for %s"
14112                                     " disks, but instance %s has %s disks") %
14113                                    (len(self.op.target_node), instance_name,
14114                                     len(self.instance.disks)),
14115                                    errors.ECODE_INVAL)
14116
14117       cds = _GetClusterDomainSecret()
14118
14119       # Check X509 key name
14120       try:
14121         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14122       except (TypeError, ValueError), err:
14123         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14124                                    errors.ECODE_INVAL)
14125
14126       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14127         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14128                                    errors.ECODE_INVAL)
14129
14130       # Load and verify CA
14131       try:
14132         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14133       except OpenSSL.crypto.Error, err:
14134         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14135                                    (err, ), errors.ECODE_INVAL)
14136
14137       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14138       if errcode is not None:
14139         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14140                                    (msg, ), errors.ECODE_INVAL)
14141
14142       self.dest_x509_ca = cert
14143
14144       # Verify target information
14145       disk_info = []
14146       for idx, disk_data in enumerate(self.op.target_node):
14147         try:
14148           (host, port, magic) = \
14149             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14150         except errors.GenericError, err:
14151           raise errors.OpPrereqError("Target info for disk %s: %s" %
14152                                      (idx, err), errors.ECODE_INVAL)
14153
14154         disk_info.append((host, port, magic))
14155
14156       assert len(disk_info) == len(self.op.target_node)
14157       self.dest_disk_info = disk_info
14158
14159     else:
14160       raise errors.ProgrammerError("Unhandled export mode %r" %
14161                                    self.op.mode)
14162
14163     # instance disk type verification
14164     # TODO: Implement export support for file-based disks
14165     for disk in self.instance.disks:
14166       if disk.dev_type == constants.LD_FILE:
14167         raise errors.OpPrereqError("Export not supported for instances with"
14168                                    " file-based disks", errors.ECODE_INVAL)
14169
14170   def _CleanupExports(self, feedback_fn):
14171     """Removes exports of current instance from all other nodes.
14172
14173     If an instance in a cluster with nodes A..D was exported to node C, its
14174     exports will be removed from the nodes A, B and D.
14175
14176     """
14177     assert self.op.mode != constants.EXPORT_MODE_REMOTE
14178
14179     nodelist = self.cfg.GetNodeList()
14180     nodelist.remove(self.dst_node.name)
14181
14182     # on one-node clusters nodelist will be empty after the removal
14183     # if we proceed the backup would be removed because OpBackupQuery
14184     # substitutes an empty list with the full cluster node list.
14185     iname = self.instance.name
14186     if nodelist:
14187       feedback_fn("Removing old exports for instance %s" % iname)
14188       exportlist = self.rpc.call_export_list(nodelist)
14189       for node in exportlist:
14190         if exportlist[node].fail_msg:
14191           continue
14192         if iname in exportlist[node].payload:
14193           msg = self.rpc.call_export_remove(node, iname).fail_msg
14194           if msg:
14195             self.LogWarning("Could not remove older export for instance %s"
14196                             " on node %s: %s", iname, node, msg)
14197
14198   def Exec(self, feedback_fn):
14199     """Export an instance to an image in the cluster.
14200
14201     """
14202     assert self.op.mode in constants.EXPORT_MODES
14203
14204     instance = self.instance
14205     src_node = instance.primary_node
14206
14207     if self.op.shutdown:
14208       # shutdown the instance, but not the disks
14209       feedback_fn("Shutting down instance %s" % instance.name)
14210       result = self.rpc.call_instance_shutdown(src_node, instance,
14211                                                self.op.shutdown_timeout)
14212       # TODO: Maybe ignore failures if ignore_remove_failures is set
14213       result.Raise("Could not shutdown instance %s on"
14214                    " node %s" % (instance.name, src_node))
14215
14216     # set the disks ID correctly since call_instance_start needs the
14217     # correct drbd minor to create the symlinks
14218     for disk in instance.disks:
14219       self.cfg.SetDiskID(disk, src_node)
14220
14221     activate_disks = (instance.admin_state != constants.ADMINST_UP)
14222
14223     if activate_disks:
14224       # Activate the instance disks if we'exporting a stopped instance
14225       feedback_fn("Activating disks for %s" % instance.name)
14226       _StartInstanceDisks(self, instance, None)
14227
14228     try:
14229       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14230                                                      instance)
14231
14232       helper.CreateSnapshots()
14233       try:
14234         if (self.op.shutdown and
14235             instance.admin_state == constants.ADMINST_UP and
14236             not self.op.remove_instance):
14237           assert not activate_disks
14238           feedback_fn("Starting instance %s" % instance.name)
14239           result = self.rpc.call_instance_start(src_node,
14240                                                 (instance, None, None), False)
14241           msg = result.fail_msg
14242           if msg:
14243             feedback_fn("Failed to start instance: %s" % msg)
14244             _ShutdownInstanceDisks(self, instance)
14245             raise errors.OpExecError("Could not start instance: %s" % msg)
14246
14247         if self.op.mode == constants.EXPORT_MODE_LOCAL:
14248           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14249         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14250           connect_timeout = constants.RIE_CONNECT_TIMEOUT
14251           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14252
14253           (key_name, _, _) = self.x509_key_name
14254
14255           dest_ca_pem = \
14256             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14257                                             self.dest_x509_ca)
14258
14259           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14260                                                      key_name, dest_ca_pem,
14261                                                      timeouts)
14262       finally:
14263         helper.Cleanup()
14264
14265       # Check for backwards compatibility
14266       assert len(dresults) == len(instance.disks)
14267       assert compat.all(isinstance(i, bool) for i in dresults), \
14268              "Not all results are boolean: %r" % dresults
14269
14270     finally:
14271       if activate_disks:
14272         feedback_fn("Deactivating disks for %s" % instance.name)
14273         _ShutdownInstanceDisks(self, instance)
14274
14275     if not (compat.all(dresults) and fin_resu):
14276       failures = []
14277       if not fin_resu:
14278         failures.append("export finalization")
14279       if not compat.all(dresults):
14280         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14281                                if not dsk)
14282         failures.append("disk export: disk(s) %s" % fdsk)
14283
14284       raise errors.OpExecError("Export failed, errors in %s" %
14285                                utils.CommaJoin(failures))
14286
14287     # At this point, the export was successful, we can cleanup/finish
14288
14289     # Remove instance if requested
14290     if self.op.remove_instance:
14291       feedback_fn("Removing instance %s" % instance.name)
14292       _RemoveInstance(self, feedback_fn, instance,
14293                       self.op.ignore_remove_failures)
14294
14295     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14296       self._CleanupExports(feedback_fn)
14297
14298     return fin_resu, dresults
14299
14300
14301 class LUBackupRemove(NoHooksLU):
14302   """Remove exports related to the named instance.
14303
14304   """
14305   REQ_BGL = False
14306
14307   def ExpandNames(self):
14308     self.needed_locks = {}
14309     # We need all nodes to be locked in order for RemoveExport to work, but we
14310     # don't need to lock the instance itself, as nothing will happen to it (and
14311     # we can remove exports also for a removed instance)
14312     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14313
14314   def Exec(self, feedback_fn):
14315     """Remove any export.
14316
14317     """
14318     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14319     # If the instance was not found we'll try with the name that was passed in.
14320     # This will only work if it was an FQDN, though.
14321     fqdn_warn = False
14322     if not instance_name:
14323       fqdn_warn = True
14324       instance_name = self.op.instance_name
14325
14326     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14327     exportlist = self.rpc.call_export_list(locked_nodes)
14328     found = False
14329     for node in exportlist:
14330       msg = exportlist[node].fail_msg
14331       if msg:
14332         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14333         continue
14334       if instance_name in exportlist[node].payload:
14335         found = True
14336         result = self.rpc.call_export_remove(node, instance_name)
14337         msg = result.fail_msg
14338         if msg:
14339           logging.error("Could not remove export for instance %s"
14340                         " on node %s: %s", instance_name, node, msg)
14341
14342     if fqdn_warn and not found:
14343       feedback_fn("Export not found. If trying to remove an export belonging"
14344                   " to a deleted instance please use its Fully Qualified"
14345                   " Domain Name.")
14346
14347
14348 class LUGroupAdd(LogicalUnit):
14349   """Logical unit for creating node groups.
14350
14351   """
14352   HPATH = "group-add"
14353   HTYPE = constants.HTYPE_GROUP
14354   REQ_BGL = False
14355
14356   def ExpandNames(self):
14357     # We need the new group's UUID here so that we can create and acquire the
14358     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14359     # that it should not check whether the UUID exists in the configuration.
14360     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14361     self.needed_locks = {}
14362     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14363
14364   def CheckPrereq(self):
14365     """Check prerequisites.
14366
14367     This checks that the given group name is not an existing node group
14368     already.
14369
14370     """
14371     try:
14372       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14373     except errors.OpPrereqError:
14374       pass
14375     else:
14376       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14377                                  " node group (UUID: %s)" %
14378                                  (self.op.group_name, existing_uuid),
14379                                  errors.ECODE_EXISTS)
14380
14381     if self.op.ndparams:
14382       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14383
14384     if self.op.hv_state:
14385       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14386     else:
14387       self.new_hv_state = None
14388
14389     if self.op.disk_state:
14390       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14391     else:
14392       self.new_disk_state = None
14393
14394     if self.op.diskparams:
14395       for templ in constants.DISK_TEMPLATES:
14396         if templ in self.op.diskparams:
14397           utils.ForceDictType(self.op.diskparams[templ],
14398                               constants.DISK_DT_TYPES)
14399       self.new_diskparams = self.op.diskparams
14400       try:
14401         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14402       except errors.OpPrereqError, err:
14403         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14404                                    errors.ECODE_INVAL)
14405     else:
14406       self.new_diskparams = {}
14407
14408     if self.op.ipolicy:
14409       cluster = self.cfg.GetClusterInfo()
14410       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14411       try:
14412         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14413       except errors.ConfigurationError, err:
14414         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14415                                    errors.ECODE_INVAL)
14416
14417   def BuildHooksEnv(self):
14418     """Build hooks env.
14419
14420     """
14421     return {
14422       "GROUP_NAME": self.op.group_name,
14423       }
14424
14425   def BuildHooksNodes(self):
14426     """Build hooks nodes.
14427
14428     """
14429     mn = self.cfg.GetMasterNode()
14430     return ([mn], [mn])
14431
14432   def Exec(self, feedback_fn):
14433     """Add the node group to the cluster.
14434
14435     """
14436     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14437                                   uuid=self.group_uuid,
14438                                   alloc_policy=self.op.alloc_policy,
14439                                   ndparams=self.op.ndparams,
14440                                   diskparams=self.new_diskparams,
14441                                   ipolicy=self.op.ipolicy,
14442                                   hv_state_static=self.new_hv_state,
14443                                   disk_state_static=self.new_disk_state)
14444
14445     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14446     del self.remove_locks[locking.LEVEL_NODEGROUP]
14447
14448
14449 class LUGroupAssignNodes(NoHooksLU):
14450   """Logical unit for assigning nodes to groups.
14451
14452   """
14453   REQ_BGL = False
14454
14455   def ExpandNames(self):
14456     # These raise errors.OpPrereqError on their own:
14457     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14458     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14459
14460     # We want to lock all the affected nodes and groups. We have readily
14461     # available the list of nodes, and the *destination* group. To gather the
14462     # list of "source" groups, we need to fetch node information later on.
14463     self.needed_locks = {
14464       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14465       locking.LEVEL_NODE: self.op.nodes,
14466       }
14467
14468   def DeclareLocks(self, level):
14469     if level == locking.LEVEL_NODEGROUP:
14470       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14471
14472       # Try to get all affected nodes' groups without having the group or node
14473       # lock yet. Needs verification later in the code flow.
14474       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14475
14476       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14477
14478   def CheckPrereq(self):
14479     """Check prerequisites.
14480
14481     """
14482     assert self.needed_locks[locking.LEVEL_NODEGROUP]
14483     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14484             frozenset(self.op.nodes))
14485
14486     expected_locks = (set([self.group_uuid]) |
14487                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14488     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14489     if actual_locks != expected_locks:
14490       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14491                                " current groups are '%s', used to be '%s'" %
14492                                (utils.CommaJoin(expected_locks),
14493                                 utils.CommaJoin(actual_locks)))
14494
14495     self.node_data = self.cfg.GetAllNodesInfo()
14496     self.group = self.cfg.GetNodeGroup(self.group_uuid)
14497     instance_data = self.cfg.GetAllInstancesInfo()
14498
14499     if self.group is None:
14500       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14501                                (self.op.group_name, self.group_uuid))
14502
14503     (new_splits, previous_splits) = \
14504       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14505                                              for node in self.op.nodes],
14506                                             self.node_data, instance_data)
14507
14508     if new_splits:
14509       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14510
14511       if not self.op.force:
14512         raise errors.OpExecError("The following instances get split by this"
14513                                  " change and --force was not given: %s" %
14514                                  fmt_new_splits)
14515       else:
14516         self.LogWarning("This operation will split the following instances: %s",
14517                         fmt_new_splits)
14518
14519         if previous_splits:
14520           self.LogWarning("In addition, these already-split instances continue"
14521                           " to be split across groups: %s",
14522                           utils.CommaJoin(utils.NiceSort(previous_splits)))
14523
14524   def Exec(self, feedback_fn):
14525     """Assign nodes to a new group.
14526
14527     """
14528     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14529
14530     self.cfg.AssignGroupNodes(mods)
14531
14532   @staticmethod
14533   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14534     """Check for split instances after a node assignment.
14535
14536     This method considers a series of node assignments as an atomic operation,
14537     and returns information about split instances after applying the set of
14538     changes.
14539
14540     In particular, it returns information about newly split instances, and
14541     instances that were already split, and remain so after the change.
14542
14543     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14544     considered.
14545
14546     @type changes: list of (node_name, new_group_uuid) pairs.
14547     @param changes: list of node assignments to consider.
14548     @param node_data: a dict with data for all nodes
14549     @param instance_data: a dict with all instances to consider
14550     @rtype: a two-tuple
14551     @return: a list of instances that were previously okay and result split as a
14552       consequence of this change, and a list of instances that were previously
14553       split and this change does not fix.
14554
14555     """
14556     changed_nodes = dict((node, group) for node, group in changes
14557                          if node_data[node].group != group)
14558
14559     all_split_instances = set()
14560     previously_split_instances = set()
14561
14562     def InstanceNodes(instance):
14563       return [instance.primary_node] + list(instance.secondary_nodes)
14564
14565     for inst in instance_data.values():
14566       if inst.disk_template not in constants.DTS_INT_MIRROR:
14567         continue
14568
14569       instance_nodes = InstanceNodes(inst)
14570
14571       if len(set(node_data[node].group for node in instance_nodes)) > 1:
14572         previously_split_instances.add(inst.name)
14573
14574       if len(set(changed_nodes.get(node, node_data[node].group)
14575                  for node in instance_nodes)) > 1:
14576         all_split_instances.add(inst.name)
14577
14578     return (list(all_split_instances - previously_split_instances),
14579             list(previously_split_instances & all_split_instances))
14580
14581
14582 class _GroupQuery(_QueryBase):
14583   FIELDS = query.GROUP_FIELDS
14584
14585   def ExpandNames(self, lu):
14586     lu.needed_locks = {}
14587
14588     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14589     self._cluster = lu.cfg.GetClusterInfo()
14590     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14591
14592     if not self.names:
14593       self.wanted = [name_to_uuid[name]
14594                      for name in utils.NiceSort(name_to_uuid.keys())]
14595     else:
14596       # Accept names to be either names or UUIDs.
14597       missing = []
14598       self.wanted = []
14599       all_uuid = frozenset(self._all_groups.keys())
14600
14601       for name in self.names:
14602         if name in all_uuid:
14603           self.wanted.append(name)
14604         elif name in name_to_uuid:
14605           self.wanted.append(name_to_uuid[name])
14606         else:
14607           missing.append(name)
14608
14609       if missing:
14610         raise errors.OpPrereqError("Some groups do not exist: %s" %
14611                                    utils.CommaJoin(missing),
14612                                    errors.ECODE_NOENT)
14613
14614   def DeclareLocks(self, lu, level):
14615     pass
14616
14617   def _GetQueryData(self, lu):
14618     """Computes the list of node groups and their attributes.
14619
14620     """
14621     do_nodes = query.GQ_NODE in self.requested_data
14622     do_instances = query.GQ_INST in self.requested_data
14623
14624     group_to_nodes = None
14625     group_to_instances = None
14626
14627     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14628     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14629     # latter GetAllInstancesInfo() is not enough, for we have to go through
14630     # instance->node. Hence, we will need to process nodes even if we only need
14631     # instance information.
14632     if do_nodes or do_instances:
14633       all_nodes = lu.cfg.GetAllNodesInfo()
14634       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14635       node_to_group = {}
14636
14637       for node in all_nodes.values():
14638         if node.group in group_to_nodes:
14639           group_to_nodes[node.group].append(node.name)
14640           node_to_group[node.name] = node.group
14641
14642       if do_instances:
14643         all_instances = lu.cfg.GetAllInstancesInfo()
14644         group_to_instances = dict((uuid, []) for uuid in self.wanted)
14645
14646         for instance in all_instances.values():
14647           node = instance.primary_node
14648           if node in node_to_group:
14649             group_to_instances[node_to_group[node]].append(instance.name)
14650
14651         if not do_nodes:
14652           # Do not pass on node information if it was not requested.
14653           group_to_nodes = None
14654
14655     return query.GroupQueryData(self._cluster,
14656                                 [self._all_groups[uuid]
14657                                  for uuid in self.wanted],
14658                                 group_to_nodes, group_to_instances,
14659                                 query.GQ_DISKPARAMS in self.requested_data)
14660
14661
14662 class LUGroupQuery(NoHooksLU):
14663   """Logical unit for querying node groups.
14664
14665   """
14666   REQ_BGL = False
14667
14668   def CheckArguments(self):
14669     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14670                           self.op.output_fields, False)
14671
14672   def ExpandNames(self):
14673     self.gq.ExpandNames(self)
14674
14675   def DeclareLocks(self, level):
14676     self.gq.DeclareLocks(self, level)
14677
14678   def Exec(self, feedback_fn):
14679     return self.gq.OldStyleQuery(self)
14680
14681
14682 class LUGroupSetParams(LogicalUnit):
14683   """Modifies the parameters of a node group.
14684
14685   """
14686   HPATH = "group-modify"
14687   HTYPE = constants.HTYPE_GROUP
14688   REQ_BGL = False
14689
14690   def CheckArguments(self):
14691     all_changes = [
14692       self.op.ndparams,
14693       self.op.diskparams,
14694       self.op.alloc_policy,
14695       self.op.hv_state,
14696       self.op.disk_state,
14697       self.op.ipolicy,
14698       ]
14699
14700     if all_changes.count(None) == len(all_changes):
14701       raise errors.OpPrereqError("Please pass at least one modification",
14702                                  errors.ECODE_INVAL)
14703
14704   def ExpandNames(self):
14705     # This raises errors.OpPrereqError on its own:
14706     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14707
14708     self.needed_locks = {
14709       locking.LEVEL_INSTANCE: [],
14710       locking.LEVEL_NODEGROUP: [self.group_uuid],
14711       }
14712
14713     self.share_locks[locking.LEVEL_INSTANCE] = 1
14714
14715   def DeclareLocks(self, level):
14716     if level == locking.LEVEL_INSTANCE:
14717       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14718
14719       # Lock instances optimistically, needs verification once group lock has
14720       # been acquired
14721       self.needed_locks[locking.LEVEL_INSTANCE] = \
14722           self.cfg.GetNodeGroupInstances(self.group_uuid)
14723
14724   @staticmethod
14725   def _UpdateAndVerifyDiskParams(old, new):
14726     """Updates and verifies disk parameters.
14727
14728     """
14729     new_params = _GetUpdatedParams(old, new)
14730     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14731     return new_params
14732
14733   def CheckPrereq(self):
14734     """Check prerequisites.
14735
14736     """
14737     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14738
14739     # Check if locked instances are still correct
14740     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14741
14742     self.group = self.cfg.GetNodeGroup(self.group_uuid)
14743     cluster = self.cfg.GetClusterInfo()
14744
14745     if self.group is None:
14746       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14747                                (self.op.group_name, self.group_uuid))
14748
14749     if self.op.ndparams:
14750       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14751       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14752       self.new_ndparams = new_ndparams
14753
14754     if self.op.diskparams:
14755       diskparams = self.group.diskparams
14756       uavdp = self._UpdateAndVerifyDiskParams
14757       # For each disktemplate subdict update and verify the values
14758       new_diskparams = dict((dt,
14759                              uavdp(diskparams.get(dt, {}),
14760                                    self.op.diskparams[dt]))
14761                             for dt in constants.DISK_TEMPLATES
14762                             if dt in self.op.diskparams)
14763       # As we've all subdicts of diskparams ready, lets merge the actual
14764       # dict with all updated subdicts
14765       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14766       try:
14767         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14768       except errors.OpPrereqError, err:
14769         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14770                                    errors.ECODE_INVAL)
14771
14772     if self.op.hv_state:
14773       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14774                                                  self.group.hv_state_static)
14775
14776     if self.op.disk_state:
14777       self.new_disk_state = \
14778         _MergeAndVerifyDiskState(self.op.disk_state,
14779                                  self.group.disk_state_static)
14780
14781     if self.op.ipolicy:
14782       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14783                                             self.op.ipolicy,
14784                                             group_policy=True)
14785
14786       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14787       inst_filter = lambda inst: inst.name in owned_instances
14788       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14789       gmi = ganeti.masterd.instance
14790       violations = \
14791           _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14792                                                                   self.group),
14793                                         new_ipolicy, instances)
14794
14795       if violations:
14796         self.LogWarning("After the ipolicy change the following instances"
14797                         " violate them: %s",
14798                         utils.CommaJoin(violations))
14799
14800   def BuildHooksEnv(self):
14801     """Build hooks env.
14802
14803     """
14804     return {
14805       "GROUP_NAME": self.op.group_name,
14806       "NEW_ALLOC_POLICY": self.op.alloc_policy,
14807       }
14808
14809   def BuildHooksNodes(self):
14810     """Build hooks nodes.
14811
14812     """
14813     mn = self.cfg.GetMasterNode()
14814     return ([mn], [mn])
14815
14816   def Exec(self, feedback_fn):
14817     """Modifies the node group.
14818
14819     """
14820     result = []
14821
14822     if self.op.ndparams:
14823       self.group.ndparams = self.new_ndparams
14824       result.append(("ndparams", str(self.group.ndparams)))
14825
14826     if self.op.diskparams:
14827       self.group.diskparams = self.new_diskparams
14828       result.append(("diskparams", str(self.group.diskparams)))
14829
14830     if self.op.alloc_policy:
14831       self.group.alloc_policy = self.op.alloc_policy
14832
14833     if self.op.hv_state:
14834       self.group.hv_state_static = self.new_hv_state
14835
14836     if self.op.disk_state:
14837       self.group.disk_state_static = self.new_disk_state
14838
14839     if self.op.ipolicy:
14840       self.group.ipolicy = self.new_ipolicy
14841
14842     self.cfg.Update(self.group, feedback_fn)
14843     return result
14844
14845
14846 class LUGroupRemove(LogicalUnit):
14847   HPATH = "group-remove"
14848   HTYPE = constants.HTYPE_GROUP
14849   REQ_BGL = False
14850
14851   def ExpandNames(self):
14852     # This will raises errors.OpPrereqError on its own:
14853     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14854     self.needed_locks = {
14855       locking.LEVEL_NODEGROUP: [self.group_uuid],
14856       }
14857
14858   def CheckPrereq(self):
14859     """Check prerequisites.
14860
14861     This checks that the given group name exists as a node group, that is
14862     empty (i.e., contains no nodes), and that is not the last group of the
14863     cluster.
14864
14865     """
14866     # Verify that the group is empty.
14867     group_nodes = [node.name
14868                    for node in self.cfg.GetAllNodesInfo().values()
14869                    if node.group == self.group_uuid]
14870
14871     if group_nodes:
14872       raise errors.OpPrereqError("Group '%s' not empty, has the following"
14873                                  " nodes: %s" %
14874                                  (self.op.group_name,
14875                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
14876                                  errors.ECODE_STATE)
14877
14878     # Verify the cluster would not be left group-less.
14879     if len(self.cfg.GetNodeGroupList()) == 1:
14880       raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14881                                  " removed" % self.op.group_name,
14882                                  errors.ECODE_STATE)
14883
14884   def BuildHooksEnv(self):
14885     """Build hooks env.
14886
14887     """
14888     return {
14889       "GROUP_NAME": self.op.group_name,
14890       }
14891
14892   def BuildHooksNodes(self):
14893     """Build hooks nodes.
14894
14895     """
14896     mn = self.cfg.GetMasterNode()
14897     return ([mn], [mn])
14898
14899   def Exec(self, feedback_fn):
14900     """Remove the node group.
14901
14902     """
14903     try:
14904       self.cfg.RemoveNodeGroup(self.group_uuid)
14905     except errors.ConfigurationError:
14906       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14907                                (self.op.group_name, self.group_uuid))
14908
14909     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14910
14911
14912 class LUGroupRename(LogicalUnit):
14913   HPATH = "group-rename"
14914   HTYPE = constants.HTYPE_GROUP
14915   REQ_BGL = False
14916
14917   def ExpandNames(self):
14918     # This raises errors.OpPrereqError on its own:
14919     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14920
14921     self.needed_locks = {
14922       locking.LEVEL_NODEGROUP: [self.group_uuid],
14923       }
14924
14925   def CheckPrereq(self):
14926     """Check prerequisites.
14927
14928     Ensures requested new name is not yet used.
14929
14930     """
14931     try:
14932       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14933     except errors.OpPrereqError:
14934       pass
14935     else:
14936       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14937                                  " node group (UUID: %s)" %
14938                                  (self.op.new_name, new_name_uuid),
14939                                  errors.ECODE_EXISTS)
14940
14941   def BuildHooksEnv(self):
14942     """Build hooks env.
14943
14944     """
14945     return {
14946       "OLD_NAME": self.op.group_name,
14947       "NEW_NAME": self.op.new_name,
14948       }
14949
14950   def BuildHooksNodes(self):
14951     """Build hooks nodes.
14952
14953     """
14954     mn = self.cfg.GetMasterNode()
14955
14956     all_nodes = self.cfg.GetAllNodesInfo()
14957     all_nodes.pop(mn, None)
14958
14959     run_nodes = [mn]
14960     run_nodes.extend(node.name for node in all_nodes.values()
14961                      if node.group == self.group_uuid)
14962
14963     return (run_nodes, run_nodes)
14964
14965   def Exec(self, feedback_fn):
14966     """Rename the node group.
14967
14968     """
14969     group = self.cfg.GetNodeGroup(self.group_uuid)
14970
14971     if group is None:
14972       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14973                                (self.op.group_name, self.group_uuid))
14974
14975     group.name = self.op.new_name
14976     self.cfg.Update(group, feedback_fn)
14977
14978     return self.op.new_name
14979
14980
14981 class LUGroupEvacuate(LogicalUnit):
14982   HPATH = "group-evacuate"
14983   HTYPE = constants.HTYPE_GROUP
14984   REQ_BGL = False
14985
14986   def ExpandNames(self):
14987     # This raises errors.OpPrereqError on its own:
14988     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14989
14990     if self.op.target_groups:
14991       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14992                                   self.op.target_groups)
14993     else:
14994       self.req_target_uuids = []
14995
14996     if self.group_uuid in self.req_target_uuids:
14997       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14998                                  " as a target group (targets are %s)" %
14999                                  (self.group_uuid,
15000                                   utils.CommaJoin(self.req_target_uuids)),
15001                                  errors.ECODE_INVAL)
15002
15003     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15004
15005     self.share_locks = _ShareAll()
15006     self.needed_locks = {
15007       locking.LEVEL_INSTANCE: [],
15008       locking.LEVEL_NODEGROUP: [],
15009       locking.LEVEL_NODE: [],
15010       }
15011
15012   def DeclareLocks(self, level):
15013     if level == locking.LEVEL_INSTANCE:
15014       assert not self.needed_locks[locking.LEVEL_INSTANCE]
15015
15016       # Lock instances optimistically, needs verification once node and group
15017       # locks have been acquired
15018       self.needed_locks[locking.LEVEL_INSTANCE] = \
15019         self.cfg.GetNodeGroupInstances(self.group_uuid)
15020
15021     elif level == locking.LEVEL_NODEGROUP:
15022       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15023
15024       if self.req_target_uuids:
15025         lock_groups = set([self.group_uuid] + self.req_target_uuids)
15026
15027         # Lock all groups used by instances optimistically; this requires going
15028         # via the node before it's locked, requiring verification later on
15029         lock_groups.update(group_uuid
15030                            for instance_name in
15031                              self.owned_locks(locking.LEVEL_INSTANCE)
15032                            for group_uuid in
15033                              self.cfg.GetInstanceNodeGroups(instance_name))
15034       else:
15035         # No target groups, need to lock all of them
15036         lock_groups = locking.ALL_SET
15037
15038       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15039
15040     elif level == locking.LEVEL_NODE:
15041       # This will only lock the nodes in the group to be evacuated which
15042       # contain actual instances
15043       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15044       self._LockInstancesNodes()
15045
15046       # Lock all nodes in group to be evacuated and target groups
15047       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15048       assert self.group_uuid in owned_groups
15049       member_nodes = [node_name
15050                       for group in owned_groups
15051                       for node_name in self.cfg.GetNodeGroup(group).members]
15052       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15053
15054   def CheckPrereq(self):
15055     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15056     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15057     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15058
15059     assert owned_groups.issuperset(self.req_target_uuids)
15060     assert self.group_uuid in owned_groups
15061
15062     # Check if locked instances are still correct
15063     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15064
15065     # Get instance information
15066     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15067
15068     # Check if node groups for locked instances are still correct
15069     _CheckInstancesNodeGroups(self.cfg, self.instances,
15070                               owned_groups, owned_nodes, self.group_uuid)
15071
15072     if self.req_target_uuids:
15073       # User requested specific target groups
15074       self.target_uuids = self.req_target_uuids
15075     else:
15076       # All groups except the one to be evacuated are potential targets
15077       self.target_uuids = [group_uuid for group_uuid in owned_groups
15078                            if group_uuid != self.group_uuid]
15079
15080       if not self.target_uuids:
15081         raise errors.OpPrereqError("There are no possible target groups",
15082                                    errors.ECODE_INVAL)
15083
15084   def BuildHooksEnv(self):
15085     """Build hooks env.
15086
15087     """
15088     return {
15089       "GROUP_NAME": self.op.group_name,
15090       "TARGET_GROUPS": " ".join(self.target_uuids),
15091       }
15092
15093   def BuildHooksNodes(self):
15094     """Build hooks nodes.
15095
15096     """
15097     mn = self.cfg.GetMasterNode()
15098
15099     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15100
15101     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15102
15103     return (run_nodes, run_nodes)
15104
15105   def Exec(self, feedback_fn):
15106     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15107
15108     assert self.group_uuid not in self.target_uuids
15109
15110     req = iallocator.IAReqGroupChange(instances=instances,
15111                                       target_groups=self.target_uuids)
15112     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15113
15114     ial.Run(self.op.iallocator)
15115
15116     if not ial.success:
15117       raise errors.OpPrereqError("Can't compute group evacuation using"
15118                                  " iallocator '%s': %s" %
15119                                  (self.op.iallocator, ial.info),
15120                                  errors.ECODE_NORES)
15121
15122     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15123
15124     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15125                  len(jobs), self.op.group_name)
15126
15127     return ResultWithJobs(jobs)
15128
15129
15130 class TagsLU(NoHooksLU): # pylint: disable=W0223
15131   """Generic tags LU.
15132
15133   This is an abstract class which is the parent of all the other tags LUs.
15134
15135   """
15136   def ExpandNames(self):
15137     self.group_uuid = None
15138     self.needed_locks = {}
15139
15140     if self.op.kind == constants.TAG_NODE:
15141       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15142       lock_level = locking.LEVEL_NODE
15143       lock_name = self.op.name
15144     elif self.op.kind == constants.TAG_INSTANCE:
15145       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15146       lock_level = locking.LEVEL_INSTANCE
15147       lock_name = self.op.name
15148     elif self.op.kind == constants.TAG_NODEGROUP:
15149       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15150       lock_level = locking.LEVEL_NODEGROUP
15151       lock_name = self.group_uuid
15152     elif self.op.kind == constants.TAG_NETWORK:
15153       self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15154       lock_level = locking.LEVEL_NETWORK
15155       lock_name = self.network_uuid
15156     else:
15157       lock_level = None
15158       lock_name = None
15159
15160     if lock_level and getattr(self.op, "use_locking", True):
15161       self.needed_locks[lock_level] = lock_name
15162
15163     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15164     # not possible to acquire the BGL based on opcode parameters)
15165
15166   def CheckPrereq(self):
15167     """Check prerequisites.
15168
15169     """
15170     if self.op.kind == constants.TAG_CLUSTER:
15171       self.target = self.cfg.GetClusterInfo()
15172     elif self.op.kind == constants.TAG_NODE:
15173       self.target = self.cfg.GetNodeInfo(self.op.name)
15174     elif self.op.kind == constants.TAG_INSTANCE:
15175       self.target = self.cfg.GetInstanceInfo(self.op.name)
15176     elif self.op.kind == constants.TAG_NODEGROUP:
15177       self.target = self.cfg.GetNodeGroup(self.group_uuid)
15178     elif self.op.kind == constants.TAG_NETWORK:
15179       self.target = self.cfg.GetNetwork(self.network_uuid)
15180     else:
15181       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15182                                  str(self.op.kind), errors.ECODE_INVAL)
15183
15184
15185 class LUTagsGet(TagsLU):
15186   """Returns the tags of a given object.
15187
15188   """
15189   REQ_BGL = False
15190
15191   def ExpandNames(self):
15192     TagsLU.ExpandNames(self)
15193
15194     # Share locks as this is only a read operation
15195     self.share_locks = _ShareAll()
15196
15197   def Exec(self, feedback_fn):
15198     """Returns the tag list.
15199
15200     """
15201     return list(self.target.GetTags())
15202
15203
15204 class LUTagsSearch(NoHooksLU):
15205   """Searches the tags for a given pattern.
15206
15207   """
15208   REQ_BGL = False
15209
15210   def ExpandNames(self):
15211     self.needed_locks = {}
15212
15213   def CheckPrereq(self):
15214     """Check prerequisites.
15215
15216     This checks the pattern passed for validity by compiling it.
15217
15218     """
15219     try:
15220       self.re = re.compile(self.op.pattern)
15221     except re.error, err:
15222       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15223                                  (self.op.pattern, err), errors.ECODE_INVAL)
15224
15225   def Exec(self, feedback_fn):
15226     """Returns the tag list.
15227
15228     """
15229     cfg = self.cfg
15230     tgts = [("/cluster", cfg.GetClusterInfo())]
15231     ilist = cfg.GetAllInstancesInfo().values()
15232     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15233     nlist = cfg.GetAllNodesInfo().values()
15234     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15235     tgts.extend(("/nodegroup/%s" % n.name, n)
15236                 for n in cfg.GetAllNodeGroupsInfo().values())
15237     results = []
15238     for path, target in tgts:
15239       for tag in target.GetTags():
15240         if self.re.search(tag):
15241           results.append((path, tag))
15242     return results
15243
15244
15245 class LUTagsSet(TagsLU):
15246   """Sets a tag on a given object.
15247
15248   """
15249   REQ_BGL = False
15250
15251   def CheckPrereq(self):
15252     """Check prerequisites.
15253
15254     This checks the type and length of the tag name and value.
15255
15256     """
15257     TagsLU.CheckPrereq(self)
15258     for tag in self.op.tags:
15259       objects.TaggableObject.ValidateTag(tag)
15260
15261   def Exec(self, feedback_fn):
15262     """Sets the tag.
15263
15264     """
15265     try:
15266       for tag in self.op.tags:
15267         self.target.AddTag(tag)
15268     except errors.TagError, err:
15269       raise errors.OpExecError("Error while setting tag: %s" % str(err))
15270     self.cfg.Update(self.target, feedback_fn)
15271
15272
15273 class LUTagsDel(TagsLU):
15274   """Delete a list of tags from a given object.
15275
15276   """
15277   REQ_BGL = False
15278
15279   def CheckPrereq(self):
15280     """Check prerequisites.
15281
15282     This checks that we have the given tag.
15283
15284     """
15285     TagsLU.CheckPrereq(self)
15286     for tag in self.op.tags:
15287       objects.TaggableObject.ValidateTag(tag)
15288     del_tags = frozenset(self.op.tags)
15289     cur_tags = self.target.GetTags()
15290
15291     diff_tags = del_tags - cur_tags
15292     if diff_tags:
15293       diff_names = ("'%s'" % i for i in sorted(diff_tags))
15294       raise errors.OpPrereqError("Tag(s) %s not found" %
15295                                  (utils.CommaJoin(diff_names), ),
15296                                  errors.ECODE_NOENT)
15297
15298   def Exec(self, feedback_fn):
15299     """Remove the tag from the object.
15300
15301     """
15302     for tag in self.op.tags:
15303       self.target.RemoveTag(tag)
15304     self.cfg.Update(self.target, feedback_fn)
15305
15306
15307 class LUTestDelay(NoHooksLU):
15308   """Sleep for a specified amount of time.
15309
15310   This LU sleeps on the master and/or nodes for a specified amount of
15311   time.
15312
15313   """
15314   REQ_BGL = False
15315
15316   def ExpandNames(self):
15317     """Expand names and set required locks.
15318
15319     This expands the node list, if any.
15320
15321     """
15322     self.needed_locks = {}
15323     if self.op.on_nodes:
15324       # _GetWantedNodes can be used here, but is not always appropriate to use
15325       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15326       # more information.
15327       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15328       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15329
15330   def _TestDelay(self):
15331     """Do the actual sleep.
15332
15333     """
15334     if self.op.on_master:
15335       if not utils.TestDelay(self.op.duration):
15336         raise errors.OpExecError("Error during master delay test")
15337     if self.op.on_nodes:
15338       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15339       for node, node_result in result.items():
15340         node_result.Raise("Failure during rpc call to node %s" % node)
15341
15342   def Exec(self, feedback_fn):
15343     """Execute the test delay opcode, with the wanted repetitions.
15344
15345     """
15346     if self.op.repeat == 0:
15347       self._TestDelay()
15348     else:
15349       top_value = self.op.repeat - 1
15350       for i in range(self.op.repeat):
15351         self.LogInfo("Test delay iteration %d/%d", i, top_value)
15352         self._TestDelay()
15353
15354
15355 class LURestrictedCommand(NoHooksLU):
15356   """Logical unit for executing restricted commands.
15357
15358   """
15359   REQ_BGL = False
15360
15361   def ExpandNames(self):
15362     if self.op.nodes:
15363       self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15364
15365     self.needed_locks = {
15366       locking.LEVEL_NODE: self.op.nodes,
15367       }
15368     self.share_locks = {
15369       locking.LEVEL_NODE: not self.op.use_locking,
15370       }
15371
15372   def CheckPrereq(self):
15373     """Check prerequisites.
15374
15375     """
15376
15377   def Exec(self, feedback_fn):
15378     """Execute restricted command and return output.
15379
15380     """
15381     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15382
15383     # Check if correct locks are held
15384     assert set(self.op.nodes).issubset(owned_nodes)
15385
15386     rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15387
15388     result = []
15389
15390     for node_name in self.op.nodes:
15391       nres = rpcres[node_name]
15392       if nres.fail_msg:
15393         msg = ("Command '%s' on node '%s' failed: %s" %
15394                (self.op.command, node_name, nres.fail_msg))
15395         result.append((False, msg))
15396       else:
15397         result.append((True, nres.payload))
15398
15399     return result
15400
15401
15402 class LUTestJqueue(NoHooksLU):
15403   """Utility LU to test some aspects of the job queue.
15404
15405   """
15406   REQ_BGL = False
15407
15408   # Must be lower than default timeout for WaitForJobChange to see whether it
15409   # notices changed jobs
15410   _CLIENT_CONNECT_TIMEOUT = 20.0
15411   _CLIENT_CONFIRM_TIMEOUT = 60.0
15412
15413   @classmethod
15414   def _NotifyUsingSocket(cls, cb, errcls):
15415     """Opens a Unix socket and waits for another program to connect.
15416
15417     @type cb: callable
15418     @param cb: Callback to send socket name to client
15419     @type errcls: class
15420     @param errcls: Exception class to use for errors
15421
15422     """
15423     # Using a temporary directory as there's no easy way to create temporary
15424     # sockets without writing a custom loop around tempfile.mktemp and
15425     # socket.bind
15426     tmpdir = tempfile.mkdtemp()
15427     try:
15428       tmpsock = utils.PathJoin(tmpdir, "sock")
15429
15430       logging.debug("Creating temporary socket at %s", tmpsock)
15431       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15432       try:
15433         sock.bind(tmpsock)
15434         sock.listen(1)
15435
15436         # Send details to client
15437         cb(tmpsock)
15438
15439         # Wait for client to connect before continuing
15440         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15441         try:
15442           (conn, _) = sock.accept()
15443         except socket.error, err:
15444           raise errcls("Client didn't connect in time (%s)" % err)
15445       finally:
15446         sock.close()
15447     finally:
15448       # Remove as soon as client is connected
15449       shutil.rmtree(tmpdir)
15450
15451     # Wait for client to close
15452     try:
15453       try:
15454         # pylint: disable=E1101
15455         # Instance of '_socketobject' has no ... member
15456         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15457         conn.recv(1)
15458       except socket.error, err:
15459         raise errcls("Client failed to confirm notification (%s)" % err)
15460     finally:
15461       conn.close()
15462
15463   def _SendNotification(self, test, arg, sockname):
15464     """Sends a notification to the client.
15465
15466     @type test: string
15467     @param test: Test name
15468     @param arg: Test argument (depends on test)
15469     @type sockname: string
15470     @param sockname: Socket path
15471
15472     """
15473     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15474
15475   def _Notify(self, prereq, test, arg):
15476     """Notifies the client of a test.
15477
15478     @type prereq: bool
15479     @param prereq: Whether this is a prereq-phase test
15480     @type test: string
15481     @param test: Test name
15482     @param arg: Test argument (depends on test)
15483
15484     """
15485     if prereq:
15486       errcls = errors.OpPrereqError
15487     else:
15488       errcls = errors.OpExecError
15489
15490     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15491                                                   test, arg),
15492                                    errcls)
15493
15494   def CheckArguments(self):
15495     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15496     self.expandnames_calls = 0
15497
15498   def ExpandNames(self):
15499     checkargs_calls = getattr(self, "checkargs_calls", 0)
15500     if checkargs_calls < 1:
15501       raise errors.ProgrammerError("CheckArguments was not called")
15502
15503     self.expandnames_calls += 1
15504
15505     if self.op.notify_waitlock:
15506       self._Notify(True, constants.JQT_EXPANDNAMES, None)
15507
15508     self.LogInfo("Expanding names")
15509
15510     # Get lock on master node (just to get a lock, not for a particular reason)
15511     self.needed_locks = {
15512       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15513       }
15514
15515   def Exec(self, feedback_fn):
15516     if self.expandnames_calls < 1:
15517       raise errors.ProgrammerError("ExpandNames was not called")
15518
15519     if self.op.notify_exec:
15520       self._Notify(False, constants.JQT_EXEC, None)
15521
15522     self.LogInfo("Executing")
15523
15524     if self.op.log_messages:
15525       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15526       for idx, msg in enumerate(self.op.log_messages):
15527         self.LogInfo("Sending log message %s", idx + 1)
15528         feedback_fn(constants.JQT_MSGPREFIX + msg)
15529         # Report how many test messages have been sent
15530         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15531
15532     if self.op.fail:
15533       raise errors.OpExecError("Opcode failure was requested")
15534
15535     return True
15536
15537
15538 class LUTestAllocator(NoHooksLU):
15539   """Run allocator tests.
15540
15541   This LU runs the allocator tests
15542
15543   """
15544   def CheckPrereq(self):
15545     """Check prerequisites.
15546
15547     This checks the opcode parameters depending on the director and mode test.
15548
15549     """
15550     if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15551                         constants.IALLOCATOR_MODE_MULTI_ALLOC):
15552       for attr in ["memory", "disks", "disk_template",
15553                    "os", "tags", "nics", "vcpus"]:
15554         if not hasattr(self.op, attr):
15555           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15556                                      attr, errors.ECODE_INVAL)
15557       iname = self.cfg.ExpandInstanceName(self.op.name)
15558       if iname is not None:
15559         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15560                                    iname, errors.ECODE_EXISTS)
15561       if not isinstance(self.op.nics, list):
15562         raise errors.OpPrereqError("Invalid parameter 'nics'",
15563                                    errors.ECODE_INVAL)
15564       if not isinstance(self.op.disks, list):
15565         raise errors.OpPrereqError("Invalid parameter 'disks'",
15566                                    errors.ECODE_INVAL)
15567       for row in self.op.disks:
15568         if (not isinstance(row, dict) or
15569             constants.IDISK_SIZE not in row or
15570             not isinstance(row[constants.IDISK_SIZE], int) or
15571             constants.IDISK_MODE not in row or
15572             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15573           raise errors.OpPrereqError("Invalid contents of the 'disks'"
15574                                      " parameter", errors.ECODE_INVAL)
15575       if self.op.hypervisor is None:
15576         self.op.hypervisor = self.cfg.GetHypervisorType()
15577     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15578       fname = _ExpandInstanceName(self.cfg, self.op.name)
15579       self.op.name = fname
15580       self.relocate_from = \
15581           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15582     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15583                           constants.IALLOCATOR_MODE_NODE_EVAC):
15584       if not self.op.instances:
15585         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15586       self.op.instances = _GetWantedInstances(self, self.op.instances)
15587     else:
15588       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15589                                  self.op.mode, errors.ECODE_INVAL)
15590
15591     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15592       if self.op.iallocator is None:
15593         raise errors.OpPrereqError("Missing allocator name",
15594                                    errors.ECODE_INVAL)
15595     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15596       raise errors.OpPrereqError("Wrong allocator test '%s'" %
15597                                  self.op.direction, errors.ECODE_INVAL)
15598
15599   def Exec(self, feedback_fn):
15600     """Run the allocator test.
15601
15602     """
15603     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15604       req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15605                                           memory=self.op.memory,
15606                                           disks=self.op.disks,
15607                                           disk_template=self.op.disk_template,
15608                                           os=self.op.os,
15609                                           tags=self.op.tags,
15610                                           nics=self.op.nics,
15611                                           vcpus=self.op.vcpus,
15612                                           spindle_use=self.op.spindle_use,
15613                                           hypervisor=self.op.hypervisor)
15614     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15615       req = iallocator.IAReqRelocate(name=self.op.name,
15616                                      relocate_from=list(self.relocate_from))
15617     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15618       req = iallocator.IAReqGroupChange(instances=self.op.instances,
15619                                         target_groups=self.op.target_groups)
15620     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15621       req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15622                                      evac_mode=self.op.evac_mode)
15623     elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15624       disk_template = self.op.disk_template
15625       insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15626                                              memory=self.op.memory,
15627                                              disks=self.op.disks,
15628                                              disk_template=disk_template,
15629                                              os=self.op.os,
15630                                              tags=self.op.tags,
15631                                              nics=self.op.nics,
15632                                              vcpus=self.op.vcpus,
15633                                              spindle_use=self.op.spindle_use,
15634                                              hypervisor=self.op.hypervisor)
15635                for idx in range(self.op.count)]
15636       req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15637     else:
15638       raise errors.ProgrammerError("Uncatched mode %s in"
15639                                    " LUTestAllocator.Exec", self.op.mode)
15640
15641     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15642     if self.op.direction == constants.IALLOCATOR_DIR_IN:
15643       result = ial.in_text
15644     else:
15645       ial.Run(self.op.iallocator, validate=False)
15646       result = ial.out_text
15647     return result
15648
15649
15650 class LUNetworkAdd(LogicalUnit):
15651   """Logical unit for creating networks.
15652
15653   """
15654   HPATH = "network-add"
15655   HTYPE = constants.HTYPE_NETWORK
15656   REQ_BGL = False
15657
15658   def BuildHooksNodes(self):
15659     """Build hooks nodes.
15660
15661     """
15662     mn = self.cfg.GetMasterNode()
15663     return ([mn], [mn])
15664
15665   def ExpandNames(self):
15666     self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15667
15668     if self.op.conflicts_check:
15669       self.share_locks[locking.LEVEL_NODE] = 1
15670       self.needed_locks = {
15671         locking.LEVEL_NODE: locking.ALL_SET,
15672         }
15673     else:
15674       self.needed_locks = {}
15675
15676     self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15677
15678   def CheckPrereq(self):
15679     """Check prerequisites.
15680
15681     This checks that the given group name is not an existing node group
15682     already.
15683
15684     """
15685     if self.op.network is None:
15686       raise errors.OpPrereqError("Network must be given",
15687                                  errors.ECODE_INVAL)
15688
15689     uuid = self.cfg.LookupNetwork(self.op.network_name)
15690
15691     if uuid:
15692       raise errors.OpPrereqError("Network '%s' already defined" %
15693                                  self.op.network, errors.ECODE_EXISTS)
15694
15695     if self.op.mac_prefix:
15696       utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15697
15698     # Check tag validity
15699     for tag in self.op.tags:
15700       objects.TaggableObject.ValidateTag(tag)
15701
15702   def BuildHooksEnv(self):
15703     """Build hooks env.
15704
15705     """
15706     args = {
15707       "name": self.op.network_name,
15708       "subnet": self.op.network,
15709       "gateway": self.op.gateway,
15710       "network6": self.op.network6,
15711       "gateway6": self.op.gateway6,
15712       "mac_prefix": self.op.mac_prefix,
15713       "network_type": self.op.network_type,
15714       "tags": self.op.tags,
15715       }
15716     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15717
15718   def Exec(self, feedback_fn):
15719     """Add the ip pool to the cluster.
15720
15721     """
15722     nobj = objects.Network(name=self.op.network_name,
15723                            network=self.op.network,
15724                            gateway=self.op.gateway,
15725                            network6=self.op.network6,
15726                            gateway6=self.op.gateway6,
15727                            mac_prefix=self.op.mac_prefix,
15728                            network_type=self.op.network_type,
15729                            uuid=self.network_uuid,
15730                            family=constants.IP4_VERSION)
15731     # Initialize the associated address pool
15732     try:
15733       pool = network.AddressPool.InitializeNetwork(nobj)
15734     except errors.AddressPoolError, e:
15735       raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15736
15737     # Check if we need to reserve the nodes and the cluster master IP
15738     # These may not be allocated to any instances in routed mode, as
15739     # they wouldn't function anyway.
15740     if self.op.conflicts_check:
15741       for node in self.cfg.GetAllNodesInfo().values():
15742         for ip in [node.primary_ip, node.secondary_ip]:
15743           try:
15744             if pool.Contains(ip):
15745               pool.Reserve(ip)
15746               self.LogInfo("Reserved IP address of node '%s' (%s)",
15747                            node.name, ip)
15748           except errors.AddressPoolError:
15749             self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
15750                             node.name, ip)
15751
15752       master_ip = self.cfg.GetClusterInfo().master_ip
15753       try:
15754         if pool.Contains(master_ip):
15755           pool.Reserve(master_ip)
15756           self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
15757       except errors.AddressPoolError:
15758         self.LogWarning("Cannot reserve cluster master IP address (%s)",
15759                         master_ip)
15760
15761     if self.op.add_reserved_ips:
15762       for ip in self.op.add_reserved_ips:
15763         try:
15764           pool.Reserve(ip, external=True)
15765         except errors.AddressPoolError, e:
15766           raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15767
15768     if self.op.tags:
15769       for tag in self.op.tags:
15770         nobj.AddTag(tag)
15771
15772     self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15773     del self.remove_locks[locking.LEVEL_NETWORK]
15774
15775
15776 class LUNetworkRemove(LogicalUnit):
15777   HPATH = "network-remove"
15778   HTYPE = constants.HTYPE_NETWORK
15779   REQ_BGL = False
15780
15781   def ExpandNames(self):
15782     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15783
15784     if not self.network_uuid:
15785       raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15786                                  errors.ECODE_INVAL)
15787
15788     self.share_locks[locking.LEVEL_NODEGROUP] = 1
15789     self.needed_locks = {
15790       locking.LEVEL_NETWORK: [self.network_uuid],
15791       locking.LEVEL_NODEGROUP: locking.ALL_SET,
15792       }
15793
15794   def CheckPrereq(self):
15795     """Check prerequisites.
15796
15797     This checks that the given network name exists as a network, that is
15798     empty (i.e., contains no nodes), and that is not the last group of the
15799     cluster.
15800
15801     """
15802
15803     # Verify that the network is not conncted.
15804     node_groups = [group.name
15805                    for group in self.cfg.GetAllNodeGroupsInfo().values()
15806                    for net in group.networks.keys()
15807                    if net == self.network_uuid]
15808
15809     if node_groups:
15810       self.LogWarning("Nework '%s' is connected to the following"
15811                       " node groups: %s" % (self.op.network_name,
15812                       utils.CommaJoin(utils.NiceSort(node_groups))))
15813       raise errors.OpPrereqError("Network still connected",
15814                                  errors.ECODE_STATE)
15815
15816   def BuildHooksEnv(self):
15817     """Build hooks env.
15818
15819     """
15820     return {
15821       "NETWORK_NAME": self.op.network_name,
15822       }
15823
15824   def BuildHooksNodes(self):
15825     """Build hooks nodes.
15826
15827     """
15828     mn = self.cfg.GetMasterNode()
15829     return ([mn], [mn])
15830
15831   def Exec(self, feedback_fn):
15832     """Remove the network.
15833
15834     """
15835     try:
15836       self.cfg.RemoveNetwork(self.network_uuid)
15837     except errors.ConfigurationError:
15838       raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15839                                (self.op.network_name, self.network_uuid))
15840
15841
15842 class LUNetworkSetParams(LogicalUnit):
15843   """Modifies the parameters of a network.
15844
15845   """
15846   HPATH = "network-modify"
15847   HTYPE = constants.HTYPE_NETWORK
15848   REQ_BGL = False
15849
15850   def CheckArguments(self):
15851     if (self.op.gateway and
15852         (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15853       raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15854                                  " at once", errors.ECODE_INVAL)
15855
15856   def ExpandNames(self):
15857     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15858     self.network = self.cfg.GetNetwork(self.network_uuid)
15859     if self.network is None:
15860       raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15861                                  (self.op.network_name, self.network_uuid),
15862                                  errors.ECODE_INVAL)
15863     self.needed_locks = {
15864       locking.LEVEL_NETWORK: [self.network_uuid],
15865       }
15866
15867   def CheckPrereq(self):
15868     """Check prerequisites.
15869
15870     """
15871     self.gateway = self.network.gateway
15872     self.network_type = self.network.network_type
15873     self.mac_prefix = self.network.mac_prefix
15874     self.network6 = self.network.network6
15875     self.gateway6 = self.network.gateway6
15876     self.tags = self.network.tags
15877
15878     self.pool = network.AddressPool(self.network)
15879
15880     if self.op.gateway:
15881       if self.op.gateway == constants.VALUE_NONE:
15882         self.gateway = None
15883       else:
15884         self.gateway = self.op.gateway
15885         if self.pool.IsReserved(self.gateway):
15886           raise errors.OpPrereqError("%s is already reserved" %
15887                                      self.gateway, errors.ECODE_INVAL)
15888
15889     if self.op.network_type:
15890       if self.op.network_type == constants.VALUE_NONE:
15891         self.network_type = None
15892       else:
15893         self.network_type = self.op.network_type
15894
15895     if self.op.mac_prefix:
15896       if self.op.mac_prefix == constants.VALUE_NONE:
15897         self.mac_prefix = None
15898       else:
15899         utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15900         self.mac_prefix = self.op.mac_prefix
15901
15902     if self.op.gateway6:
15903       if self.op.gateway6 == constants.VALUE_NONE:
15904         self.gateway6 = None
15905       else:
15906         self.gateway6 = self.op.gateway6
15907
15908     if self.op.network6:
15909       if self.op.network6 == constants.VALUE_NONE:
15910         self.network6 = None
15911       else:
15912         self.network6 = self.op.network6
15913
15914   def BuildHooksEnv(self):
15915     """Build hooks env.
15916
15917     """
15918     args = {
15919       "name": self.op.network_name,
15920       "subnet": self.network.network,
15921       "gateway": self.gateway,
15922       "network6": self.network6,
15923       "gateway6": self.gateway6,
15924       "mac_prefix": self.mac_prefix,
15925       "network_type": self.network_type,
15926       "tags": self.tags,
15927       }
15928     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15929
15930   def BuildHooksNodes(self):
15931     """Build hooks nodes.
15932
15933     """
15934     mn = self.cfg.GetMasterNode()
15935     return ([mn], [mn])
15936
15937   def Exec(self, feedback_fn):
15938     """Modifies the network.
15939
15940     """
15941     #TODO: reserve/release via temporary reservation manager
15942     #      extend cfg.ReserveIp/ReleaseIp with the external flag
15943     if self.op.gateway:
15944       if self.gateway == self.network.gateway:
15945         self.LogWarning("Gateway is already %s", self.gateway)
15946       else:
15947         if self.gateway:
15948           self.pool.Reserve(self.gateway, external=True)
15949         if self.network.gateway:
15950           self.pool.Release(self.network.gateway, external=True)
15951         self.network.gateway = self.gateway
15952
15953     if self.op.add_reserved_ips:
15954       for ip in self.op.add_reserved_ips:
15955         try:
15956           if self.pool.IsReserved(ip):
15957             self.LogWarning("IP address %s is already reserved", ip)
15958           else:
15959             self.pool.Reserve(ip, external=True)
15960         except errors.AddressPoolError, err:
15961           self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
15962
15963     if self.op.remove_reserved_ips:
15964       for ip in self.op.remove_reserved_ips:
15965         if ip == self.network.gateway:
15966           self.LogWarning("Cannot unreserve Gateway's IP")
15967           continue
15968         try:
15969           if not self.pool.IsReserved(ip):
15970             self.LogWarning("IP address %s is already unreserved", ip)
15971           else:
15972             self.pool.Release(ip, external=True)
15973         except errors.AddressPoolError, err:
15974           self.LogWarning("Cannot release IP address %s: %s", ip, err)
15975
15976     if self.op.mac_prefix:
15977       self.network.mac_prefix = self.mac_prefix
15978
15979     if self.op.network6:
15980       self.network.network6 = self.network6
15981
15982     if self.op.gateway6:
15983       self.network.gateway6 = self.gateway6
15984
15985     if self.op.network_type:
15986       self.network.network_type = self.network_type
15987
15988     self.pool.Validate()
15989
15990     self.cfg.Update(self.network, feedback_fn)
15991
15992
15993 class _NetworkQuery(_QueryBase):
15994   FIELDS = query.NETWORK_FIELDS
15995
15996   def ExpandNames(self, lu):
15997     lu.needed_locks = {}
15998
15999     self._all_networks = lu.cfg.GetAllNetworksInfo()
16000     name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
16001
16002     if not self.names:
16003       self.wanted = [name_to_uuid[name]
16004                      for name in utils.NiceSort(name_to_uuid.keys())]
16005     else:
16006       # Accept names to be either names or UUIDs.
16007       missing = []
16008       self.wanted = []
16009       all_uuid = frozenset(self._all_networks.keys())
16010
16011       for name in self.names:
16012         if name in all_uuid:
16013           self.wanted.append(name)
16014         elif name in name_to_uuid:
16015           self.wanted.append(name_to_uuid[name])
16016         else:
16017           missing.append(name)
16018
16019       if missing:
16020         raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16021                                    errors.ECODE_NOENT)
16022
16023   def DeclareLocks(self, lu, level):
16024     pass
16025
16026   def _GetQueryData(self, lu):
16027     """Computes the list of networks and their attributes.
16028
16029     """
16030     do_instances = query.NETQ_INST in self.requested_data
16031     do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
16032     do_stats = query.NETQ_STATS in self.requested_data
16033
16034     network_to_groups = None
16035     network_to_instances = None
16036     stats = None
16037
16038     # For NETQ_GROUP, we need to map network->[groups]
16039     if do_groups:
16040       all_groups = lu.cfg.GetAllNodeGroupsInfo()
16041       network_to_groups = dict((uuid, []) for uuid in self.wanted)
16042
16043       if do_instances:
16044         all_instances = lu.cfg.GetAllInstancesInfo()
16045         all_nodes = lu.cfg.GetAllNodesInfo()
16046         network_to_instances = dict((uuid, []) for uuid in self.wanted)
16047
16048       for group in all_groups.values():
16049         if do_instances:
16050           group_nodes = [node.name for node in all_nodes.values() if
16051                          node.group == group.uuid]
16052           group_instances = [instance for instance in all_instances.values()
16053                              if instance.primary_node in group_nodes]
16054
16055         for net_uuid in group.networks.keys():
16056           if net_uuid in network_to_groups:
16057             netparams = group.networks[net_uuid]
16058             mode = netparams[constants.NIC_MODE]
16059             link = netparams[constants.NIC_LINK]
16060             info = group.name + "(" + mode + ", " + link + ")"
16061             network_to_groups[net_uuid].append(info)
16062
16063             if do_instances:
16064               for instance in group_instances:
16065                 for nic in instance.nics:
16066                   if nic.network == self._all_networks[net_uuid].name:
16067                     network_to_instances[net_uuid].append(instance.name)
16068                     break
16069
16070     if do_stats:
16071       stats = {}
16072       for uuid, net in self._all_networks.items():
16073         if uuid in self.wanted:
16074           pool = network.AddressPool(net)
16075           stats[uuid] = {
16076             "free_count": pool.GetFreeCount(),
16077             "reserved_count": pool.GetReservedCount(),
16078             "map": pool.GetMap(),
16079             "external_reservations":
16080               utils.CommaJoin(pool.GetExternalReservations()),
16081             }
16082
16083     return query.NetworkQueryData([self._all_networks[uuid]
16084                                    for uuid in self.wanted],
16085                                    network_to_groups,
16086                                    network_to_instances,
16087                                    stats)
16088
16089
16090 class LUNetworkQuery(NoHooksLU):
16091   """Logical unit for querying networks.
16092
16093   """
16094   REQ_BGL = False
16095
16096   def CheckArguments(self):
16097     self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16098                             self.op.output_fields, False)
16099
16100   def ExpandNames(self):
16101     self.nq.ExpandNames(self)
16102
16103   def Exec(self, feedback_fn):
16104     return self.nq.OldStyleQuery(self)
16105
16106
16107 class LUNetworkConnect(LogicalUnit):
16108   """Connect a network to a nodegroup
16109
16110   """
16111   HPATH = "network-connect"
16112   HTYPE = constants.HTYPE_NETWORK
16113   REQ_BGL = False
16114
16115   def ExpandNames(self):
16116     self.network_name = self.op.network_name
16117     self.group_name = self.op.group_name
16118     self.network_mode = self.op.network_mode
16119     self.network_link = self.op.network_link
16120
16121     self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16122     self.network = self.cfg.GetNetwork(self.network_uuid)
16123     if self.network is None:
16124       raise errors.OpPrereqError("Network %s does not exist" %
16125                                  self.network_name, errors.ECODE_INVAL)
16126
16127     self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16128     self.group = self.cfg.GetNodeGroup(self.group_uuid)
16129     if self.group is None:
16130       raise errors.OpPrereqError("Group %s does not exist" %
16131                                  self.group_name, errors.ECODE_INVAL)
16132
16133     self.share_locks[locking.LEVEL_INSTANCE] = 1
16134     self.needed_locks = {
16135       locking.LEVEL_INSTANCE: [],
16136       locking.LEVEL_NODEGROUP: [self.group_uuid],
16137       }
16138
16139   def DeclareLocks(self, level):
16140     if level == locking.LEVEL_INSTANCE:
16141       assert not self.needed_locks[locking.LEVEL_INSTANCE]
16142
16143       # Lock instances optimistically, needs verification once group lock has
16144       # been acquired
16145       if self.op.conflicts_check:
16146         self.needed_locks[locking.LEVEL_INSTANCE] = \
16147             self.cfg.GetNodeGroupInstances(self.group_uuid)
16148         self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16149
16150   def BuildHooksEnv(self):
16151     ret = {
16152       "GROUP_NAME": self.group_name,
16153       "GROUP_NETWORK_MODE": self.network_mode,
16154       "GROUP_NETWORK_LINK": self.network_link,
16155       }
16156     ret.update(_BuildNetworkHookEnvByObject(self.network))
16157     return ret
16158
16159   def BuildHooksNodes(self):
16160     nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16161     return (nodes, nodes)
16162
16163   def CheckPrereq(self):
16164     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16165     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16166
16167     assert self.group_uuid in owned_groups
16168
16169     # Check if locked instances are still correct
16170     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16171
16172     l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16173                                       for i in value)
16174
16175     self.netparams = {
16176       constants.NIC_MODE: self.network_mode,
16177       constants.NIC_LINK: self.network_link,
16178       }
16179     objects.NIC.CheckParameterSyntax(self.netparams)
16180
16181     #if self.network_mode == constants.NIC_MODE_BRIDGED:
16182     #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16183     self.connected = False
16184     if self.network_uuid in self.group.networks:
16185       self.LogWarning("Network '%s' is already mapped to group '%s'" %
16186                       (self.network_name, self.group.name))
16187       self.connected = True
16188       return
16189
16190     if self.op.conflicts_check:
16191       pool = network.AddressPool(self.network)
16192       conflicting_instances = []
16193
16194       for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
16195         for idx, nic in enumerate(instance.nics):
16196           if pool.Contains(nic.ip):
16197             conflicting_instances.append((instance.name, idx, nic.ip))
16198
16199       if conflicting_instances:
16200         self.LogWarning("Following occurences use IPs from network %s"
16201                         " that is about to connect to nodegroup %s: %s" %
16202                         (self.network_name, self.group.name,
16203                         l(conflicting_instances)))
16204         raise errors.OpPrereqError("Conflicting IPs found."
16205                                    " Please remove/modify"
16206                                    " corresponding NICs",
16207                                    errors.ECODE_INVAL)
16208
16209   def Exec(self, feedback_fn):
16210     if self.connected:
16211       return
16212
16213     self.group.networks[self.network_uuid] = self.netparams
16214     self.cfg.Update(self.group, feedback_fn)
16215
16216
16217 class LUNetworkDisconnect(LogicalUnit):
16218   """Disconnect a network to a nodegroup
16219
16220   """
16221   HPATH = "network-disconnect"
16222   HTYPE = constants.HTYPE_NETWORK
16223   REQ_BGL = False
16224
16225   def ExpandNames(self):
16226     self.network_name = self.op.network_name
16227     self.group_name = self.op.group_name
16228
16229     self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16230     self.network = self.cfg.GetNetwork(self.network_uuid)
16231     if self.network is None:
16232       raise errors.OpPrereqError("Network %s does not exist" %
16233                                  self.network_name, errors.ECODE_INVAL)
16234
16235     self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16236     self.group = self.cfg.GetNodeGroup(self.group_uuid)
16237     if self.group is None:
16238       raise errors.OpPrereqError("Group %s does not exist" %
16239                                  self.group_name, errors.ECODE_INVAL)
16240
16241     self.needed_locks = {
16242       locking.LEVEL_NODEGROUP: [self.group_uuid],
16243       }
16244     self.share_locks[locking.LEVEL_INSTANCE] = 1
16245
16246   def DeclareLocks(self, level):
16247     if level == locking.LEVEL_INSTANCE:
16248       assert not self.needed_locks[locking.LEVEL_INSTANCE]
16249
16250       # Lock instances optimistically, needs verification once group lock has
16251       # been acquired
16252       if self.op.conflicts_check:
16253         self.needed_locks[locking.LEVEL_INSTANCE] = \
16254           self.cfg.GetNodeGroupInstances(self.group_uuid)
16255
16256   def BuildHooksEnv(self):
16257     ret = {
16258       "GROUP_NAME": self.group_name,
16259       }
16260     ret.update(_BuildNetworkHookEnvByObject(self.network))
16261     return ret
16262
16263   def BuildHooksNodes(self):
16264     nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16265     return (nodes, nodes)
16266
16267   def CheckPrereq(self):
16268     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16269     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16270
16271     assert self.group_uuid in owned_groups
16272
16273     # Check if locked instances are still correct
16274     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16275
16276     l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16277                                       for i in value)
16278
16279     self.connected = True
16280     if self.network_uuid not in self.group.networks:
16281       self.LogWarning("Network '%s' is not mapped to group '%s'",
16282                       self.network_name, self.group.name)
16283       self.connected = False
16284       return
16285
16286     if self.op.conflicts_check:
16287       conflicting_instances = []
16288
16289       for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
16290         for idx, nic in enumerate(instance.nics):
16291           if nic.network == self.network_name:
16292             conflicting_instances.append((instance.name, idx, nic.ip))
16293
16294       if conflicting_instances:
16295         self.LogWarning("Following occurences use IPs from network %s"
16296                            " that is about to disconnected from the nodegroup"
16297                            " %s: %s" %
16298                            (self.network_name, self.group.name,
16299                             l(conflicting_instances)))
16300         raise errors.OpPrereqError("Conflicting IPs."
16301                                    " Please remove/modify"
16302                                    " corresponding NICS",
16303                                    errors.ECODE_INVAL)
16304
16305   def Exec(self, feedback_fn):
16306     if not self.connected:
16307       return
16308
16309     del self.group.networks[self.network_uuid]
16310     self.cfg.Update(self.group, feedback_fn)
16311
16312
16313 #: Query type implementations
16314 _QUERY_IMPL = {
16315   constants.QR_CLUSTER: _ClusterQuery,
16316   constants.QR_INSTANCE: _InstanceQuery,
16317   constants.QR_NODE: _NodeQuery,
16318   constants.QR_GROUP: _GroupQuery,
16319   constants.QR_NETWORK: _NetworkQuery,
16320   constants.QR_OS: _OsQuery,
16321   constants.QR_EXPORT: _ExportQuery,
16322   }
16323
16324 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16325
16326
16327 def _GetQueryImplementation(name):
16328   """Returns the implemtnation for a query type.
16329
16330   @param name: Query type, must be one of L{constants.QR_VIA_OP}
16331
16332   """
16333   try:
16334     return _QUERY_IMPL[name]
16335   except KeyError:
16336     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16337                                errors.ECODE_INVAL)
16338
16339
16340 def _CheckForConflictingIp(lu, ip, node):
16341   """In case of conflicting ip raise error.
16342
16343   @type ip: string
16344   @param ip: ip address
16345   @type node: string
16346   @param node: node name
16347
16348   """
16349   (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16350   if conf_net is not None:
16351     raise errors.OpPrereqError("Conflicting IP found:"
16352                                " %s <> %s." % (ip, conf_net),
16353                                errors.ECODE_INVAL)
16354
16355   return (None, None)