code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60 from ganeti import ht
  61 from ganeti import rpc
  62 from ganeti import runtime
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141     # Dicts used to declare locking needs to mcpu
 142     self.needed_locks = None
 143     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 144     self.add_locks = {}
 145     self.remove_locks = {}
 146     # Used to force good behavior when calling helper functions
 147     self.recalculate_locks = {}
 148     # logging
 149     self.Log = processor.Log # pylint: disable=C0103
 150     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 151     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 152     self.LogStep = processor.LogStep # pylint: disable=C0103
 153     # support for dry-run
 154     self.dry_run_result = None
 155     # support for generic debug attribute
 156     if (not hasattr(self.op, "debug_level") or
 157         not isinstance(self.op.debug_level, int)):
 158       self.op.debug_level = 0
 159
 160     # Tasklets
 161     self.tasklets = None
 162
 163     # Validate opcode parameters and set defaults
 164     self.op.Validate(True)
 165
 166     self.CheckArguments()
 167
 168   def CheckArguments(self):
 169     """Check syntactic validity for the opcode arguments.
 170
 171     This method is for doing a simple syntactic check and ensure
 172     validity of opcode parameters, without any cluster-related
 173     checks. While the same can be accomplished in ExpandNames and/or
 174     CheckPrereq, doing these separate is better because:
 175
 176       - ExpandNames is left as as purely a lock-related function
 177       - CheckPrereq is run after we have acquired locks (and possible
 178         waited for them)
 179
 180     The function is allowed to change the self.op attribute so that
 181     later methods can no longer worry about missing parameters.
 182
 183     """
 184     pass
 185
 186   def ExpandNames(self):
 187     """Expand names for this LU.
 188
 189     This method is called before starting to execute the opcode, and it should
 190     update all the parameters of the opcode to their canonical form (e.g. a
 191     short node name must be fully expanded after this method has successfully
 192     completed). This way locking, hooks, logging, etc. can work correctly.
 193
 194     LUs which implement this method must also populate the self.needed_locks
 195     member, as a dict with lock levels as keys, and a list of needed lock names
 196     as values. Rules:
 197
 198       - use an empty dict if you don't need any lock
 199       - if you don't need any lock at a particular level omit that
 200         level (note that in this case C{DeclareLocks} won't be called
 201         at all for that level)
 202       - if you need locks at a level, but you can't calculate it in
 203         this function, initialise that level with an empty list and do
 204         further processing in L{LogicalUnit.DeclareLocks} (see that
 205         function's docstring)
 206       - don't put anything for the BGL level
 207       - if you want all locks at a level use L{locking.ALL_SET} as a value
 208
 209     If you need to share locks (rather than acquire them exclusively) at one
 210     level you can modify self.share_locks, setting a true value (usually 1) for
 211     that level. By default locks are not shared.
 212
 213     This function can also define a list of tasklets, which then will be
 214     executed in order instead of the usual LU-level CheckPrereq and Exec
 215     functions, if those are not defined by the LU.
 216
 217     Examples::
 218
 219       # Acquire all nodes and one instance
 220       self.needed_locks = {
 221         locking.LEVEL_NODE: locking.ALL_SET,
 222         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 223       }
 224       # Acquire just two nodes
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 227       }
 228       # Acquire no locks
 229       self.needed_locks = {} # No, you can't leave it to the default value None
 230
 231     """
 232     # The implementation of this method is mandatory only if the new LU is
 233     # concurrent, so that old LUs don't need to be changed all at the same
 234     # time.
 235     if self.REQ_BGL:
 236       self.needed_locks = {} # Exclusive LUs don't need locks.
 237     else:
 238       raise NotImplementedError
 239
 240   def DeclareLocks(self, level):
 241     """Declare LU locking needs for a level
 242
 243     While most LUs can just declare their locking needs at ExpandNames time,
 244     sometimes there's the need to calculate some locks after having acquired
 245     the ones before. This function is called just before acquiring locks at a
 246     particular level, but after acquiring the ones at lower levels, and permits
 247     such calculations. It can be used to modify self.needed_locks, and by
 248     default it does nothing.
 249
 250     This function is only called if you have something already set in
 251     self.needed_locks for the level.
 252
 253     @param level: Locking level which is going to be locked
 254     @type level: member of L{ganeti.locking.LEVELS}
 255
 256     """
 257
 258   def CheckPrereq(self):
 259     """Check prerequisites for this LU.
 260
 261     This method should check that the prerequisites for the execution
 262     of this LU are fulfilled. It can do internode communication, but
 263     it should be idempotent - no cluster or system changes are
 264     allowed.
 265
 266     The method should raise errors.OpPrereqError in case something is
 267     not fulfilled. Its return value is ignored.
 268
 269     This method should also update all the parameters of the opcode to
 270     their canonical form if it hasn't been done by ExpandNames before.
 271
 272     """
 273     if self.tasklets is not None:
 274       for (idx, tl) in enumerate(self.tasklets):
 275         logging.debug("Checking prerequisites for tasklet %s/%s",
 276                       idx + 1, len(self.tasklets))
 277         tl.CheckPrereq()
 278     else:
 279       pass
 280
 281   def Exec(self, feedback_fn):
 282     """Execute the LU.
 283
 284     This method should implement the actual work. It should raise
 285     errors.OpExecError for failures that are somewhat dealt with in
 286     code, or expected.
 287
 288     """
 289     if self.tasklets is not None:
 290       for (idx, tl) in enumerate(self.tasklets):
 291         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 292         tl.Exec(feedback_fn)
 293     else:
 294       raise NotImplementedError
 295
 296   def BuildHooksEnv(self):
 297     """Build hooks environment for this LU.
 298
 299     @rtype: dict
 300     @return: Dictionary containing the environment that will be used for
 301       running the hooks for this LU. The keys of the dict must not be prefixed
 302       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 303       will extend the environment with additional variables. If no environment
 304       should be defined, an empty dictionary should be returned (not C{None}).
 305     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 306       will not be called.
 307
 308     """
 309     raise NotImplementedError
 310
 311   def BuildHooksNodes(self):
 312     """Build list of nodes to run LU's hooks.
 313
 314     @rtype: tuple; (list, list)
 315     @return: Tuple containing a list of node names on which the hook
 316       should run before the execution and a list of node names on which the
 317       hook should run after the execution. No nodes should be returned as an
 318       empty list (and not None).
 319     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 320       will not be called.
 321
 322     """
 323     raise NotImplementedError
 324
 325   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 326     """Notify the LU about the results of its hooks.
 327
 328     This method is called every time a hooks phase is executed, and notifies
 329     the Logical Unit about the hooks' result. The LU can then use it to alter
 330     its result based on the hooks.  By default the method does nothing and the
 331     previous result is passed back unchanged but any LU can define it if it
 332     wants to use the local cluster hook-scripts somehow.
 333
 334     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 335         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 336     @param hook_results: the results of the multi-node hooks rpc call
 337     @param feedback_fn: function used send feedback back to the caller
 338     @param lu_result: the previous Exec result this LU had, or None
 339         in the PRE phase
 340     @return: the new Exec result, based on the previous result
 341         and hook results
 342
 343     """
 344     # API must be kept, thus we ignore the unused argument and could
 345     # be a function warnings
 346     # pylint: disable=W0613,R0201
 347     return lu_result
 348
 349   def _ExpandAndLockInstance(self):
 350     """Helper function to expand and lock an instance.
 351
 352     Many LUs that work on an instance take its name in self.op.instance_name
 353     and need to expand it and then declare the expanded name for locking. This
 354     function does it, and then updates self.op.instance_name to the expanded
 355     name. It also initializes needed_locks as a dict, if this hasn't been done
 356     before.
 357
 358     """
 359     if self.needed_locks is None:
 360       self.needed_locks = {}
 361     else:
 362       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 363         "_ExpandAndLockInstance called with instance-level locks set"
 364     self.op.instance_name = _ExpandInstanceName(self.cfg,
 365                                                 self.op.instance_name)
 366     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 367
 368   def _LockInstancesNodes(self, primary_only=False,
 369                           level=locking.LEVEL_NODE):
 370     """Helper function to declare instances' nodes for locking.
 371
 372     This function should be called after locking one or more instances to lock
 373     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 374     with all primary or secondary nodes for instances already locked and
 375     present in self.needed_locks[locking.LEVEL_INSTANCE].
 376
 377     It should be called from DeclareLocks, and for safety only works if
 378     self.recalculate_locks[locking.LEVEL_NODE] is set.
 379
 380     In the future it may grow parameters to just lock some instance's nodes, or
 381     to just lock primaries or secondary nodes, if needed.
 382
 383     If should be called in DeclareLocks in a way similar to::
 384
 385       if level == locking.LEVEL_NODE:
 386         self._LockInstancesNodes()
 387
 388     @type primary_only: boolean
 389     @param primary_only: only lock primary nodes of locked instances
 390     @param level: Which lock level to use for locking nodes
 391
 392     """
 393     assert level in self.recalculate_locks, \
 394       "_LockInstancesNodes helper function called with no nodes to recalculate"
 395
 396     # TODO: check if we're really been called with the instance locks held
 397
 398     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 399     # future we might want to have different behaviors depending on the value
 400     # of self.recalculate_locks[locking.LEVEL_NODE]
 401     wanted_nodes = []
 402     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 403     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 404       wanted_nodes.append(instance.primary_node)
 405       if not primary_only:
 406         wanted_nodes.extend(instance.secondary_nodes)
 407
 408     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 409       self.needed_locks[level] = wanted_nodes
 410     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 411       self.needed_locks[level].extend(wanted_nodes)
 412     else:
 413       raise errors.ProgrammerError("Unknown recalculation mode")
 414
 415     del self.recalculate_locks[level]
 416
 417
 418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 419   """Simple LU which runs no hooks.
 420
 421   This LU is intended as a parent for other LogicalUnits which will
 422   run no hooks, in order to reduce duplicate code.
 423
 424   """
 425   HPATH = None
 426   HTYPE = None
 427
 428   def BuildHooksEnv(self):
 429     """Empty BuildHooksEnv for NoHooksLu.
 430
 431     This just raises an error.
 432
 433     """
 434     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 435
 436   def BuildHooksNodes(self):
 437     """Empty BuildHooksNodes for NoHooksLU.
 438
 439     """
 440     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 class _QueryBase:
 490   """Base for query utility classes.
 491
 492   """
 493   #: Attribute holding field definitions
 494   FIELDS = None
 495
 496   #: Field to sort by
 497   SORT_FIELD = "name"
 498
 499   def __init__(self, qfilter, fields, use_locking):
 500     """Initializes this class.
 501
 502     """
 503     self.use_locking = use_locking
 504
 505     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 506                              namefield=self.SORT_FIELD)
 507     self.requested_data = self.query.RequestedData()
 508     self.names = self.query.RequestedNames()
 509
 510     # Sort only if no names were requested
 511     self.sort_by_name = not self.names
 512
 513     self.do_locking = None
 514     self.wanted = None
 515
 516   def _GetNames(self, lu, all_names, lock_level):
 517     """Helper function to determine names asked for in the query.
 518
 519     """
 520     if self.do_locking:
 521       names = lu.owned_locks(lock_level)
 522     else:
 523       names = all_names
 524
 525     if self.wanted == locking.ALL_SET:
 526       assert not self.names
 527       # caller didn't specify names, so ordering is not important
 528       return utils.NiceSort(names)
 529
 530     # caller specified names and we must keep the same order
 531     assert self.names
 532     assert not self.do_locking or lu.glm.is_owned(lock_level)
 533
 534     missing = set(self.wanted).difference(names)
 535     if missing:
 536       raise errors.OpExecError("Some items were removed before retrieving"
 537                                " their data: %s" % missing)
 538
 539     # Return expanded names
 540     return self.wanted
 541
 542   def ExpandNames(self, lu):
 543     """Expand names for this query.
 544
 545     See L{LogicalUnit.ExpandNames}.
 546
 547     """
 548     raise NotImplementedError()
 549
 550   def DeclareLocks(self, lu, level):
 551     """Declare locks for this query.
 552
 553     See L{LogicalUnit.DeclareLocks}.
 554
 555     """
 556     raise NotImplementedError()
 557
 558   def _GetQueryData(self, lu):
 559     """Collects all data for this query.
 560
 561     @return: Query data object
 562
 563     """
 564     raise NotImplementedError()
 565
 566   def NewStyleQuery(self, lu):
 567     """Collect data and execute query.
 568
 569     """
 570     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 571                                   sort_by_name=self.sort_by_name)
 572
 573   def OldStyleQuery(self, lu):
 574     """Collect data and execute query.
 575
 576     """
 577     return self.query.OldStyleQuery(self._GetQueryData(lu),
 578                                     sort_by_name=self.sort_by_name)
 579
 580
 581 def _ShareAll():
 582   """Returns a dict declaring all lock levels shared.
 583
 584   """
 585   return dict.fromkeys(locking.LEVELS, 1)
 586
 587
 588 def _MakeLegacyNodeInfo(data):
 589   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 590
 591   Converts the data into a single dictionary. This is fine for most use cases,
 592   but some require information from more than one volume group or hypervisor.
 593
 594   """
 595   (bootid, (vg_info, ), (hv_info, )) = data
 596
 597   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 598     "bootid": bootid,
 599     })
 600
 601
 602 def _AnnotateDiskParams(instance, devs, cfg):
 603   """Little helper wrapper to the rpc annotation method.
 604
 605   @param instance: The instance object
 606   @type devs: List of L{objects.Disk}
 607   @param devs: The root devices (not any of its children!)
 608   @param cfg: The config object
 609   @returns The annotated disk copies
 610   @see L{rpc.AnnotateDiskParams}
 611
 612   """
 613   return rpc.AnnotateDiskParams(instance.disk_template, devs,
 614                                 cfg.GetInstanceDiskParams(instance))
 615
 616
 617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 618                               cur_group_uuid):
 619   """Checks if node groups for locked instances are still correct.
 620
 621   @type cfg: L{config.ConfigWriter}
 622   @param cfg: Cluster configuration
 623   @type instances: dict; string as key, L{objects.Instance} as value
 624   @param instances: Dictionary, instance name as key, instance object as value
 625   @type owned_groups: iterable of string
 626   @param owned_groups: List of owned groups
 627   @type owned_nodes: iterable of string
 628   @param owned_nodes: List of owned nodes
 629   @type cur_group_uuid: string or None
 630   @param cur_group_uuid: Optional group UUID to check against instance's groups
 631
 632   """
 633   for (name, inst) in instances.items():
 634     assert owned_nodes.issuperset(inst.all_nodes), \
 635       "Instance %s's nodes changed while we kept the lock" % name
 636
 637     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 638
 639     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 640       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 641
 642
 643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
 644                              primary_only=False):
 645   """Checks if the owned node groups are still correct for an instance.
 646
 647   @type cfg: L{config.ConfigWriter}
 648   @param cfg: The cluster configuration
 649   @type instance_name: string
 650   @param instance_name: Instance name
 651   @type owned_groups: set or frozenset
 652   @param owned_groups: List of currently owned node groups
 653   @type primary_only: boolean
 654   @param primary_only: Whether to check node groups for only the primary node
 655
 656   """
 657   inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
 658
 659   if not owned_groups.issuperset(inst_groups):
 660     raise errors.OpPrereqError("Instance %s's node groups changed since"
 661                                " locks were acquired, current groups are"
 662                                " are '%s', owning groups '%s'; retry the"
 663                                " operation" %
 664                                (instance_name,
 665                                 utils.CommaJoin(inst_groups),
 666                                 utils.CommaJoin(owned_groups)),
 667                                errors.ECODE_STATE)
 668
 669   return inst_groups
 670
 671
 672 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 673   """Checks if the instances in a node group are still correct.
 674
 675   @type cfg: L{config.ConfigWriter}
 676   @param cfg: The cluster configuration
 677   @type group_uuid: string
 678   @param group_uuid: Node group UUID
 679   @type owned_instances: set or frozenset
 680   @param owned_instances: List of currently owned instances
 681
 682   """
 683   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 684   if owned_instances != wanted_instances:
 685     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 686                                " locks were acquired, wanted '%s', have '%s';"
 687                                " retry the operation" %
 688                                (group_uuid,
 689                                 utils.CommaJoin(wanted_instances),
 690                                 utils.CommaJoin(owned_instances)),
 691                                errors.ECODE_STATE)
 692
 693   return wanted_instances
 694
 695
 696 def _SupportsOob(cfg, node):
 697   """Tells if node supports OOB.
 698
 699   @type cfg: L{config.ConfigWriter}
 700   @param cfg: The cluster configuration
 701   @type node: L{objects.Node}
 702   @param node: The node
 703   @return: The OOB script if supported or an empty string otherwise
 704
 705   """
 706   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 707
 708
 709 def _GetWantedNodes(lu, nodes):
 710   """Returns list of checked and expanded node names.
 711
 712   @type lu: L{LogicalUnit}
 713   @param lu: the logical unit on whose behalf we execute
 714   @type nodes: list
 715   @param nodes: list of node names or None for all nodes
 716   @rtype: list
 717   @return: the list of nodes, sorted
 718   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 719
 720   """
 721   if nodes:
 722     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 723
 724   return utils.NiceSort(lu.cfg.GetNodeList())
 725
 726
 727 def _GetWantedInstances(lu, instances):
 728   """Returns list of checked and expanded instance names.
 729
 730   @type lu: L{LogicalUnit}
 731   @param lu: the logical unit on whose behalf we execute
 732   @type instances: list
 733   @param instances: list of instance names or None for all instances
 734   @rtype: list
 735   @return: the list of instances, sorted
 736   @raise errors.OpPrereqError: if the instances parameter is wrong type
 737   @raise errors.OpPrereqError: if any of the passed instances is not found
 738
 739   """
 740   if instances:
 741     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 742   else:
 743     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 744   return wanted
 745
 746
 747 def _GetUpdatedParams(old_params, update_dict,
 748                       use_default=True, use_none=False):
 749   """Return the new version of a parameter dictionary.
 750
 751   @type old_params: dict
 752   @param old_params: old parameters
 753   @type update_dict: dict
 754   @param update_dict: dict containing new parameter values, or
 755       constants.VALUE_DEFAULT to reset the parameter to its default
 756       value
 757   @param use_default: boolean
 758   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 759       values as 'to be deleted' values
 760   @param use_none: boolean
 761   @type use_none: whether to recognise C{None} values as 'to be
 762       deleted' values
 763   @rtype: dict
 764   @return: the new parameter dictionary
 765
 766   """
 767   params_copy = copy.deepcopy(old_params)
 768   for key, val in update_dict.iteritems():
 769     if ((use_default and val == constants.VALUE_DEFAULT) or
 770         (use_none and val is None)):
 771       try:
 772         del params_copy[key]
 773       except KeyError:
 774         pass
 775     else:
 776       params_copy[key] = val
 777   return params_copy
 778
 779
 780 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 781   """Return the new version of a instance policy.
 782
 783   @param group_policy: whether this policy applies to a group and thus
 784     we should support removal of policy entries
 785
 786   """
 787   use_none = use_default = group_policy
 788   ipolicy = copy.deepcopy(old_ipolicy)
 789   for key, value in new_ipolicy.items():
 790     if key not in constants.IPOLICY_ALL_KEYS:
 791       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 792                                  errors.ECODE_INVAL)
 793     if key in constants.IPOLICY_ISPECS:
 794       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 795       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 796                                        use_none=use_none,
 797                                        use_default=use_default)
 798     else:
 799       if (not value or value == [constants.VALUE_DEFAULT] or
 800           value == constants.VALUE_DEFAULT):
 801         if group_policy:
 802           del ipolicy[key]
 803         else:
 804           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 805                                      " on the cluster'" % key,
 806                                      errors.ECODE_INVAL)
 807       else:
 808         if key in constants.IPOLICY_PARAMETERS:
 809           # FIXME: we assume all such values are float
 810           try:
 811             ipolicy[key] = float(value)
 812           except (TypeError, ValueError), err:
 813             raise errors.OpPrereqError("Invalid value for attribute"
 814                                        " '%s': '%s', error: %s" %
 815                                        (key, value, err), errors.ECODE_INVAL)
 816         else:
 817           # FIXME: we assume all others are lists; this should be redone
 818           # in a nicer way
 819           ipolicy[key] = list(value)
 820   try:
 821     objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
 822   except errors.ConfigurationError, err:
 823     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 824                                errors.ECODE_INVAL)
 825   return ipolicy
 826
 827
 828 def _UpdateAndVerifySubDict(base, updates, type_check):
 829   """Updates and verifies a dict with sub dicts of the same type.
 830
 831   @param base: The dict with the old data
 832   @param updates: The dict with the new data
 833   @param type_check: Dict suitable to ForceDictType to verify correct types
 834   @returns: A new dict with updated and verified values
 835
 836   """
 837   def fn(old, value):
 838     new = _GetUpdatedParams(old, value)
 839     utils.ForceDictType(new, type_check)
 840     return new
 841
 842   ret = copy.deepcopy(base)
 843   ret.update(dict((key, fn(base.get(key, {}), value))
 844                   for key, value in updates.items()))
 845   return ret
 846
 847
 848 def _MergeAndVerifyHvState(op_input, obj_input):
 849   """Combines the hv state from an opcode with the one of the object
 850
 851   @param op_input: The input dict from the opcode
 852   @param obj_input: The input dict from the objects
 853   @return: The verified and updated dict
 854
 855   """
 856   if op_input:
 857     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 858     if invalid_hvs:
 859       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 860                                  " %s" % utils.CommaJoin(invalid_hvs),
 861                                  errors.ECODE_INVAL)
 862     if obj_input is None:
 863       obj_input = {}
 864     type_check = constants.HVSTS_PARAMETER_TYPES
 865     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 866
 867   return None
 868
 869
 870 def _MergeAndVerifyDiskState(op_input, obj_input):
 871   """Combines the disk state from an opcode with the one of the object
 872
 873   @param op_input: The input dict from the opcode
 874   @param obj_input: The input dict from the objects
 875   @return: The verified and updated dict
 876   """
 877   if op_input:
 878     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 879     if invalid_dst:
 880       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 881                                  utils.CommaJoin(invalid_dst),
 882                                  errors.ECODE_INVAL)
 883     type_check = constants.DSS_PARAMETER_TYPES
 884     if obj_input is None:
 885       obj_input = {}
 886     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 887                                               type_check))
 888                 for key, value in op_input.items())
 889
 890   return None
 891
 892
 893 def _ReleaseLocks(lu, level, names=None, keep=None):
 894   """Releases locks owned by an LU.
 895
 896   @type lu: L{LogicalUnit}
 897   @param level: Lock level
 898   @type names: list or None
 899   @param names: Names of locks to release
 900   @type keep: list or None
 901   @param keep: Names of locks to retain
 902
 903   """
 904   assert not (keep is not None and names is not None), \
 905          "Only one of the 'names' and the 'keep' parameters can be given"
 906
 907   if names is not None:
 908     should_release = names.__contains__
 909   elif keep:
 910     should_release = lambda name: name not in keep
 911   else:
 912     should_release = None
 913
 914   owned = lu.owned_locks(level)
 915   if not owned:
 916     # Not owning any lock at this level, do nothing
 917     pass
 918
 919   elif should_release:
 920     retain = []
 921     release = []
 922
 923     # Determine which locks to release
 924     for name in owned:
 925       if should_release(name):
 926         release.append(name)
 927       else:
 928         retain.append(name)
 929
 930     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 931
 932     # Release just some locks
 933     lu.glm.release(level, names=release)
 934
 935     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 936   else:
 937     # Release everything
 938     lu.glm.release(level)
 939
 940     assert not lu.glm.is_owned(level), "No locks should be owned"
 941
 942
 943 def _MapInstanceDisksToNodes(instances):
 944   """Creates a map from (node, volume) to instance name.
 945
 946   @type instances: list of L{objects.Instance}
 947   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 948
 949   """
 950   return dict(((node, vol), inst.name)
 951               for inst in instances
 952               for (node, vols) in inst.MapLVsByNode().items()
 953               for vol in vols)
 954
 955
 956 def _RunPostHook(lu, node_name):
 957   """Runs the post-hook for an opcode on a single node.
 958
 959   """
 960   hm = lu.proc.BuildHooksManager(lu)
 961   try:
 962     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 963   except:
 964     # pylint: disable=W0702
 965     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 966
 967
 968 def _CheckOutputFields(static, dynamic, selected):
 969   """Checks whether all selected fields are valid.
 970
 971   @type static: L{utils.FieldSet}
 972   @param static: static fields set
 973   @type dynamic: L{utils.FieldSet}
 974   @param dynamic: dynamic fields set
 975
 976   """
 977   f = utils.FieldSet()
 978   f.Extend(static)
 979   f.Extend(dynamic)
 980
 981   delta = f.NonMatching(selected)
 982   if delta:
 983     raise errors.OpPrereqError("Unknown output fields selected: %s"
 984                                % ",".join(delta), errors.ECODE_INVAL)
 985
 986
 987 def _CheckGlobalHvParams(params):
 988   """Validates that given hypervisor params are not global ones.
 989
 990   This will ensure that instances don't get customised versions of
 991   global params.
 992
 993   """
 994   used_globals = constants.HVC_GLOBALS.intersection(params)
 995   if used_globals:
 996     msg = ("The following hypervisor parameters are global and cannot"
 997            " be customized at instance level, please modify them at"
 998            " cluster level: %s" % utils.CommaJoin(used_globals))
 999     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1000
1001
1002 def _CheckNodeOnline(lu, node, msg=None):
1003   """Ensure that a given node is online.
1004
1005   @param lu: the LU on behalf of which we make the check
1006   @param node: the node to check
1007   @param msg: if passed, should be a message to replace the default one
1008   @raise errors.OpPrereqError: if the node is offline
1009
1010   """
1011   if msg is None:
1012     msg = "Can't use offline node"
1013   if lu.cfg.GetNodeInfo(node).offline:
1014     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1015
1016
1017 def _CheckNodeNotDrained(lu, node):
1018   """Ensure that a given node is not drained.
1019
1020   @param lu: the LU on behalf of which we make the check
1021   @param node: the node to check
1022   @raise errors.OpPrereqError: if the node is drained
1023
1024   """
1025   if lu.cfg.GetNodeInfo(node).drained:
1026     raise errors.OpPrereqError("Can't use drained node %s" % node,
1027                                errors.ECODE_STATE)
1028
1029
1030 def _CheckNodeVmCapable(lu, node):
1031   """Ensure that a given node is vm capable.
1032
1033   @param lu: the LU on behalf of which we make the check
1034   @param node: the node to check
1035   @raise errors.OpPrereqError: if the node is not vm capable
1036
1037   """
1038   if not lu.cfg.GetNodeInfo(node).vm_capable:
1039     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1040                                errors.ECODE_STATE)
1041
1042
1043 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1044   """Ensure that a node supports a given OS.
1045
1046   @param lu: the LU on behalf of which we make the check
1047   @param node: the node to check
1048   @param os_name: the OS to query about
1049   @param force_variant: whether to ignore variant errors
1050   @raise errors.OpPrereqError: if the node is not supporting the OS
1051
1052   """
1053   result = lu.rpc.call_os_get(node, os_name)
1054   result.Raise("OS '%s' not in supported OS list for node %s" %
1055                (os_name, node),
1056                prereq=True, ecode=errors.ECODE_INVAL)
1057   if not force_variant:
1058     _CheckOSVariant(result.payload, os_name)
1059
1060
1061 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1062   """Ensure that a node has the given secondary ip.
1063
1064   @type lu: L{LogicalUnit}
1065   @param lu: the LU on behalf of which we make the check
1066   @type node: string
1067   @param node: the node to check
1068   @type secondary_ip: string
1069   @param secondary_ip: the ip to check
1070   @type prereq: boolean
1071   @param prereq: whether to throw a prerequisite or an execute error
1072   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1073   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1074
1075   """
1076   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1077   result.Raise("Failure checking secondary ip on node %s" % node,
1078                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1079   if not result.payload:
1080     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1081            " please fix and re-run this command" % secondary_ip)
1082     if prereq:
1083       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1084     else:
1085       raise errors.OpExecError(msg)
1086
1087
1088 def _GetClusterDomainSecret():
1089   """Reads the cluster domain secret.
1090
1091   """
1092   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1093                                strict=True)
1094
1095
1096 def _CheckInstanceState(lu, instance, req_states, msg=None):
1097   """Ensure that an instance is in one of the required states.
1098
1099   @param lu: the LU on behalf of which we make the check
1100   @param instance: the instance to check
1101   @param msg: if passed, should be a message to replace the default one
1102   @raise errors.OpPrereqError: if the instance is not in the required state
1103
1104   """
1105   if msg is None:
1106     msg = "can't use instance from outside %s states" % ", ".join(req_states)
1107   if instance.admin_state not in req_states:
1108     raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1109                                (instance.name, instance.admin_state, msg),
1110                                errors.ECODE_STATE)
1111
1112   if constants.ADMINST_UP not in req_states:
1113     pnode = instance.primary_node
1114     ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1115     ins_l.Raise("Can't contact node %s for instance information" % pnode,
1116                 prereq=True, ecode=errors.ECODE_ENVIRON)
1117
1118     if instance.name in ins_l.payload:
1119       raise errors.OpPrereqError("Instance %s is running, %s" %
1120                                  (instance.name, msg), errors.ECODE_STATE)
1121
1122
1123 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1124   """Computes if value is in the desired range.
1125
1126   @param name: name of the parameter for which we perform the check
1127   @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1128       not just 'disk')
1129   @param ipolicy: dictionary containing min, max and std values
1130   @param value: actual value that we want to use
1131   @return: None or element not meeting the criteria
1132
1133
1134   """
1135   if value in [None, constants.VALUE_AUTO]:
1136     return None
1137   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1138   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1139   if value > max_v or min_v > value:
1140     if qualifier:
1141       fqn = "%s/%s" % (name, qualifier)
1142     else:
1143       fqn = name
1144     return ("%s value %s is not in range [%s, %s]" %
1145             (fqn, value, min_v, max_v))
1146   return None
1147
1148
1149 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1150                                  nic_count, disk_sizes, spindle_use,
1151                                  _compute_fn=_ComputeMinMaxSpec):
1152   """Verifies ipolicy against provided specs.
1153
1154   @type ipolicy: dict
1155   @param ipolicy: The ipolicy
1156   @type mem_size: int
1157   @param mem_size: The memory size
1158   @type cpu_count: int
1159   @param cpu_count: Used cpu cores
1160   @type disk_count: int
1161   @param disk_count: Number of disks used
1162   @type nic_count: int
1163   @param nic_count: Number of nics used
1164   @type disk_sizes: list of ints
1165   @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1166   @type spindle_use: int
1167   @param spindle_use: The number of spindles this instance uses
1168   @param _compute_fn: The compute function (unittest only)
1169   @return: A list of violations, or an empty list of no violations are found
1170
1171   """
1172   assert disk_count == len(disk_sizes)
1173
1174   test_settings = [
1175     (constants.ISPEC_MEM_SIZE, "", mem_size),
1176     (constants.ISPEC_CPU_COUNT, "", cpu_count),
1177     (constants.ISPEC_DISK_COUNT, "", disk_count),
1178     (constants.ISPEC_NIC_COUNT, "", nic_count),
1179     (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1180     ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1181          for idx, d in enumerate(disk_sizes)]
1182
1183   return filter(None,
1184                 (_compute_fn(name, qualifier, ipolicy, value)
1185                  for (name, qualifier, value) in test_settings))
1186
1187
1188 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1189                                      _compute_fn=_ComputeIPolicySpecViolation):
1190   """Compute if instance meets the specs of ipolicy.
1191
1192   @type ipolicy: dict
1193   @param ipolicy: The ipolicy to verify against
1194   @type instance: L{objects.Instance}
1195   @param instance: The instance to verify
1196   @param _compute_fn: The function to verify ipolicy (unittest only)
1197   @see: L{_ComputeIPolicySpecViolation}
1198
1199   """
1200   mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1201   cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1202   spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1203   disk_count = len(instance.disks)
1204   disk_sizes = [disk.size for disk in instance.disks]
1205   nic_count = len(instance.nics)
1206
1207   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1208                      disk_sizes, spindle_use)
1209
1210
1211 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1212     _compute_fn=_ComputeIPolicySpecViolation):
1213   """Compute if instance specs meets the specs of ipolicy.
1214
1215   @type ipolicy: dict
1216   @param ipolicy: The ipolicy to verify against
1217   @param instance_spec: dict
1218   @param instance_spec: The instance spec to verify
1219   @param _compute_fn: The function to verify ipolicy (unittest only)
1220   @see: L{_ComputeIPolicySpecViolation}
1221
1222   """
1223   mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1224   cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1225   disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1226   disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1227   nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1228   spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1229
1230   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1231                      disk_sizes, spindle_use)
1232
1233
1234 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1235                                  target_group,
1236                                  _compute_fn=_ComputeIPolicyInstanceViolation):
1237   """Compute if instance meets the specs of the new target group.
1238
1239   @param ipolicy: The ipolicy to verify
1240   @param instance: The instance object to verify
1241   @param current_group: The current group of the instance
1242   @param target_group: The new group of the instance
1243   @param _compute_fn: The function to verify ipolicy (unittest only)
1244   @see: L{_ComputeIPolicySpecViolation}
1245
1246   """
1247   if current_group == target_group:
1248     return []
1249   else:
1250     return _compute_fn(ipolicy, instance)
1251
1252
1253 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1254                             _compute_fn=_ComputeIPolicyNodeViolation):
1255   """Checks that the target node is correct in terms of instance policy.
1256
1257   @param ipolicy: The ipolicy to verify
1258   @param instance: The instance object to verify
1259   @param node: The new node to relocate
1260   @param ignore: Ignore violations of the ipolicy
1261   @param _compute_fn: The function to verify ipolicy (unittest only)
1262   @see: L{_ComputeIPolicySpecViolation}
1263
1264   """
1265   primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1266   res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1267
1268   if res:
1269     msg = ("Instance does not meet target node group's (%s) instance"
1270            " policy: %s") % (node.group, utils.CommaJoin(res))
1271     if ignore:
1272       lu.LogWarning(msg)
1273     else:
1274       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1275
1276
1277 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1278   """Computes a set of any instances that would violate the new ipolicy.
1279
1280   @param old_ipolicy: The current (still in-place) ipolicy
1281   @param new_ipolicy: The new (to become) ipolicy
1282   @param instances: List of instances to verify
1283   @return: A list of instances which violates the new ipolicy but
1284       did not before
1285
1286   """
1287   return (_ComputeViolatingInstances(new_ipolicy, instances) -
1288           _ComputeViolatingInstances(old_ipolicy, instances))
1289
1290
1291 def _ExpandItemName(fn, name, kind):
1292   """Expand an item name.
1293
1294   @param fn: the function to use for expansion
1295   @param name: requested item name
1296   @param kind: text description ('Node' or 'Instance')
1297   @return: the resolved (full) name
1298   @raise errors.OpPrereqError: if the item is not found
1299
1300   """
1301   full_name = fn(name)
1302   if full_name is None:
1303     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1304                                errors.ECODE_NOENT)
1305   return full_name
1306
1307
1308 def _ExpandNodeName(cfg, name):
1309   """Wrapper over L{_ExpandItemName} for nodes."""
1310   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1311
1312
1313 def _ExpandInstanceName(cfg, name):
1314   """Wrapper over L{_ExpandItemName} for instance."""
1315   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1316
1317
1318 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1319                           minmem, maxmem, vcpus, nics, disk_template, disks,
1320                           bep, hvp, hypervisor_name, tags):
1321   """Builds instance related env variables for hooks
1322
1323   This builds the hook environment from individual variables.
1324
1325   @type name: string
1326   @param name: the name of the instance
1327   @type primary_node: string
1328   @param primary_node: the name of the instance's primary node
1329   @type secondary_nodes: list
1330   @param secondary_nodes: list of secondary nodes as strings
1331   @type os_type: string
1332   @param os_type: the name of the instance's OS
1333   @type status: string
1334   @param status: the desired status of the instance
1335   @type minmem: string
1336   @param minmem: the minimum memory size of the instance
1337   @type maxmem: string
1338   @param maxmem: the maximum memory size of the instance
1339   @type vcpus: string
1340   @param vcpus: the count of VCPUs the instance has
1341   @type nics: list
1342   @param nics: list of tuples (ip, mac, mode, link) representing
1343       the NICs the instance has
1344   @type disk_template: string
1345   @param disk_template: the disk template of the instance
1346   @type disks: list
1347   @param disks: the list of (size, mode) pairs
1348   @type bep: dict
1349   @param bep: the backend parameters for the instance
1350   @type hvp: dict
1351   @param hvp: the hypervisor parameters for the instance
1352   @type hypervisor_name: string
1353   @param hypervisor_name: the hypervisor for the instance
1354   @type tags: list
1355   @param tags: list of instance tags as strings
1356   @rtype: dict
1357   @return: the hook environment for this instance
1358
1359   """
1360   env = {
1361     "OP_TARGET": name,
1362     "INSTANCE_NAME": name,
1363     "INSTANCE_PRIMARY": primary_node,
1364     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1365     "INSTANCE_OS_TYPE": os_type,
1366     "INSTANCE_STATUS": status,
1367     "INSTANCE_MINMEM": minmem,
1368     "INSTANCE_MAXMEM": maxmem,
1369     # TODO(2.7) remove deprecated "memory" value
1370     "INSTANCE_MEMORY": maxmem,
1371     "INSTANCE_VCPUS": vcpus,
1372     "INSTANCE_DISK_TEMPLATE": disk_template,
1373     "INSTANCE_HYPERVISOR": hypervisor_name,
1374   }
1375   if nics:
1376     nic_count = len(nics)
1377     for idx, (ip, mac, mode, link) in enumerate(nics):
1378       if ip is None:
1379         ip = ""
1380       env["INSTANCE_NIC%d_IP" % idx] = ip
1381       env["INSTANCE_NIC%d_MAC" % idx] = mac
1382       env["INSTANCE_NIC%d_MODE" % idx] = mode
1383       env["INSTANCE_NIC%d_LINK" % idx] = link
1384       if mode == constants.NIC_MODE_BRIDGED:
1385         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1386   else:
1387     nic_count = 0
1388
1389   env["INSTANCE_NIC_COUNT"] = nic_count
1390
1391   if disks:
1392     disk_count = len(disks)
1393     for idx, (size, mode) in enumerate(disks):
1394       env["INSTANCE_DISK%d_SIZE" % idx] = size
1395       env["INSTANCE_DISK%d_MODE" % idx] = mode
1396   else:
1397     disk_count = 0
1398
1399   env["INSTANCE_DISK_COUNT"] = disk_count
1400
1401   if not tags:
1402     tags = []
1403
1404   env["INSTANCE_TAGS"] = " ".join(tags)
1405
1406   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1407     for key, value in source.items():
1408       env["INSTANCE_%s_%s" % (kind, key)] = value
1409
1410   return env
1411
1412
1413 def _NICListToTuple(lu, nics):
1414   """Build a list of nic information tuples.
1415
1416   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1417   value in LUInstanceQueryData.
1418
1419   @type lu:  L{LogicalUnit}
1420   @param lu: the logical unit on whose behalf we execute
1421   @type nics: list of L{objects.NIC}
1422   @param nics: list of nics to convert to hooks tuples
1423
1424   """
1425   hooks_nics = []
1426   cluster = lu.cfg.GetClusterInfo()
1427   for nic in nics:
1428     ip = nic.ip
1429     mac = nic.mac
1430     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1431     mode = filled_params[constants.NIC_MODE]
1432     link = filled_params[constants.NIC_LINK]
1433     hooks_nics.append((ip, mac, mode, link))
1434   return hooks_nics
1435
1436
1437 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1438   """Builds instance related env variables for hooks from an object.
1439
1440   @type lu: L{LogicalUnit}
1441   @param lu: the logical unit on whose behalf we execute
1442   @type instance: L{objects.Instance}
1443   @param instance: the instance for which we should build the
1444       environment
1445   @type override: dict
1446   @param override: dictionary with key/values that will override
1447       our values
1448   @rtype: dict
1449   @return: the hook environment dictionary
1450
1451   """
1452   cluster = lu.cfg.GetClusterInfo()
1453   bep = cluster.FillBE(instance)
1454   hvp = cluster.FillHV(instance)
1455   args = {
1456     "name": instance.name,
1457     "primary_node": instance.primary_node,
1458     "secondary_nodes": instance.secondary_nodes,
1459     "os_type": instance.os,
1460     "status": instance.admin_state,
1461     "maxmem": bep[constants.BE_MAXMEM],
1462     "minmem": bep[constants.BE_MINMEM],
1463     "vcpus": bep[constants.BE_VCPUS],
1464     "nics": _NICListToTuple(lu, instance.nics),
1465     "disk_template": instance.disk_template,
1466     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1467     "bep": bep,
1468     "hvp": hvp,
1469     "hypervisor_name": instance.hypervisor,
1470     "tags": instance.tags,
1471   }
1472   if override:
1473     args.update(override)
1474   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1475
1476
1477 def _AdjustCandidatePool(lu, exceptions):
1478   """Adjust the candidate pool after node operations.
1479
1480   """
1481   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1482   if mod_list:
1483     lu.LogInfo("Promoted nodes to master candidate role: %s",
1484                utils.CommaJoin(node.name for node in mod_list))
1485     for name in mod_list:
1486       lu.context.ReaddNode(name)
1487   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1488   if mc_now > mc_max:
1489     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1490                (mc_now, mc_max))
1491
1492
1493 def _DecideSelfPromotion(lu, exceptions=None):
1494   """Decide whether I should promote myself as a master candidate.
1495
1496   """
1497   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1498   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1499   # the new node will increase mc_max with one, so:
1500   mc_should = min(mc_should + 1, cp_size)
1501   return mc_now < mc_should
1502
1503
1504 def _CalculateGroupIPolicy(cluster, group):
1505   """Calculate instance policy for group.
1506
1507   """
1508   return cluster.SimpleFillIPolicy(group.ipolicy)
1509
1510
1511 def _ComputeViolatingInstances(ipolicy, instances):
1512   """Computes a set of instances who violates given ipolicy.
1513
1514   @param ipolicy: The ipolicy to verify
1515   @type instances: object.Instance
1516   @param instances: List of instances to verify
1517   @return: A frozenset of instance names violating the ipolicy
1518
1519   """
1520   return frozenset([inst.name for inst in instances
1521                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1522
1523
1524 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1525   """Check that the brigdes needed by a list of nics exist.
1526
1527   """
1528   cluster = lu.cfg.GetClusterInfo()
1529   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1530   brlist = [params[constants.NIC_LINK] for params in paramslist
1531             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1532   if brlist:
1533     result = lu.rpc.call_bridges_exist(target_node, brlist)
1534     result.Raise("Error checking bridges on destination node '%s'" %
1535                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1536
1537
1538 def _CheckInstanceBridgesExist(lu, instance, node=None):
1539   """Check that the brigdes needed by an instance exist.
1540
1541   """
1542   if node is None:
1543     node = instance.primary_node
1544   _CheckNicsBridgesExist(lu, instance.nics, node)
1545
1546
1547 def _CheckOSVariant(os_obj, name):
1548   """Check whether an OS name conforms to the os variants specification.
1549
1550   @type os_obj: L{objects.OS}
1551   @param os_obj: OS object to check
1552   @type name: string
1553   @param name: OS name passed by the user, to check for validity
1554
1555   """
1556   variant = objects.OS.GetVariant(name)
1557   if not os_obj.supported_variants:
1558     if variant:
1559       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1560                                  " passed)" % (os_obj.name, variant),
1561                                  errors.ECODE_INVAL)
1562     return
1563   if not variant:
1564     raise errors.OpPrereqError("OS name must include a variant",
1565                                errors.ECODE_INVAL)
1566
1567   if variant not in os_obj.supported_variants:
1568     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1569
1570
1571 def _GetNodeInstancesInner(cfg, fn):
1572   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1573
1574
1575 def _GetNodeInstances(cfg, node_name):
1576   """Returns a list of all primary and secondary instances on a node.
1577
1578   """
1579
1580   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1581
1582
1583 def _GetNodePrimaryInstances(cfg, node_name):
1584   """Returns primary instances on a node.
1585
1586   """
1587   return _GetNodeInstancesInner(cfg,
1588                                 lambda inst: node_name == inst.primary_node)
1589
1590
1591 def _GetNodeSecondaryInstances(cfg, node_name):
1592   """Returns secondary instances on a node.
1593
1594   """
1595   return _GetNodeInstancesInner(cfg,
1596                                 lambda inst: node_name in inst.secondary_nodes)
1597
1598
1599 def _GetStorageTypeArgs(cfg, storage_type):
1600   """Returns the arguments for a storage type.
1601
1602   """
1603   # Special case for file storage
1604   if storage_type == constants.ST_FILE:
1605     # storage.FileStorage wants a list of storage directories
1606     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1607
1608   return []
1609
1610
1611 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1612   faulty = []
1613
1614   for dev in instance.disks:
1615     cfg.SetDiskID(dev, node_name)
1616
1617   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1618                                                                 instance))
1619   result.Raise("Failed to get disk status from node %s" % node_name,
1620                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1621
1622   for idx, bdev_status in enumerate(result.payload):
1623     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1624       faulty.append(idx)
1625
1626   return faulty
1627
1628
1629 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1630   """Check the sanity of iallocator and node arguments and use the
1631   cluster-wide iallocator if appropriate.
1632
1633   Check that at most one of (iallocator, node) is specified. If none is
1634   specified, then the LU's opcode's iallocator slot is filled with the
1635   cluster-wide default iallocator.
1636
1637   @type iallocator_slot: string
1638   @param iallocator_slot: the name of the opcode iallocator slot
1639   @type node_slot: string
1640   @param node_slot: the name of the opcode target node slot
1641
1642   """
1643   node = getattr(lu.op, node_slot, None)
1644   iallocator = getattr(lu.op, iallocator_slot, None)
1645
1646   if node is not None and iallocator is not None:
1647     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1648                                errors.ECODE_INVAL)
1649   elif node is None and iallocator is None:
1650     default_iallocator = lu.cfg.GetDefaultIAllocator()
1651     if default_iallocator:
1652       setattr(lu.op, iallocator_slot, default_iallocator)
1653     else:
1654       raise errors.OpPrereqError("No iallocator or node given and no"
1655                                  " cluster-wide default iallocator found;"
1656                                  " please specify either an iallocator or a"
1657                                  " node, or set a cluster-wide default"
1658                                  " iallocator")
1659
1660
1661 def _GetDefaultIAllocator(cfg, iallocator):
1662   """Decides on which iallocator to use.
1663
1664   @type cfg: L{config.ConfigWriter}
1665   @param cfg: Cluster configuration object
1666   @type iallocator: string or None
1667   @param iallocator: Iallocator specified in opcode
1668   @rtype: string
1669   @return: Iallocator name
1670
1671   """
1672   if not iallocator:
1673     # Use default iallocator
1674     iallocator = cfg.GetDefaultIAllocator()
1675
1676   if not iallocator:
1677     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1678                                " opcode nor as a cluster-wide default",
1679                                errors.ECODE_INVAL)
1680
1681   return iallocator
1682
1683
1684 class LUClusterPostInit(LogicalUnit):
1685   """Logical unit for running hooks after cluster initialization.
1686
1687   """
1688   HPATH = "cluster-init"
1689   HTYPE = constants.HTYPE_CLUSTER
1690
1691   def BuildHooksEnv(self):
1692     """Build hooks env.
1693
1694     """
1695     return {
1696       "OP_TARGET": self.cfg.GetClusterName(),
1697       }
1698
1699   def BuildHooksNodes(self):
1700     """Build hooks nodes.
1701
1702     """
1703     return ([], [self.cfg.GetMasterNode()])
1704
1705   def Exec(self, feedback_fn):
1706     """Nothing to do.
1707
1708     """
1709     return True
1710
1711
1712 class LUClusterDestroy(LogicalUnit):
1713   """Logical unit for destroying the cluster.
1714
1715   """
1716   HPATH = "cluster-destroy"
1717   HTYPE = constants.HTYPE_CLUSTER
1718
1719   def BuildHooksEnv(self):
1720     """Build hooks env.
1721
1722     """
1723     return {
1724       "OP_TARGET": self.cfg.GetClusterName(),
1725       }
1726
1727   def BuildHooksNodes(self):
1728     """Build hooks nodes.
1729
1730     """
1731     return ([], [])
1732
1733   def CheckPrereq(self):
1734     """Check prerequisites.
1735
1736     This checks whether the cluster is empty.
1737
1738     Any errors are signaled by raising errors.OpPrereqError.
1739
1740     """
1741     master = self.cfg.GetMasterNode()
1742
1743     nodelist = self.cfg.GetNodeList()
1744     if len(nodelist) != 1 or nodelist[0] != master:
1745       raise errors.OpPrereqError("There are still %d node(s) in"
1746                                  " this cluster." % (len(nodelist) - 1),
1747                                  errors.ECODE_INVAL)
1748     instancelist = self.cfg.GetInstanceList()
1749     if instancelist:
1750       raise errors.OpPrereqError("There are still %d instance(s) in"
1751                                  " this cluster." % len(instancelist),
1752                                  errors.ECODE_INVAL)
1753
1754   def Exec(self, feedback_fn):
1755     """Destroys the cluster.
1756
1757     """
1758     master_params = self.cfg.GetMasterNetworkParameters()
1759
1760     # Run post hooks on master node before it's removed
1761     _RunPostHook(self, master_params.name)
1762
1763     ems = self.cfg.GetUseExternalMipScript()
1764     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1765                                                      master_params, ems)
1766     if result.fail_msg:
1767       self.LogWarning("Error disabling the master IP address: %s",
1768                       result.fail_msg)
1769
1770     return master_params.name
1771
1772
1773 def _VerifyCertificate(filename):
1774   """Verifies a certificate for L{LUClusterVerifyConfig}.
1775
1776   @type filename: string
1777   @param filename: Path to PEM file
1778
1779   """
1780   try:
1781     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1782                                            utils.ReadFile(filename))
1783   except Exception, err: # pylint: disable=W0703
1784     return (LUClusterVerifyConfig.ETYPE_ERROR,
1785             "Failed to load X509 certificate %s: %s" % (filename, err))
1786
1787   (errcode, msg) = \
1788     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1789                                 constants.SSL_CERT_EXPIRATION_ERROR)
1790
1791   if msg:
1792     fnamemsg = "While verifying %s: %s" % (filename, msg)
1793   else:
1794     fnamemsg = None
1795
1796   if errcode is None:
1797     return (None, fnamemsg)
1798   elif errcode == utils.CERT_WARNING:
1799     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1800   elif errcode == utils.CERT_ERROR:
1801     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1802
1803   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1804
1805
1806 def _GetAllHypervisorParameters(cluster, instances):
1807   """Compute the set of all hypervisor parameters.
1808
1809   @type cluster: L{objects.Cluster}
1810   @param cluster: the cluster object
1811   @param instances: list of L{objects.Instance}
1812   @param instances: additional instances from which to obtain parameters
1813   @rtype: list of (origin, hypervisor, parameters)
1814   @return: a list with all parameters found, indicating the hypervisor they
1815        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1816
1817   """
1818   hvp_data = []
1819
1820   for hv_name in cluster.enabled_hypervisors:
1821     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1822
1823   for os_name, os_hvp in cluster.os_hvp.items():
1824     for hv_name, hv_params in os_hvp.items():
1825       if hv_params:
1826         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1827         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1828
1829   # TODO: collapse identical parameter values in a single one
1830   for instance in instances:
1831     if instance.hvparams:
1832       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1833                        cluster.FillHV(instance)))
1834
1835   return hvp_data
1836
1837
1838 class _VerifyErrors(object):
1839   """Mix-in for cluster/group verify LUs.
1840
1841   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1842   self.op and self._feedback_fn to be available.)
1843
1844   """
1845
1846   ETYPE_FIELD = "code"
1847   ETYPE_ERROR = "ERROR"
1848   ETYPE_WARNING = "WARNING"
1849
1850   def _Error(self, ecode, item, msg, *args, **kwargs):
1851     """Format an error message.
1852
1853     Based on the opcode's error_codes parameter, either format a
1854     parseable error code, or a simpler error string.
1855
1856     This must be called only from Exec and functions called from Exec.
1857
1858     """
1859     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1860     itype, etxt, _ = ecode
1861     # first complete the msg
1862     if args:
1863       msg = msg % args
1864     # then format the whole message
1865     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1866       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1867     else:
1868       if item:
1869         item = " " + item
1870       else:
1871         item = ""
1872       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1873     # and finally report it via the feedback_fn
1874     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1875
1876   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1877     """Log an error message if the passed condition is True.
1878
1879     """
1880     cond = (bool(cond)
1881             or self.op.debug_simulate_errors) # pylint: disable=E1101
1882
1883     # If the error code is in the list of ignored errors, demote the error to a
1884     # warning
1885     (_, etxt, _) = ecode
1886     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1887       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1888
1889     if cond:
1890       self._Error(ecode, *args, **kwargs)
1891
1892     # do not mark the operation as failed for WARN cases only
1893     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1894       self.bad = self.bad or cond
1895
1896
1897 class LUClusterVerify(NoHooksLU):
1898   """Submits all jobs necessary to verify the cluster.
1899
1900   """
1901   REQ_BGL = False
1902
1903   def ExpandNames(self):
1904     self.needed_locks = {}
1905
1906   def Exec(self, feedback_fn):
1907     jobs = []
1908
1909     if self.op.group_name:
1910       groups = [self.op.group_name]
1911       depends_fn = lambda: None
1912     else:
1913       groups = self.cfg.GetNodeGroupList()
1914
1915       # Verify global configuration
1916       jobs.append([
1917         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1918         ])
1919
1920       # Always depend on global verification
1921       depends_fn = lambda: [(-len(jobs), [])]
1922
1923     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1924                                             ignore_errors=self.op.ignore_errors,
1925                                             depends=depends_fn())]
1926                 for group in groups)
1927
1928     # Fix up all parameters
1929     for op in itertools.chain(*jobs): # pylint: disable=W0142
1930       op.debug_simulate_errors = self.op.debug_simulate_errors
1931       op.verbose = self.op.verbose
1932       op.error_codes = self.op.error_codes
1933       try:
1934         op.skip_checks = self.op.skip_checks
1935       except AttributeError:
1936         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1937
1938     return ResultWithJobs(jobs)
1939
1940
1941 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1942   """Verifies the cluster config.
1943
1944   """
1945   REQ_BGL = False
1946
1947   def _VerifyHVP(self, hvp_data):
1948     """Verifies locally the syntax of the hypervisor parameters.
1949
1950     """
1951     for item, hv_name, hv_params in hvp_data:
1952       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1953              (item, hv_name))
1954       try:
1955         hv_class = hypervisor.GetHypervisor(hv_name)
1956         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1957         hv_class.CheckParameterSyntax(hv_params)
1958       except errors.GenericError, err:
1959         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1960
1961   def ExpandNames(self):
1962     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1963     self.share_locks = _ShareAll()
1964
1965   def CheckPrereq(self):
1966     """Check prerequisites.
1967
1968     """
1969     # Retrieve all information
1970     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1971     self.all_node_info = self.cfg.GetAllNodesInfo()
1972     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1973
1974   def Exec(self, feedback_fn):
1975     """Verify integrity of cluster, performing various test on nodes.
1976
1977     """
1978     self.bad = False
1979     self._feedback_fn = feedback_fn
1980
1981     feedback_fn("* Verifying cluster config")
1982
1983     for msg in self.cfg.VerifyConfig():
1984       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1985
1986     feedback_fn("* Verifying cluster certificate files")
1987
1988     for cert_filename in constants.ALL_CERT_FILES:
1989       (errcode, msg) = _VerifyCertificate(cert_filename)
1990       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1991
1992     feedback_fn("* Verifying hypervisor parameters")
1993
1994     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1995                                                 self.all_inst_info.values()))
1996
1997     feedback_fn("* Verifying all nodes belong to an existing group")
1998
1999     # We do this verification here because, should this bogus circumstance
2000     # occur, it would never be caught by VerifyGroup, which only acts on
2001     # nodes/instances reachable from existing node groups.
2002
2003     dangling_nodes = set(node.name for node in self.all_node_info.values()
2004                          if node.group not in self.all_group_info)
2005
2006     dangling_instances = {}
2007     no_node_instances = []
2008
2009     for inst in self.all_inst_info.values():
2010       if inst.primary_node in dangling_nodes:
2011         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2012       elif inst.primary_node not in self.all_node_info:
2013         no_node_instances.append(inst.name)
2014
2015     pretty_dangling = [
2016         "%s (%s)" %
2017         (node.name,
2018          utils.CommaJoin(dangling_instances.get(node.name,
2019                                                 ["no instances"])))
2020         for node in dangling_nodes]
2021
2022     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2023                   None,
2024                   "the following nodes (and their instances) belong to a non"
2025                   " existing group: %s", utils.CommaJoin(pretty_dangling))
2026
2027     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2028                   None,
2029                   "the following instances have a non-existing primary-node:"
2030                   " %s", utils.CommaJoin(no_node_instances))
2031
2032     return not self.bad
2033
2034
2035 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2036   """Verifies the status of a node group.
2037
2038   """
2039   HPATH = "cluster-verify"
2040   HTYPE = constants.HTYPE_CLUSTER
2041   REQ_BGL = False
2042
2043   _HOOKS_INDENT_RE = re.compile("^", re.M)
2044
2045   class NodeImage(object):
2046     """A class representing the logical and physical status of a node.
2047
2048     @type name: string
2049     @ivar name: the node name to which this object refers
2050     @ivar volumes: a structure as returned from
2051         L{ganeti.backend.GetVolumeList} (runtime)
2052     @ivar instances: a list of running instances (runtime)
2053     @ivar pinst: list of configured primary instances (config)
2054     @ivar sinst: list of configured secondary instances (config)
2055     @ivar sbp: dictionary of {primary-node: list of instances} for all
2056         instances for which this node is secondary (config)
2057     @ivar mfree: free memory, as reported by hypervisor (runtime)
2058     @ivar dfree: free disk, as reported by the node (runtime)
2059     @ivar offline: the offline status (config)
2060     @type rpc_fail: boolean
2061     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2062         not whether the individual keys were correct) (runtime)
2063     @type lvm_fail: boolean
2064     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2065     @type hyp_fail: boolean
2066     @ivar hyp_fail: whether the RPC call didn't return the instance list
2067     @type ghost: boolean
2068     @ivar ghost: whether this is a known node or not (config)
2069     @type os_fail: boolean
2070     @ivar os_fail: whether the RPC call didn't return valid OS data
2071     @type oslist: list
2072     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2073     @type vm_capable: boolean
2074     @ivar vm_capable: whether the node can host instances
2075
2076     """
2077     def __init__(self, offline=False, name=None, vm_capable=True):
2078       self.name = name
2079       self.volumes = {}
2080       self.instances = []
2081       self.pinst = []
2082       self.sinst = []
2083       self.sbp = {}
2084       self.mfree = 0
2085       self.dfree = 0
2086       self.offline = offline
2087       self.vm_capable = vm_capable
2088       self.rpc_fail = False
2089       self.lvm_fail = False
2090       self.hyp_fail = False
2091       self.ghost = False
2092       self.os_fail = False
2093       self.oslist = {}
2094
2095   def ExpandNames(self):
2096     # This raises errors.OpPrereqError on its own:
2097     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2098
2099     # Get instances in node group; this is unsafe and needs verification later
2100     inst_names = \
2101       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2102
2103     self.needed_locks = {
2104       locking.LEVEL_INSTANCE: inst_names,
2105       locking.LEVEL_NODEGROUP: [self.group_uuid],
2106       locking.LEVEL_NODE: [],
2107       }
2108
2109     self.share_locks = _ShareAll()
2110
2111   def DeclareLocks(self, level):
2112     if level == locking.LEVEL_NODE:
2113       # Get members of node group; this is unsafe and needs verification later
2114       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2115
2116       all_inst_info = self.cfg.GetAllInstancesInfo()
2117
2118       # In Exec(), we warn about mirrored instances that have primary and
2119       # secondary living in separate node groups. To fully verify that
2120       # volumes for these instances are healthy, we will need to do an
2121       # extra call to their secondaries. We ensure here those nodes will
2122       # be locked.
2123       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2124         # Important: access only the instances whose lock is owned
2125         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2126           nodes.update(all_inst_info[inst].secondary_nodes)
2127
2128       self.needed_locks[locking.LEVEL_NODE] = nodes
2129
2130   def CheckPrereq(self):
2131     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2132     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2133
2134     group_nodes = set(self.group_info.members)
2135     group_instances = \
2136       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2137
2138     unlocked_nodes = \
2139         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2140
2141     unlocked_instances = \
2142         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2143
2144     if unlocked_nodes:
2145       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2146                                  utils.CommaJoin(unlocked_nodes),
2147                                  errors.ECODE_STATE)
2148
2149     if unlocked_instances:
2150       raise errors.OpPrereqError("Missing lock for instances: %s" %
2151                                  utils.CommaJoin(unlocked_instances),
2152                                  errors.ECODE_STATE)
2153
2154     self.all_node_info = self.cfg.GetAllNodesInfo()
2155     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2156
2157     self.my_node_names = utils.NiceSort(group_nodes)
2158     self.my_inst_names = utils.NiceSort(group_instances)
2159
2160     self.my_node_info = dict((name, self.all_node_info[name])
2161                              for name in self.my_node_names)
2162
2163     self.my_inst_info = dict((name, self.all_inst_info[name])
2164                              for name in self.my_inst_names)
2165
2166     # We detect here the nodes that will need the extra RPC calls for verifying
2167     # split LV volumes; they should be locked.
2168     extra_lv_nodes = set()
2169
2170     for inst in self.my_inst_info.values():
2171       if inst.disk_template in constants.DTS_INT_MIRROR:
2172         for nname in inst.all_nodes:
2173           if self.all_node_info[nname].group != self.group_uuid:
2174             extra_lv_nodes.add(nname)
2175
2176     unlocked_lv_nodes = \
2177         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2178
2179     if unlocked_lv_nodes:
2180       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2181                                  utils.CommaJoin(unlocked_lv_nodes),
2182                                  errors.ECODE_STATE)
2183     self.extra_lv_nodes = list(extra_lv_nodes)
2184
2185   def _VerifyNode(self, ninfo, nresult):
2186     """Perform some basic validation on data returned from a node.
2187
2188       - check the result data structure is well formed and has all the
2189         mandatory fields
2190       - check ganeti version
2191
2192     @type ninfo: L{objects.Node}
2193     @param ninfo: the node to check
2194     @param nresult: the results from the node
2195     @rtype: boolean
2196     @return: whether overall this call was successful (and we can expect
2197          reasonable values in the respose)
2198
2199     """
2200     node = ninfo.name
2201     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2202
2203     # main result, nresult should be a non-empty dict
2204     test = not nresult or not isinstance(nresult, dict)
2205     _ErrorIf(test, constants.CV_ENODERPC, node,
2206                   "unable to verify node: no data returned")
2207     if test:
2208       return False
2209
2210     # compares ganeti version
2211     local_version = constants.PROTOCOL_VERSION
2212     remote_version = nresult.get("version", None)
2213     test = not (remote_version and
2214                 isinstance(remote_version, (list, tuple)) and
2215                 len(remote_version) == 2)
2216     _ErrorIf(test, constants.CV_ENODERPC, node,
2217              "connection to node returned invalid data")
2218     if test:
2219       return False
2220
2221     test = local_version != remote_version[0]
2222     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2223              "incompatible protocol versions: master %s,"
2224              " node %s", local_version, remote_version[0])
2225     if test:
2226       return False
2227
2228     # node seems compatible, we can actually try to look into its results
2229
2230     # full package version
2231     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2232                   constants.CV_ENODEVERSION, node,
2233                   "software version mismatch: master %s, node %s",
2234                   constants.RELEASE_VERSION, remote_version[1],
2235                   code=self.ETYPE_WARNING)
2236
2237     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2238     if ninfo.vm_capable and isinstance(hyp_result, dict):
2239       for hv_name, hv_result in hyp_result.iteritems():
2240         test = hv_result is not None
2241         _ErrorIf(test, constants.CV_ENODEHV, node,
2242                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2243
2244     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2245     if ninfo.vm_capable and isinstance(hvp_result, list):
2246       for item, hv_name, hv_result in hvp_result:
2247         _ErrorIf(True, constants.CV_ENODEHV, node,
2248                  "hypervisor %s parameter verify failure (source %s): %s",
2249                  hv_name, item, hv_result)
2250
2251     test = nresult.get(constants.NV_NODESETUP,
2252                        ["Missing NODESETUP results"])
2253     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2254              "; ".join(test))
2255
2256     return True
2257
2258   def _VerifyNodeTime(self, ninfo, nresult,
2259                       nvinfo_starttime, nvinfo_endtime):
2260     """Check the node time.
2261
2262     @type ninfo: L{objects.Node}
2263     @param ninfo: the node to check
2264     @param nresult: the remote results for the node
2265     @param nvinfo_starttime: the start time of the RPC call
2266     @param nvinfo_endtime: the end time of the RPC call
2267
2268     """
2269     node = ninfo.name
2270     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2271
2272     ntime = nresult.get(constants.NV_TIME, None)
2273     try:
2274       ntime_merged = utils.MergeTime(ntime)
2275     except (ValueError, TypeError):
2276       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2277       return
2278
2279     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2280       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2281     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2282       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2283     else:
2284       ntime_diff = None
2285
2286     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2287              "Node time diverges by at least %s from master node time",
2288              ntime_diff)
2289
2290   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2291     """Check the node LVM results.
2292
2293     @type ninfo: L{objects.Node}
2294     @param ninfo: the node to check
2295     @param nresult: the remote results for the node
2296     @param vg_name: the configured VG name
2297
2298     """
2299     if vg_name is None:
2300       return
2301
2302     node = ninfo.name
2303     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2304
2305     # checks vg existence and size > 20G
2306     vglist = nresult.get(constants.NV_VGLIST, None)
2307     test = not vglist
2308     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2309     if not test:
2310       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2311                                             constants.MIN_VG_SIZE)
2312       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2313
2314     # check pv names
2315     pvlist = nresult.get(constants.NV_PVLIST, None)
2316     test = pvlist is None
2317     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2318     if not test:
2319       # check that ':' is not present in PV names, since it's a
2320       # special character for lvcreate (denotes the range of PEs to
2321       # use on the PV)
2322       for _, pvname, owner_vg in pvlist:
2323         test = ":" in pvname
2324         _ErrorIf(test, constants.CV_ENODELVM, node,
2325                  "Invalid character ':' in PV '%s' of VG '%s'",
2326                  pvname, owner_vg)
2327
2328   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2329     """Check the node bridges.
2330
2331     @type ninfo: L{objects.Node}
2332     @param ninfo: the node to check
2333     @param nresult: the remote results for the node
2334     @param bridges: the expected list of bridges
2335
2336     """
2337     if not bridges:
2338       return
2339
2340     node = ninfo.name
2341     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2342
2343     missing = nresult.get(constants.NV_BRIDGES, None)
2344     test = not isinstance(missing, list)
2345     _ErrorIf(test, constants.CV_ENODENET, node,
2346              "did not return valid bridge information")
2347     if not test:
2348       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2349                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2350
2351   def _VerifyNodeUserScripts(self, ninfo, nresult):
2352     """Check the results of user scripts presence and executability on the node
2353
2354     @type ninfo: L{objects.Node}
2355     @param ninfo: the node to check
2356     @param nresult: the remote results for the node
2357
2358     """
2359     node = ninfo.name
2360
2361     test = not constants.NV_USERSCRIPTS in nresult
2362     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2363                   "did not return user scripts information")
2364
2365     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2366     if not test:
2367       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2368                     "user scripts not present or not executable: %s" %
2369                     utils.CommaJoin(sorted(broken_scripts)))
2370
2371   def _VerifyNodeNetwork(self, ninfo, nresult):
2372     """Check the node network connectivity results.
2373
2374     @type ninfo: L{objects.Node}
2375     @param ninfo: the node to check
2376     @param nresult: the remote results for the node
2377
2378     """
2379     node = ninfo.name
2380     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2381
2382     test = constants.NV_NODELIST not in nresult
2383     _ErrorIf(test, constants.CV_ENODESSH, node,
2384              "node hasn't returned node ssh connectivity data")
2385     if not test:
2386       if nresult[constants.NV_NODELIST]:
2387         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2388           _ErrorIf(True, constants.CV_ENODESSH, node,
2389                    "ssh communication with node '%s': %s", a_node, a_msg)
2390
2391     test = constants.NV_NODENETTEST not in nresult
2392     _ErrorIf(test, constants.CV_ENODENET, node,
2393              "node hasn't returned node tcp connectivity data")
2394     if not test:
2395       if nresult[constants.NV_NODENETTEST]:
2396         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2397         for anode in nlist:
2398           _ErrorIf(True, constants.CV_ENODENET, node,
2399                    "tcp communication with node '%s': %s",
2400                    anode, nresult[constants.NV_NODENETTEST][anode])
2401
2402     test = constants.NV_MASTERIP not in nresult
2403     _ErrorIf(test, constants.CV_ENODENET, node,
2404              "node hasn't returned node master IP reachability data")
2405     if not test:
2406       if not nresult[constants.NV_MASTERIP]:
2407         if node == self.master_node:
2408           msg = "the master node cannot reach the master IP (not configured?)"
2409         else:
2410           msg = "cannot reach the master IP"
2411         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2412
2413   def _VerifyInstance(self, instance, instanceconfig, node_image,
2414                       diskstatus):
2415     """Verify an instance.
2416
2417     This function checks to see if the required block devices are
2418     available on the instance's node.
2419
2420     """
2421     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2422     node_current = instanceconfig.primary_node
2423
2424     node_vol_should = {}
2425     instanceconfig.MapLVsByNode(node_vol_should)
2426
2427     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2428     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2429     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2430
2431     for node in node_vol_should:
2432       n_img = node_image[node]
2433       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2434         # ignore missing volumes on offline or broken nodes
2435         continue
2436       for volume in node_vol_should[node]:
2437         test = volume not in n_img.volumes
2438         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2439                  "volume %s missing on node %s", volume, node)
2440
2441     if instanceconfig.admin_state == constants.ADMINST_UP:
2442       pri_img = node_image[node_current]
2443       test = instance not in pri_img.instances and not pri_img.offline
2444       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2445                "instance not running on its primary node %s",
2446                node_current)
2447
2448     diskdata = [(nname, success, status, idx)
2449                 for (nname, disks) in diskstatus.items()
2450                 for idx, (success, status) in enumerate(disks)]
2451
2452     for nname, success, bdev_status, idx in diskdata:
2453       # the 'ghost node' construction in Exec() ensures that we have a
2454       # node here
2455       snode = node_image[nname]
2456       bad_snode = snode.ghost or snode.offline
2457       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2458                not success and not bad_snode,
2459                constants.CV_EINSTANCEFAULTYDISK, instance,
2460                "couldn't retrieve status for disk/%s on %s: %s",
2461                idx, nname, bdev_status)
2462       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2463                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2464                constants.CV_EINSTANCEFAULTYDISK, instance,
2465                "disk/%s on %s is faulty", idx, nname)
2466
2467   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2468     """Verify if there are any unknown volumes in the cluster.
2469
2470     The .os, .swap and backup volumes are ignored. All other volumes are
2471     reported as unknown.
2472
2473     @type reserved: L{ganeti.utils.FieldSet}
2474     @param reserved: a FieldSet of reserved volume names
2475
2476     """
2477     for node, n_img in node_image.items():
2478       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2479           self.all_node_info[node].group != self.group_uuid):
2480         # skip non-healthy nodes
2481         continue
2482       for volume in n_img.volumes:
2483         test = ((node not in node_vol_should or
2484                 volume not in node_vol_should[node]) and
2485                 not reserved.Matches(volume))
2486         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2487                       "volume %s is unknown", volume)
2488
2489   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2490     """Verify N+1 Memory Resilience.
2491
2492     Check that if one single node dies we can still start all the
2493     instances it was primary for.
2494
2495     """
2496     cluster_info = self.cfg.GetClusterInfo()
2497     for node, n_img in node_image.items():
2498       # This code checks that every node which is now listed as
2499       # secondary has enough memory to host all instances it is
2500       # supposed to should a single other node in the cluster fail.
2501       # FIXME: not ready for failover to an arbitrary node
2502       # FIXME: does not support file-backed instances
2503       # WARNING: we currently take into account down instances as well
2504       # as up ones, considering that even if they're down someone
2505       # might want to start them even in the event of a node failure.
2506       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2507         # we're skipping nodes marked offline and nodes in other groups from
2508         # the N+1 warning, since most likely we don't have good memory
2509         # infromation from them; we already list instances living on such
2510         # nodes, and that's enough warning
2511         continue
2512       #TODO(dynmem): also consider ballooning out other instances
2513       for prinode, instances in n_img.sbp.items():
2514         needed_mem = 0
2515         for instance in instances:
2516           bep = cluster_info.FillBE(instance_cfg[instance])
2517           if bep[constants.BE_AUTO_BALANCE]:
2518             needed_mem += bep[constants.BE_MINMEM]
2519         test = n_img.mfree < needed_mem
2520         self._ErrorIf(test, constants.CV_ENODEN1, node,
2521                       "not enough memory to accomodate instance failovers"
2522                       " should node %s fail (%dMiB needed, %dMiB available)",
2523                       prinode, needed_mem, n_img.mfree)
2524
2525   @classmethod
2526   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2527                    (files_all, files_opt, files_mc, files_vm)):
2528     """Verifies file checksums collected from all nodes.
2529
2530     @param errorif: Callback for reporting errors
2531     @param nodeinfo: List of L{objects.Node} objects
2532     @param master_node: Name of master node
2533     @param all_nvinfo: RPC results
2534
2535     """
2536     # Define functions determining which nodes to consider for a file
2537     files2nodefn = [
2538       (files_all, None),
2539       (files_mc, lambda node: (node.master_candidate or
2540                                node.name == master_node)),
2541       (files_vm, lambda node: node.vm_capable),
2542       ]
2543
2544     # Build mapping from filename to list of nodes which should have the file
2545     nodefiles = {}
2546     for (files, fn) in files2nodefn:
2547       if fn is None:
2548         filenodes = nodeinfo
2549       else:
2550         filenodes = filter(fn, nodeinfo)
2551       nodefiles.update((filename,
2552                         frozenset(map(operator.attrgetter("name"), filenodes)))
2553                        for filename in files)
2554
2555     assert set(nodefiles) == (files_all | files_mc | files_vm)
2556
2557     fileinfo = dict((filename, {}) for filename in nodefiles)
2558     ignore_nodes = set()
2559
2560     for node in nodeinfo:
2561       if node.offline:
2562         ignore_nodes.add(node.name)
2563         continue
2564
2565       nresult = all_nvinfo[node.name]
2566
2567       if nresult.fail_msg or not nresult.payload:
2568         node_files = None
2569       else:
2570         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2571
2572       test = not (node_files and isinstance(node_files, dict))
2573       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2574               "Node did not return file checksum data")
2575       if test:
2576         ignore_nodes.add(node.name)
2577         continue
2578
2579       # Build per-checksum mapping from filename to nodes having it
2580       for (filename, checksum) in node_files.items():
2581         assert filename in nodefiles
2582         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2583
2584     for (filename, checksums) in fileinfo.items():
2585       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2586
2587       # Nodes having the file
2588       with_file = frozenset(node_name
2589                             for nodes in fileinfo[filename].values()
2590                             for node_name in nodes) - ignore_nodes
2591
2592       expected_nodes = nodefiles[filename] - ignore_nodes
2593
2594       # Nodes missing file
2595       missing_file = expected_nodes - with_file
2596
2597       if filename in files_opt:
2598         # All or no nodes
2599         errorif(missing_file and missing_file != expected_nodes,
2600                 constants.CV_ECLUSTERFILECHECK, None,
2601                 "File %s is optional, but it must exist on all or no"
2602                 " nodes (not found on %s)",
2603                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2604       else:
2605         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2606                 "File %s is missing from node(s) %s", filename,
2607                 utils.CommaJoin(utils.NiceSort(missing_file)))
2608
2609         # Warn if a node has a file it shouldn't
2610         unexpected = with_file - expected_nodes
2611         errorif(unexpected,
2612                 constants.CV_ECLUSTERFILECHECK, None,
2613                 "File %s should not exist on node(s) %s",
2614                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2615
2616       # See if there are multiple versions of the file
2617       test = len(checksums) > 1
2618       if test:
2619         variants = ["variant %s on %s" %
2620                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2621                     for (idx, (checksum, nodes)) in
2622                       enumerate(sorted(checksums.items()))]
2623       else:
2624         variants = []
2625
2626       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2627               "File %s found with %s different checksums (%s)",
2628               filename, len(checksums), "; ".join(variants))
2629
2630   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2631                       drbd_map):
2632     """Verifies and the node DRBD status.
2633
2634     @type ninfo: L{objects.Node}
2635     @param ninfo: the node to check
2636     @param nresult: the remote results for the node
2637     @param instanceinfo: the dict of instances
2638     @param drbd_helper: the configured DRBD usermode helper
2639     @param drbd_map: the DRBD map as returned by
2640         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2641
2642     """
2643     node = ninfo.name
2644     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2645
2646     if drbd_helper:
2647       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2648       test = (helper_result == None)
2649       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2650                "no drbd usermode helper returned")
2651       if helper_result:
2652         status, payload = helper_result
2653         test = not status
2654         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2655                  "drbd usermode helper check unsuccessful: %s", payload)
2656         test = status and (payload != drbd_helper)
2657         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2658                  "wrong drbd usermode helper: %s", payload)
2659
2660     # compute the DRBD minors
2661     node_drbd = {}
2662     for minor, instance in drbd_map[node].items():
2663       test = instance not in instanceinfo
2664       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2665                "ghost instance '%s' in temporary DRBD map", instance)
2666         # ghost instance should not be running, but otherwise we
2667         # don't give double warnings (both ghost instance and
2668         # unallocated minor in use)
2669       if test:
2670         node_drbd[minor] = (instance, False)
2671       else:
2672         instance = instanceinfo[instance]
2673         node_drbd[minor] = (instance.name,
2674                             instance.admin_state == constants.ADMINST_UP)
2675
2676     # and now check them
2677     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2678     test = not isinstance(used_minors, (tuple, list))
2679     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2680              "cannot parse drbd status file: %s", str(used_minors))
2681     if test:
2682       # we cannot check drbd status
2683       return
2684
2685     for minor, (iname, must_exist) in node_drbd.items():
2686       test = minor not in used_minors and must_exist
2687       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2688                "drbd minor %d of instance %s is not active", minor, iname)
2689     for minor in used_minors:
2690       test = minor not in node_drbd
2691       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2692                "unallocated drbd minor %d is in use", minor)
2693
2694   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2695     """Builds the node OS structures.
2696
2697     @type ninfo: L{objects.Node}
2698     @param ninfo: the node to check
2699     @param nresult: the remote results for the node
2700     @param nimg: the node image object
2701
2702     """
2703     node = ninfo.name
2704     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2705
2706     remote_os = nresult.get(constants.NV_OSLIST, None)
2707     test = (not isinstance(remote_os, list) or
2708             not compat.all(isinstance(v, list) and len(v) == 7
2709                            for v in remote_os))
2710
2711     _ErrorIf(test, constants.CV_ENODEOS, node,
2712              "node hasn't returned valid OS data")
2713
2714     nimg.os_fail = test
2715
2716     if test:
2717       return
2718
2719     os_dict = {}
2720
2721     for (name, os_path, status, diagnose,
2722          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2723
2724       if name not in os_dict:
2725         os_dict[name] = []
2726
2727       # parameters is a list of lists instead of list of tuples due to
2728       # JSON lacking a real tuple type, fix it:
2729       parameters = [tuple(v) for v in parameters]
2730       os_dict[name].append((os_path, status, diagnose,
2731                             set(variants), set(parameters), set(api_ver)))
2732
2733     nimg.oslist = os_dict
2734
2735   def _VerifyNodeOS(self, ninfo, nimg, base):
2736     """Verifies the node OS list.
2737
2738     @type ninfo: L{objects.Node}
2739     @param ninfo: the node to check
2740     @param nimg: the node image object
2741     @param base: the 'template' node we match against (e.g. from the master)
2742
2743     """
2744     node = ninfo.name
2745     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2746
2747     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2748
2749     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2750     for os_name, os_data in nimg.oslist.items():
2751       assert os_data, "Empty OS status for OS %s?!" % os_name
2752       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2753       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2754                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2755       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2756                "OS '%s' has multiple entries (first one shadows the rest): %s",
2757                os_name, utils.CommaJoin([v[0] for v in os_data]))
2758       # comparisons with the 'base' image
2759       test = os_name not in base.oslist
2760       _ErrorIf(test, constants.CV_ENODEOS, node,
2761                "Extra OS %s not present on reference node (%s)",
2762                os_name, base.name)
2763       if test:
2764         continue
2765       assert base.oslist[os_name], "Base node has empty OS status?"
2766       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2767       if not b_status:
2768         # base OS is invalid, skipping
2769         continue
2770       for kind, a, b in [("API version", f_api, b_api),
2771                          ("variants list", f_var, b_var),
2772                          ("parameters", beautify_params(f_param),
2773                           beautify_params(b_param))]:
2774         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2775                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2776                  kind, os_name, base.name,
2777                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2778
2779     # check any missing OSes
2780     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2781     _ErrorIf(missing, constants.CV_ENODEOS, node,
2782              "OSes present on reference node %s but missing on this node: %s",
2783              base.name, utils.CommaJoin(missing))
2784
2785   def _VerifyOob(self, ninfo, nresult):
2786     """Verifies out of band functionality of a node.
2787
2788     @type ninfo: L{objects.Node}
2789     @param ninfo: the node to check
2790     @param nresult: the remote results for the node
2791
2792     """
2793     node = ninfo.name
2794     # We just have to verify the paths on master and/or master candidates
2795     # as the oob helper is invoked on the master
2796     if ((ninfo.master_candidate or ninfo.master_capable) and
2797         constants.NV_OOB_PATHS in nresult):
2798       for path_result in nresult[constants.NV_OOB_PATHS]:
2799         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2800
2801   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2802     """Verifies and updates the node volume data.
2803
2804     This function will update a L{NodeImage}'s internal structures
2805     with data from the remote call.
2806
2807     @type ninfo: L{objects.Node}
2808     @param ninfo: the node to check
2809     @param nresult: the remote results for the node
2810     @param nimg: the node image object
2811     @param vg_name: the configured VG name
2812
2813     """
2814     node = ninfo.name
2815     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2816
2817     nimg.lvm_fail = True
2818     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2819     if vg_name is None:
2820       pass
2821     elif isinstance(lvdata, basestring):
2822       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2823                utils.SafeEncode(lvdata))
2824     elif not isinstance(lvdata, dict):
2825       _ErrorIf(True, constants.CV_ENODELVM, node,
2826                "rpc call to node failed (lvlist)")
2827     else:
2828       nimg.volumes = lvdata
2829       nimg.lvm_fail = False
2830
2831   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2832     """Verifies and updates the node instance list.
2833
2834     If the listing was successful, then updates this node's instance
2835     list. Otherwise, it marks the RPC call as failed for the instance
2836     list key.
2837
2838     @type ninfo: L{objects.Node}
2839     @param ninfo: the node to check
2840     @param nresult: the remote results for the node
2841     @param nimg: the node image object
2842
2843     """
2844     idata = nresult.get(constants.NV_INSTANCELIST, None)
2845     test = not isinstance(idata, list)
2846     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2847                   "rpc call to node failed (instancelist): %s",
2848                   utils.SafeEncode(str(idata)))
2849     if test:
2850       nimg.hyp_fail = True
2851     else:
2852       nimg.instances = idata
2853
2854   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2855     """Verifies and computes a node information map
2856
2857     @type ninfo: L{objects.Node}
2858     @param ninfo: the node to check
2859     @param nresult: the remote results for the node
2860     @param nimg: the node image object
2861     @param vg_name: the configured VG name
2862
2863     """
2864     node = ninfo.name
2865     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2866
2867     # try to read free memory (from the hypervisor)
2868     hv_info = nresult.get(constants.NV_HVINFO, None)
2869     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2870     _ErrorIf(test, constants.CV_ENODEHV, node,
2871              "rpc call to node failed (hvinfo)")
2872     if not test:
2873       try:
2874         nimg.mfree = int(hv_info["memory_free"])
2875       except (ValueError, TypeError):
2876         _ErrorIf(True, constants.CV_ENODERPC, node,
2877                  "node returned invalid nodeinfo, check hypervisor")
2878
2879     # FIXME: devise a free space model for file based instances as well
2880     if vg_name is not None:
2881       test = (constants.NV_VGLIST not in nresult or
2882               vg_name not in nresult[constants.NV_VGLIST])
2883       _ErrorIf(test, constants.CV_ENODELVM, node,
2884                "node didn't return data for the volume group '%s'"
2885                " - it is either missing or broken", vg_name)
2886       if not test:
2887         try:
2888           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2889         except (ValueError, TypeError):
2890           _ErrorIf(True, constants.CV_ENODERPC, node,
2891                    "node returned invalid LVM info, check LVM status")
2892
2893   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2894     """Gets per-disk status information for all instances.
2895
2896     @type nodelist: list of strings
2897     @param nodelist: Node names
2898     @type node_image: dict of (name, L{objects.Node})
2899     @param node_image: Node objects
2900     @type instanceinfo: dict of (name, L{objects.Instance})
2901     @param instanceinfo: Instance objects
2902     @rtype: {instance: {node: [(succes, payload)]}}
2903     @return: a dictionary of per-instance dictionaries with nodes as
2904         keys and disk information as values; the disk information is a
2905         list of tuples (success, payload)
2906
2907     """
2908     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2909
2910     node_disks = {}
2911     node_disks_devonly = {}
2912     diskless_instances = set()
2913     diskless = constants.DT_DISKLESS
2914
2915     for nname in nodelist:
2916       node_instances = list(itertools.chain(node_image[nname].pinst,
2917                                             node_image[nname].sinst))
2918       diskless_instances.update(inst for inst in node_instances
2919                                 if instanceinfo[inst].disk_template == diskless)
2920       disks = [(inst, disk)
2921                for inst in node_instances
2922                for disk in instanceinfo[inst].disks]
2923
2924       if not disks:
2925         # No need to collect data
2926         continue
2927
2928       node_disks[nname] = disks
2929
2930       # _AnnotateDiskParams makes already copies of the disks
2931       devonly = []
2932       for (inst, dev) in disks:
2933         (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2934         self.cfg.SetDiskID(anno_disk, nname)
2935         devonly.append(anno_disk)
2936
2937       node_disks_devonly[nname] = devonly
2938
2939     assert len(node_disks) == len(node_disks_devonly)
2940
2941     # Collect data from all nodes with disks
2942     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2943                                                           node_disks_devonly)
2944
2945     assert len(result) == len(node_disks)
2946
2947     instdisk = {}
2948
2949     for (nname, nres) in result.items():
2950       disks = node_disks[nname]
2951
2952       if nres.offline:
2953         # No data from this node
2954         data = len(disks) * [(False, "node offline")]
2955       else:
2956         msg = nres.fail_msg
2957         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2958                  "while getting disk information: %s", msg)
2959         if msg:
2960           # No data from this node
2961           data = len(disks) * [(False, msg)]
2962         else:
2963           data = []
2964           for idx, i in enumerate(nres.payload):
2965             if isinstance(i, (tuple, list)) and len(i) == 2:
2966               data.append(i)
2967             else:
2968               logging.warning("Invalid result from node %s, entry %d: %s",
2969                               nname, idx, i)
2970               data.append((False, "Invalid result from the remote node"))
2971
2972       for ((inst, _), status) in zip(disks, data):
2973         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2974
2975     # Add empty entries for diskless instances.
2976     for inst in diskless_instances:
2977       assert inst not in instdisk
2978       instdisk[inst] = {}
2979
2980     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2981                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2982                       compat.all(isinstance(s, (tuple, list)) and
2983                                  len(s) == 2 for s in statuses)
2984                       for inst, nnames in instdisk.items()
2985                       for nname, statuses in nnames.items())
2986     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2987
2988     return instdisk
2989
2990   @staticmethod
2991   def _SshNodeSelector(group_uuid, all_nodes):
2992     """Create endless iterators for all potential SSH check hosts.
2993
2994     """
2995     nodes = [node for node in all_nodes
2996              if (node.group != group_uuid and
2997                  not node.offline)]
2998     keyfunc = operator.attrgetter("group")
2999
3000     return map(itertools.cycle,
3001                [sorted(map(operator.attrgetter("name"), names))
3002                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3003                                                   keyfunc)])
3004
3005   @classmethod
3006   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3007     """Choose which nodes should talk to which other nodes.
3008
3009     We will make nodes contact all nodes in their group, and one node from
3010     every other group.
3011
3012     @warning: This algorithm has a known issue if one node group is much
3013       smaller than others (e.g. just one node). In such a case all other
3014       nodes will talk to the single node.
3015
3016     """
3017     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3018     sel = cls._SshNodeSelector(group_uuid, all_nodes)
3019
3020     return (online_nodes,
3021             dict((name, sorted([i.next() for i in sel]))
3022                  for name in online_nodes))
3023
3024   def BuildHooksEnv(self):
3025     """Build hooks env.
3026
3027     Cluster-Verify hooks just ran in the post phase and their failure makes
3028     the output be logged in the verify output and the verification to fail.
3029
3030     """
3031     env = {
3032       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3033       }
3034
3035     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3036                for node in self.my_node_info.values())
3037
3038     return env
3039
3040   def BuildHooksNodes(self):
3041     """Build hooks nodes.
3042
3043     """
3044     return ([], self.my_node_names)
3045
3046   def Exec(self, feedback_fn):
3047     """Verify integrity of the node group, performing various test on nodes.
3048
3049     """
3050     # This method has too many local variables. pylint: disable=R0914
3051     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3052
3053     if not self.my_node_names:
3054       # empty node group
3055       feedback_fn("* Empty node group, skipping verification")
3056       return True
3057
3058     self.bad = False
3059     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3060     verbose = self.op.verbose
3061     self._feedback_fn = feedback_fn
3062
3063     vg_name = self.cfg.GetVGName()
3064     drbd_helper = self.cfg.GetDRBDHelper()
3065     cluster = self.cfg.GetClusterInfo()
3066     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3067     hypervisors = cluster.enabled_hypervisors
3068     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3069
3070     i_non_redundant = [] # Non redundant instances
3071     i_non_a_balanced = [] # Non auto-balanced instances
3072     i_offline = 0 # Count of offline instances
3073     n_offline = 0 # Count of offline nodes
3074     n_drained = 0 # Count of nodes being drained
3075     node_vol_should = {}
3076
3077     # FIXME: verify OS list
3078
3079     # File verification
3080     filemap = _ComputeAncillaryFiles(cluster, False)
3081
3082     # do local checksums
3083     master_node = self.master_node = self.cfg.GetMasterNode()
3084     master_ip = self.cfg.GetMasterIP()
3085
3086     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3087
3088     user_scripts = []
3089     if self.cfg.GetUseExternalMipScript():
3090       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3091
3092     node_verify_param = {
3093       constants.NV_FILELIST:
3094         utils.UniqueSequence(filename
3095                              for files in filemap
3096                              for filename in files),
3097       constants.NV_NODELIST:
3098         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3099                                   self.all_node_info.values()),
3100       constants.NV_HYPERVISOR: hypervisors,
3101       constants.NV_HVPARAMS:
3102         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3103       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3104                                  for node in node_data_list
3105                                  if not node.offline],
3106       constants.NV_INSTANCELIST: hypervisors,
3107       constants.NV_VERSION: None,
3108       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3109       constants.NV_NODESETUP: None,
3110       constants.NV_TIME: None,
3111       constants.NV_MASTERIP: (master_node, master_ip),
3112       constants.NV_OSLIST: None,
3113       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3114       constants.NV_USERSCRIPTS: user_scripts,
3115       }
3116
3117     if vg_name is not None:
3118       node_verify_param[constants.NV_VGLIST] = None
3119       node_verify_param[constants.NV_LVLIST] = vg_name
3120       node_verify_param[constants.NV_PVLIST] = [vg_name]
3121       node_verify_param[constants.NV_DRBDLIST] = None
3122
3123     if drbd_helper:
3124       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3125
3126     # bridge checks
3127     # FIXME: this needs to be changed per node-group, not cluster-wide
3128     bridges = set()
3129     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3130     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3131       bridges.add(default_nicpp[constants.NIC_LINK])
3132     for instance in self.my_inst_info.values():
3133       for nic in instance.nics:
3134         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3135         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3136           bridges.add(full_nic[constants.NIC_LINK])
3137
3138     if bridges:
3139       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3140
3141     # Build our expected cluster state
3142     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3143                                                  name=node.name,
3144                                                  vm_capable=node.vm_capable))
3145                       for node in node_data_list)
3146
3147     # Gather OOB paths
3148     oob_paths = []
3149     for node in self.all_node_info.values():
3150       path = _SupportsOob(self.cfg, node)
3151       if path and path not in oob_paths:
3152         oob_paths.append(path)
3153
3154     if oob_paths:
3155       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3156
3157     for instance in self.my_inst_names:
3158       inst_config = self.my_inst_info[instance]
3159       if inst_config.admin_state == constants.ADMINST_OFFLINE:
3160         i_offline += 1
3161
3162       for nname in inst_config.all_nodes:
3163         if nname not in node_image:
3164           gnode = self.NodeImage(name=nname)
3165           gnode.ghost = (nname not in self.all_node_info)
3166           node_image[nname] = gnode
3167
3168       inst_config.MapLVsByNode(node_vol_should)
3169
3170       pnode = inst_config.primary_node
3171       node_image[pnode].pinst.append(instance)
3172
3173       for snode in inst_config.secondary_nodes:
3174         nimg = node_image[snode]
3175         nimg.sinst.append(instance)
3176         if pnode not in nimg.sbp:
3177           nimg.sbp[pnode] = []
3178         nimg.sbp[pnode].append(instance)
3179
3180     # At this point, we have the in-memory data structures complete,
3181     # except for the runtime information, which we'll gather next
3182
3183     # Due to the way our RPC system works, exact response times cannot be
3184     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3185     # time before and after executing the request, we can at least have a time
3186     # window.
3187     nvinfo_starttime = time.time()
3188     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3189                                            node_verify_param,
3190                                            self.cfg.GetClusterName())
3191     nvinfo_endtime = time.time()
3192
3193     if self.extra_lv_nodes and vg_name is not None:
3194       extra_lv_nvinfo = \
3195           self.rpc.call_node_verify(self.extra_lv_nodes,
3196                                     {constants.NV_LVLIST: vg_name},
3197                                     self.cfg.GetClusterName())
3198     else:
3199       extra_lv_nvinfo = {}
3200
3201     all_drbd_map = self.cfg.ComputeDRBDMap()
3202
3203     feedback_fn("* Gathering disk information (%s nodes)" %
3204                 len(self.my_node_names))
3205     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3206                                      self.my_inst_info)
3207
3208     feedback_fn("* Verifying configuration file consistency")
3209
3210     # If not all nodes are being checked, we need to make sure the master node
3211     # and a non-checked vm_capable node are in the list.
3212     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3213     if absent_nodes:
3214       vf_nvinfo = all_nvinfo.copy()
3215       vf_node_info = list(self.my_node_info.values())
3216       additional_nodes = []
3217       if master_node not in self.my_node_info:
3218         additional_nodes.append(master_node)
3219         vf_node_info.append(self.all_node_info[master_node])
3220       # Add the first vm_capable node we find which is not included
3221       for node in absent_nodes:
3222         nodeinfo = self.all_node_info[node]
3223         if nodeinfo.vm_capable and not nodeinfo.offline:
3224           additional_nodes.append(node)
3225           vf_node_info.append(self.all_node_info[node])
3226           break
3227       key = constants.NV_FILELIST
3228       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3229                                                  {key: node_verify_param[key]},
3230                                                  self.cfg.GetClusterName()))
3231     else:
3232       vf_nvinfo = all_nvinfo
3233       vf_node_info = self.my_node_info.values()
3234
3235     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3236
3237     feedback_fn("* Verifying node status")
3238
3239     refos_img = None
3240
3241     for node_i in node_data_list:
3242       node = node_i.name
3243       nimg = node_image[node]
3244
3245       if node_i.offline:
3246         if verbose:
3247           feedback_fn("* Skipping offline node %s" % (node,))
3248         n_offline += 1
3249         continue
3250
3251       if node == master_node:
3252         ntype = "master"
3253       elif node_i.master_candidate:
3254         ntype = "master candidate"
3255       elif node_i.drained:
3256         ntype = "drained"
3257         n_drained += 1
3258       else:
3259         ntype = "regular"
3260       if verbose:
3261         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3262
3263       msg = all_nvinfo[node].fail_msg
3264       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3265                msg)
3266       if msg:
3267         nimg.rpc_fail = True
3268         continue
3269
3270       nresult = all_nvinfo[node].payload
3271
3272       nimg.call_ok = self._VerifyNode(node_i, nresult)
3273       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3274       self._VerifyNodeNetwork(node_i, nresult)
3275       self._VerifyNodeUserScripts(node_i, nresult)
3276       self._VerifyOob(node_i, nresult)
3277
3278       if nimg.vm_capable:
3279         self._VerifyNodeLVM(node_i, nresult, vg_name)
3280         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3281                              all_drbd_map)
3282
3283         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3284         self._UpdateNodeInstances(node_i, nresult, nimg)
3285         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3286         self._UpdateNodeOS(node_i, nresult, nimg)
3287
3288         if not nimg.os_fail:
3289           if refos_img is None:
3290             refos_img = nimg
3291           self._VerifyNodeOS(node_i, nimg, refos_img)
3292         self._VerifyNodeBridges(node_i, nresult, bridges)
3293
3294         # Check whether all running instancies are primary for the node. (This
3295         # can no longer be done from _VerifyInstance below, since some of the
3296         # wrong instances could be from other node groups.)
3297         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3298
3299         for inst in non_primary_inst:
3300           test = inst in self.all_inst_info
3301           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3302                    "instance should not run on node %s", node_i.name)
3303           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3304                    "node is running unknown instance %s", inst)
3305
3306     for node, result in extra_lv_nvinfo.items():
3307       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3308                               node_image[node], vg_name)
3309
3310     feedback_fn("* Verifying instance status")
3311     for instance in self.my_inst_names:
3312       if verbose:
3313         feedback_fn("* Verifying instance %s" % instance)
3314       inst_config = self.my_inst_info[instance]
3315       self._VerifyInstance(instance, inst_config, node_image,
3316                            instdisk[instance])
3317       inst_nodes_offline = []
3318
3319       pnode = inst_config.primary_node
3320       pnode_img = node_image[pnode]
3321       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3322                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3323                " primary node failed", instance)
3324
3325       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3326                pnode_img.offline,
3327                constants.CV_EINSTANCEBADNODE, instance,
3328                "instance is marked as running and lives on offline node %s",
3329                inst_config.primary_node)
3330
3331       # If the instance is non-redundant we cannot survive losing its primary
3332       # node, so we are not N+1 compliant. On the other hand we have no disk
3333       # templates with more than one secondary so that situation is not well
3334       # supported either.
3335       # FIXME: does not support file-backed instances
3336       if not inst_config.secondary_nodes:
3337         i_non_redundant.append(instance)
3338
3339       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3340                constants.CV_EINSTANCELAYOUT,
3341                instance, "instance has multiple secondary nodes: %s",
3342                utils.CommaJoin(inst_config.secondary_nodes),
3343                code=self.ETYPE_WARNING)
3344
3345       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3346         pnode = inst_config.primary_node
3347         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3348         instance_groups = {}
3349
3350         for node in instance_nodes:
3351           instance_groups.setdefault(self.all_node_info[node].group,
3352                                      []).append(node)
3353
3354         pretty_list = [
3355           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3356           # Sort so that we always list the primary node first.
3357           for group, nodes in sorted(instance_groups.items(),
3358                                      key=lambda (_, nodes): pnode in nodes,
3359                                      reverse=True)]
3360
3361         self._ErrorIf(len(instance_groups) > 1,
3362                       constants.CV_EINSTANCESPLITGROUPS,
3363                       instance, "instance has primary and secondary nodes in"
3364                       " different groups: %s", utils.CommaJoin(pretty_list),
3365                       code=self.ETYPE_WARNING)
3366
3367       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3368         i_non_a_balanced.append(instance)
3369
3370       for snode in inst_config.secondary_nodes:
3371         s_img = node_image[snode]
3372         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3373                  snode, "instance %s, connection to secondary node failed",
3374                  instance)
3375
3376         if s_img.offline:
3377           inst_nodes_offline.append(snode)
3378
3379       # warn that the instance lives on offline nodes
3380       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3381                "instance has offline secondary node(s) %s",
3382                utils.CommaJoin(inst_nodes_offline))
3383       # ... or ghost/non-vm_capable nodes
3384       for node in inst_config.all_nodes:
3385         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3386                  instance, "instance lives on ghost node %s", node)
3387         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3388                  instance, "instance lives on non-vm_capable node %s", node)
3389
3390     feedback_fn("* Verifying orphan volumes")
3391     reserved = utils.FieldSet(*cluster.reserved_lvs)
3392
3393     # We will get spurious "unknown volume" warnings if any node of this group
3394     # is secondary for an instance whose primary is in another group. To avoid
3395     # them, we find these instances and add their volumes to node_vol_should.
3396     for inst in self.all_inst_info.values():
3397       for secondary in inst.secondary_nodes:
3398         if (secondary in self.my_node_info
3399             and inst.name not in self.my_inst_info):
3400           inst.MapLVsByNode(node_vol_should)
3401           break
3402
3403     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3404
3405     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3406       feedback_fn("* Verifying N+1 Memory redundancy")
3407       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3408
3409     feedback_fn("* Other Notes")
3410     if i_non_redundant:
3411       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3412                   % len(i_non_redundant))
3413
3414     if i_non_a_balanced:
3415       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3416                   % len(i_non_a_balanced))
3417
3418     if i_offline:
3419       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3420
3421     if n_offline:
3422       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3423
3424     if n_drained:
3425       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3426
3427     return not self.bad
3428
3429   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3430     """Analyze the post-hooks' result
3431
3432     This method analyses the hook result, handles it, and sends some
3433     nicely-formatted feedback back to the user.
3434
3435     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3436         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3437     @param hooks_results: the results of the multi-node hooks rpc call
3438     @param feedback_fn: function used send feedback back to the caller
3439     @param lu_result: previous Exec result
3440     @return: the new Exec result, based on the previous result
3441         and hook results
3442
3443     """
3444     # We only really run POST phase hooks, only for non-empty groups,
3445     # and are only interested in their results
3446     if not self.my_node_names:
3447       # empty node group
3448       pass
3449     elif phase == constants.HOOKS_PHASE_POST:
3450       # Used to change hooks' output to proper indentation
3451       feedback_fn("* Hooks Results")
3452       assert hooks_results, "invalid result from hooks"
3453
3454       for node_name in hooks_results:
3455         res = hooks_results[node_name]
3456         msg = res.fail_msg
3457         test = msg and not res.offline
3458         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3459                       "Communication failure in hooks execution: %s", msg)
3460         if res.offline or msg:
3461           # No need to investigate payload if node is offline or gave
3462           # an error.
3463           continue
3464         for script, hkr, output in res.payload:
3465           test = hkr == constants.HKR_FAIL
3466           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3467                         "Script %s failed, output:", script)
3468           if test:
3469             output = self._HOOKS_INDENT_RE.sub("      ", output)
3470             feedback_fn("%s" % output)
3471             lu_result = False
3472
3473     return lu_result
3474
3475
3476 class LUClusterVerifyDisks(NoHooksLU):
3477   """Verifies the cluster disks status.
3478
3479   """
3480   REQ_BGL = False
3481
3482   def ExpandNames(self):
3483     self.share_locks = _ShareAll()
3484     self.needed_locks = {
3485       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3486       }
3487
3488   def Exec(self, feedback_fn):
3489     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3490
3491     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3492     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3493                            for group in group_names])
3494
3495
3496 class LUGroupVerifyDisks(NoHooksLU):
3497   """Verifies the status of all disks in a node group.
3498
3499   """
3500   REQ_BGL = False
3501
3502   def ExpandNames(self):
3503     # Raises errors.OpPrereqError on its own if group can't be found
3504     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3505
3506     self.share_locks = _ShareAll()
3507     self.needed_locks = {
3508       locking.LEVEL_INSTANCE: [],
3509       locking.LEVEL_NODEGROUP: [],
3510       locking.LEVEL_NODE: [],
3511       }
3512
3513   def DeclareLocks(self, level):
3514     if level == locking.LEVEL_INSTANCE:
3515       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3516
3517       # Lock instances optimistically, needs verification once node and group
3518       # locks have been acquired
3519       self.needed_locks[locking.LEVEL_INSTANCE] = \
3520         self.cfg.GetNodeGroupInstances(self.group_uuid)
3521
3522     elif level == locking.LEVEL_NODEGROUP:
3523       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3524
3525       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3526         set([self.group_uuid] +
3527             # Lock all groups used by instances optimistically; this requires
3528             # going via the node before it's locked, requiring verification
3529             # later on
3530             [group_uuid
3531              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3532              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3533
3534     elif level == locking.LEVEL_NODE:
3535       # This will only lock the nodes in the group to be verified which contain
3536       # actual instances
3537       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3538       self._LockInstancesNodes()
3539
3540       # Lock all nodes in group to be verified
3541       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3542       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3543       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3544
3545   def CheckPrereq(self):
3546     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3547     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3548     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3549
3550     assert self.group_uuid in owned_groups
3551
3552     # Check if locked instances are still correct
3553     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3554
3555     # Get instance information
3556     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3557
3558     # Check if node groups for locked instances are still correct
3559     _CheckInstancesNodeGroups(self.cfg, self.instances,
3560                               owned_groups, owned_nodes, self.group_uuid)
3561
3562   def Exec(self, feedback_fn):
3563     """Verify integrity of cluster disks.
3564
3565     @rtype: tuple of three items
3566     @return: a tuple of (dict of node-to-node_error, list of instances
3567         which need activate-disks, dict of instance: (node, volume) for
3568         missing volumes
3569
3570     """
3571     res_nodes = {}
3572     res_instances = set()
3573     res_missing = {}
3574
3575     nv_dict = _MapInstanceDisksToNodes([inst
3576             for inst in self.instances.values()
3577             if inst.admin_state == constants.ADMINST_UP])
3578
3579     if nv_dict:
3580       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3581                              set(self.cfg.GetVmCapableNodeList()))
3582
3583       node_lvs = self.rpc.call_lv_list(nodes, [])
3584
3585       for (node, node_res) in node_lvs.items():
3586         if node_res.offline:
3587           continue
3588
3589         msg = node_res.fail_msg
3590         if msg:
3591           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3592           res_nodes[node] = msg
3593           continue
3594
3595         for lv_name, (_, _, lv_online) in node_res.payload.items():
3596           inst = nv_dict.pop((node, lv_name), None)
3597           if not (lv_online or inst is None):
3598             res_instances.add(inst)
3599
3600       # any leftover items in nv_dict are missing LVs, let's arrange the data
3601       # better
3602       for key, inst in nv_dict.iteritems():
3603         res_missing.setdefault(inst, []).append(list(key))
3604
3605     return (res_nodes, list(res_instances), res_missing)
3606
3607
3608 class LUClusterRepairDiskSizes(NoHooksLU):
3609   """Verifies the cluster disks sizes.
3610
3611   """
3612   REQ_BGL = False
3613
3614   def ExpandNames(self):
3615     if self.op.instances:
3616       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3617       self.needed_locks = {
3618         locking.LEVEL_NODE_RES: [],
3619         locking.LEVEL_INSTANCE: self.wanted_names,
3620         }
3621       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3622     else:
3623       self.wanted_names = None
3624       self.needed_locks = {
3625         locking.LEVEL_NODE_RES: locking.ALL_SET,
3626         locking.LEVEL_INSTANCE: locking.ALL_SET,
3627         }
3628     self.share_locks = {
3629       locking.LEVEL_NODE_RES: 1,
3630       locking.LEVEL_INSTANCE: 0,
3631       }
3632
3633   def DeclareLocks(self, level):
3634     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3635       self._LockInstancesNodes(primary_only=True, level=level)
3636
3637   def CheckPrereq(self):
3638     """Check prerequisites.
3639
3640     This only checks the optional instance list against the existing names.
3641
3642     """
3643     if self.wanted_names is None:
3644       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3645
3646     self.wanted_instances = \
3647         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3648
3649   def _EnsureChildSizes(self, disk):
3650     """Ensure children of the disk have the needed disk size.
3651
3652     This is valid mainly for DRBD8 and fixes an issue where the
3653     children have smaller disk size.
3654
3655     @param disk: an L{ganeti.objects.Disk} object
3656
3657     """
3658     if disk.dev_type == constants.LD_DRBD8:
3659       assert disk.children, "Empty children for DRBD8?"
3660       fchild = disk.children[0]
3661       mismatch = fchild.size < disk.size
3662       if mismatch:
3663         self.LogInfo("Child disk has size %d, parent %d, fixing",
3664                      fchild.size, disk.size)
3665         fchild.size = disk.size
3666
3667       # and we recurse on this child only, not on the metadev
3668       return self._EnsureChildSizes(fchild) or mismatch
3669     else:
3670       return False
3671
3672   def Exec(self, feedback_fn):
3673     """Verify the size of cluster disks.
3674
3675     """
3676     # TODO: check child disks too
3677     # TODO: check differences in size between primary/secondary nodes
3678     per_node_disks = {}
3679     for instance in self.wanted_instances:
3680       pnode = instance.primary_node
3681       if pnode not in per_node_disks:
3682         per_node_disks[pnode] = []
3683       for idx, disk in enumerate(instance.disks):
3684         per_node_disks[pnode].append((instance, idx, disk))
3685
3686     assert not (frozenset(per_node_disks.keys()) -
3687                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3688       "Not owning correct locks"
3689     assert not self.owned_locks(locking.LEVEL_NODE)
3690
3691     changed = []
3692     for node, dskl in per_node_disks.items():
3693       newl = [v[2].Copy() for v in dskl]
3694       for dsk in newl:
3695         self.cfg.SetDiskID(dsk, node)
3696       result = self.rpc.call_blockdev_getsize(node, newl)
3697       if result.fail_msg:
3698         self.LogWarning("Failure in blockdev_getsize call to node"
3699                         " %s, ignoring", node)
3700         continue
3701       if len(result.payload) != len(dskl):
3702         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3703                         " result.payload=%s", node, len(dskl), result.payload)
3704         self.LogWarning("Invalid result from node %s, ignoring node results",
3705                         node)
3706         continue
3707       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3708         if size is None:
3709           self.LogWarning("Disk %d of instance %s did not return size"
3710                           " information, ignoring", idx, instance.name)
3711           continue
3712         if not isinstance(size, (int, long)):
3713           self.LogWarning("Disk %d of instance %s did not return valid"
3714                           " size information, ignoring", idx, instance.name)
3715           continue
3716         size = size >> 20
3717         if size != disk.size:
3718           self.LogInfo("Disk %d of instance %s has mismatched size,"
3719                        " correcting: recorded %d, actual %d", idx,
3720                        instance.name, disk.size, size)
3721           disk.size = size
3722           self.cfg.Update(instance, feedback_fn)
3723           changed.append((instance.name, idx, size))
3724         if self._EnsureChildSizes(disk):
3725           self.cfg.Update(instance, feedback_fn)
3726           changed.append((instance.name, idx, disk.size))
3727     return changed
3728
3729
3730 class LUClusterRename(LogicalUnit):
3731   """Rename the cluster.
3732
3733   """
3734   HPATH = "cluster-rename"
3735   HTYPE = constants.HTYPE_CLUSTER
3736
3737   def BuildHooksEnv(self):
3738     """Build hooks env.
3739
3740     """
3741     return {
3742       "OP_TARGET": self.cfg.GetClusterName(),
3743       "NEW_NAME": self.op.name,
3744       }
3745
3746   def BuildHooksNodes(self):
3747     """Build hooks nodes.
3748
3749     """
3750     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3751
3752   def CheckPrereq(self):
3753     """Verify that the passed name is a valid one.
3754
3755     """
3756     hostname = netutils.GetHostname(name=self.op.name,
3757                                     family=self.cfg.GetPrimaryIPFamily())
3758
3759     new_name = hostname.name
3760     self.ip = new_ip = hostname.ip
3761     old_name = self.cfg.GetClusterName()
3762     old_ip = self.cfg.GetMasterIP()
3763     if new_name == old_name and new_ip == old_ip:
3764       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3765                                  " cluster has changed",
3766                                  errors.ECODE_INVAL)
3767     if new_ip != old_ip:
3768       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3769         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3770                                    " reachable on the network" %
3771                                    new_ip, errors.ECODE_NOTUNIQUE)
3772
3773     self.op.name = new_name
3774
3775   def Exec(self, feedback_fn):
3776     """Rename the cluster.
3777
3778     """
3779     clustername = self.op.name
3780     new_ip = self.ip
3781
3782     # shutdown the master IP
3783     master_params = self.cfg.GetMasterNetworkParameters()
3784     ems = self.cfg.GetUseExternalMipScript()
3785     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3786                                                      master_params, ems)
3787     result.Raise("Could not disable the master role")
3788
3789     try:
3790       cluster = self.cfg.GetClusterInfo()
3791       cluster.cluster_name = clustername
3792       cluster.master_ip = new_ip
3793       self.cfg.Update(cluster, feedback_fn)
3794
3795       # update the known hosts file
3796       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3797       node_list = self.cfg.GetOnlineNodeList()
3798       try:
3799         node_list.remove(master_params.name)
3800       except ValueError:
3801         pass
3802       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3803     finally:
3804       master_params.ip = new_ip
3805       result = self.rpc.call_node_activate_master_ip(master_params.name,
3806                                                      master_params, ems)
3807       msg = result.fail_msg
3808       if msg:
3809         self.LogWarning("Could not re-enable the master role on"
3810                         " the master, please restart manually: %s", msg)
3811
3812     return clustername
3813
3814
3815 def _ValidateNetmask(cfg, netmask):
3816   """Checks if a netmask is valid.
3817
3818   @type cfg: L{config.ConfigWriter}
3819   @param cfg: The cluster configuration
3820   @type netmask: int
3821   @param netmask: the netmask to be verified
3822   @raise errors.OpPrereqError: if the validation fails
3823
3824   """
3825   ip_family = cfg.GetPrimaryIPFamily()
3826   try:
3827     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3828   except errors.ProgrammerError:
3829     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3830                                ip_family)
3831   if not ipcls.ValidateNetmask(netmask):
3832     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3833                                 (netmask))
3834
3835
3836 class LUClusterSetParams(LogicalUnit):
3837   """Change the parameters of the cluster.
3838
3839   """
3840   HPATH = "cluster-modify"
3841   HTYPE = constants.HTYPE_CLUSTER
3842   REQ_BGL = False
3843
3844   def CheckArguments(self):
3845     """Check parameters
3846
3847     """
3848     if self.op.uid_pool:
3849       uidpool.CheckUidPool(self.op.uid_pool)
3850
3851     if self.op.add_uids:
3852       uidpool.CheckUidPool(self.op.add_uids)
3853
3854     if self.op.remove_uids:
3855       uidpool.CheckUidPool(self.op.remove_uids)
3856
3857     if self.op.master_netmask is not None:
3858       _ValidateNetmask(self.cfg, self.op.master_netmask)
3859
3860     if self.op.diskparams:
3861       for dt_params in self.op.diskparams.values():
3862         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3863       try:
3864         utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3865       except errors.OpPrereqError, err:
3866         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3867                                    errors.ECODE_INVAL)
3868
3869   def ExpandNames(self):
3870     # FIXME: in the future maybe other cluster params won't require checking on
3871     # all nodes to be modified.
3872     self.needed_locks = {
3873       locking.LEVEL_NODE: locking.ALL_SET,
3874       locking.LEVEL_INSTANCE: locking.ALL_SET,
3875       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3876     }
3877     self.share_locks = {
3878         locking.LEVEL_NODE: 1,
3879         locking.LEVEL_INSTANCE: 1,
3880         locking.LEVEL_NODEGROUP: 1,
3881     }
3882
3883   def BuildHooksEnv(self):
3884     """Build hooks env.
3885
3886     """
3887     return {
3888       "OP_TARGET": self.cfg.GetClusterName(),
3889       "NEW_VG_NAME": self.op.vg_name,
3890       }
3891
3892   def BuildHooksNodes(self):
3893     """Build hooks nodes.
3894
3895     """
3896     mn = self.cfg.GetMasterNode()
3897     return ([mn], [mn])
3898
3899   def CheckPrereq(self):
3900     """Check prerequisites.
3901
3902     This checks whether the given params don't conflict and
3903     if the given volume group is valid.
3904
3905     """
3906     if self.op.vg_name is not None and not self.op.vg_name:
3907       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3908         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3909                                    " instances exist", errors.ECODE_INVAL)
3910
3911     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3912       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3913         raise errors.OpPrereqError("Cannot disable drbd helper while"
3914                                    " drbd-based instances exist",
3915                                    errors.ECODE_INVAL)
3916
3917     node_list = self.owned_locks(locking.LEVEL_NODE)
3918
3919     # if vg_name not None, checks given volume group on all nodes
3920     if self.op.vg_name:
3921       vglist = self.rpc.call_vg_list(node_list)
3922       for node in node_list:
3923         msg = vglist[node].fail_msg
3924         if msg:
3925           # ignoring down node
3926           self.LogWarning("Error while gathering data on node %s"
3927                           " (ignoring node): %s", node, msg)
3928           continue
3929         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3930                                               self.op.vg_name,
3931                                               constants.MIN_VG_SIZE)
3932         if vgstatus:
3933           raise errors.OpPrereqError("Error on node '%s': %s" %
3934                                      (node, vgstatus), errors.ECODE_ENVIRON)
3935
3936     if self.op.drbd_helper:
3937       # checks given drbd helper on all nodes
3938       helpers = self.rpc.call_drbd_helper(node_list)
3939       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3940         if ninfo.offline:
3941           self.LogInfo("Not checking drbd helper on offline node %s", node)
3942           continue
3943         msg = helpers[node].fail_msg
3944         if msg:
3945           raise errors.OpPrereqError("Error checking drbd helper on node"
3946                                      " '%s': %s" % (node, msg),
3947                                      errors.ECODE_ENVIRON)
3948         node_helper = helpers[node].payload
3949         if node_helper != self.op.drbd_helper:
3950           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3951                                      (node, node_helper), errors.ECODE_ENVIRON)
3952
3953     self.cluster = cluster = self.cfg.GetClusterInfo()
3954     # validate params changes
3955     if self.op.beparams:
3956       objects.UpgradeBeParams(self.op.beparams)
3957       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3958       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3959
3960     if self.op.ndparams:
3961       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3962       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3963
3964       # TODO: we need a more general way to handle resetting
3965       # cluster-level parameters to default values
3966       if self.new_ndparams["oob_program"] == "":
3967         self.new_ndparams["oob_program"] = \
3968             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3969
3970     if self.op.hv_state:
3971       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3972                                             self.cluster.hv_state_static)
3973       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3974                                for hv, values in new_hv_state.items())
3975
3976     if self.op.disk_state:
3977       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3978                                                 self.cluster.disk_state_static)
3979       self.new_disk_state = \
3980         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3981                             for name, values in svalues.items()))
3982              for storage, svalues in new_disk_state.items())
3983
3984     if self.op.ipolicy:
3985       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3986                                             group_policy=False)
3987
3988       all_instances = self.cfg.GetAllInstancesInfo().values()
3989       violations = set()
3990       for group in self.cfg.GetAllNodeGroupsInfo().values():
3991         instances = frozenset([inst for inst in all_instances
3992                                if compat.any(node in group.members
3993                                              for node in inst.all_nodes)])
3994         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3995         new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3996                                                                    group),
3997                                             new_ipolicy, instances)
3998         if new:
3999           violations.update(new)
4000
4001       if violations:
4002         self.LogWarning("After the ipolicy change the following instances"
4003                         " violate them: %s",
4004                         utils.CommaJoin(utils.NiceSort(violations)))
4005
4006     if self.op.nicparams:
4007       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4008       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4009       objects.NIC.CheckParameterSyntax(self.new_nicparams)
4010       nic_errors = []
4011
4012       # check all instances for consistency
4013       for instance in self.cfg.GetAllInstancesInfo().values():
4014         for nic_idx, nic in enumerate(instance.nics):
4015           params_copy = copy.deepcopy(nic.nicparams)
4016           params_filled = objects.FillDict(self.new_nicparams, params_copy)
4017
4018           # check parameter syntax
4019           try:
4020             objects.NIC.CheckParameterSyntax(params_filled)
4021           except errors.ConfigurationError, err:
4022             nic_errors.append("Instance %s, nic/%d: %s" %
4023                               (instance.name, nic_idx, err))
4024
4025           # if we're moving instances to routed, check that they have an ip
4026           target_mode = params_filled[constants.NIC_MODE]
4027           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4028             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4029                               " address" % (instance.name, nic_idx))
4030       if nic_errors:
4031         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4032                                    "\n".join(nic_errors))
4033
4034     # hypervisor list/parameters
4035     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4036     if self.op.hvparams:
4037       for hv_name, hv_dict in self.op.hvparams.items():
4038         if hv_name not in self.new_hvparams:
4039           self.new_hvparams[hv_name] = hv_dict
4040         else:
4041           self.new_hvparams[hv_name].update(hv_dict)
4042
4043     # disk template parameters
4044     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4045     if self.op.diskparams:
4046       for dt_name, dt_params in self.op.diskparams.items():
4047         if dt_name not in self.op.diskparams:
4048           self.new_diskparams[dt_name] = dt_params
4049         else:
4050           self.new_diskparams[dt_name].update(dt_params)
4051
4052     # os hypervisor parameters
4053     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4054     if self.op.os_hvp:
4055       for os_name, hvs in self.op.os_hvp.items():
4056         if os_name not in self.new_os_hvp:
4057           self.new_os_hvp[os_name] = hvs
4058         else:
4059           for hv_name, hv_dict in hvs.items():
4060             if hv_name not in self.new_os_hvp[os_name]:
4061               self.new_os_hvp[os_name][hv_name] = hv_dict
4062             else:
4063               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4064
4065     # os parameters
4066     self.new_osp = objects.FillDict(cluster.osparams, {})
4067     if self.op.osparams:
4068       for os_name, osp in self.op.osparams.items():
4069         if os_name not in self.new_osp:
4070           self.new_osp[os_name] = {}
4071
4072         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4073                                                   use_none=True)
4074
4075         if not self.new_osp[os_name]:
4076           # we removed all parameters
4077           del self.new_osp[os_name]
4078         else:
4079           # check the parameter validity (remote check)
4080           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4081                          os_name, self.new_osp[os_name])
4082
4083     # changes to the hypervisor list
4084     if self.op.enabled_hypervisors is not None:
4085       self.hv_list = self.op.enabled_hypervisors
4086       for hv in self.hv_list:
4087         # if the hypervisor doesn't already exist in the cluster
4088         # hvparams, we initialize it to empty, and then (in both
4089         # cases) we make sure to fill the defaults, as we might not
4090         # have a complete defaults list if the hypervisor wasn't
4091         # enabled before
4092         if hv not in new_hvp:
4093           new_hvp[hv] = {}
4094         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4095         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4096     else:
4097       self.hv_list = cluster.enabled_hypervisors
4098
4099     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4100       # either the enabled list has changed, or the parameters have, validate
4101       for hv_name, hv_params in self.new_hvparams.items():
4102         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4103             (self.op.enabled_hypervisors and
4104              hv_name in self.op.enabled_hypervisors)):
4105           # either this is a new hypervisor, or its parameters have changed
4106           hv_class = hypervisor.GetHypervisor(hv_name)
4107           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4108           hv_class.CheckParameterSyntax(hv_params)
4109           _CheckHVParams(self, node_list, hv_name, hv_params)
4110
4111     if self.op.os_hvp:
4112       # no need to check any newly-enabled hypervisors, since the
4113       # defaults have already been checked in the above code-block
4114       for os_name, os_hvp in self.new_os_hvp.items():
4115         for hv_name, hv_params in os_hvp.items():
4116           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4117           # we need to fill in the new os_hvp on top of the actual hv_p
4118           cluster_defaults = self.new_hvparams.get(hv_name, {})
4119           new_osp = objects.FillDict(cluster_defaults, hv_params)
4120           hv_class = hypervisor.GetHypervisor(hv_name)
4121           hv_class.CheckParameterSyntax(new_osp)
4122           _CheckHVParams(self, node_list, hv_name, new_osp)
4123
4124     if self.op.default_iallocator:
4125       alloc_script = utils.FindFile(self.op.default_iallocator,
4126                                     constants.IALLOCATOR_SEARCH_PATH,
4127                                     os.path.isfile)
4128       if alloc_script is None:
4129         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4130                                    " specified" % self.op.default_iallocator,
4131                                    errors.ECODE_INVAL)
4132
4133   def Exec(self, feedback_fn):
4134     """Change the parameters of the cluster.
4135
4136     """
4137     if self.op.vg_name is not None:
4138       new_volume = self.op.vg_name
4139       if not new_volume:
4140         new_volume = None
4141       if new_volume != self.cfg.GetVGName():
4142         self.cfg.SetVGName(new_volume)
4143       else:
4144         feedback_fn("Cluster LVM configuration already in desired"
4145                     " state, not changing")
4146     if self.op.drbd_helper is not None:
4147       new_helper = self.op.drbd_helper
4148       if not new_helper:
4149         new_helper = None
4150       if new_helper != self.cfg.GetDRBDHelper():
4151         self.cfg.SetDRBDHelper(new_helper)
4152       else:
4153         feedback_fn("Cluster DRBD helper already in desired state,"
4154                     " not changing")
4155     if self.op.hvparams:
4156       self.cluster.hvparams = self.new_hvparams
4157     if self.op.os_hvp:
4158       self.cluster.os_hvp = self.new_os_hvp
4159     if self.op.enabled_hypervisors is not None:
4160       self.cluster.hvparams = self.new_hvparams
4161       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4162     if self.op.beparams:
4163       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4164     if self.op.nicparams:
4165       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4166     if self.op.ipolicy:
4167       self.cluster.ipolicy = self.new_ipolicy
4168     if self.op.osparams:
4169       self.cluster.osparams = self.new_osp
4170     if self.op.ndparams:
4171       self.cluster.ndparams = self.new_ndparams
4172     if self.op.diskparams:
4173       self.cluster.diskparams = self.new_diskparams
4174     if self.op.hv_state:
4175       self.cluster.hv_state_static = self.new_hv_state
4176     if self.op.disk_state:
4177       self.cluster.disk_state_static = self.new_disk_state
4178
4179     if self.op.candidate_pool_size is not None:
4180       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4181       # we need to update the pool size here, otherwise the save will fail
4182       _AdjustCandidatePool(self, [])
4183
4184     if self.op.maintain_node_health is not None:
4185       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4186         feedback_fn("Note: CONFD was disabled at build time, node health"
4187                     " maintenance is not useful (still enabling it)")
4188       self.cluster.maintain_node_health = self.op.maintain_node_health
4189
4190     if self.op.prealloc_wipe_disks is not None:
4191       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4192
4193     if self.op.add_uids is not None:
4194       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4195
4196     if self.op.remove_uids is not None:
4197       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4198
4199     if self.op.uid_pool is not None:
4200       self.cluster.uid_pool = self.op.uid_pool
4201
4202     if self.op.default_iallocator is not None:
4203       self.cluster.default_iallocator = self.op.default_iallocator
4204
4205     if self.op.reserved_lvs is not None:
4206       self.cluster.reserved_lvs = self.op.reserved_lvs
4207
4208     if self.op.use_external_mip_script is not None:
4209       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4210
4211     def helper_os(aname, mods, desc):
4212       desc += " OS list"
4213       lst = getattr(self.cluster, aname)
4214       for key, val in mods:
4215         if key == constants.DDM_ADD:
4216           if val in lst:
4217             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4218           else:
4219             lst.append(val)
4220         elif key == constants.DDM_REMOVE:
4221           if val in lst:
4222             lst.remove(val)
4223           else:
4224             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4225         else:
4226           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4227
4228     if self.op.hidden_os:
4229       helper_os("hidden_os", self.op.hidden_os, "hidden")
4230
4231     if self.op.blacklisted_os:
4232       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4233
4234     if self.op.master_netdev:
4235       master_params = self.cfg.GetMasterNetworkParameters()
4236       ems = self.cfg.GetUseExternalMipScript()
4237       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4238                   self.cluster.master_netdev)
4239       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4240                                                        master_params, ems)
4241       result.Raise("Could not disable the master ip")
4242       feedback_fn("Changing master_netdev from %s to %s" %
4243                   (master_params.netdev, self.op.master_netdev))
4244       self.cluster.master_netdev = self.op.master_netdev
4245
4246     if self.op.master_netmask:
4247       master_params = self.cfg.GetMasterNetworkParameters()
4248       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4249       result = self.rpc.call_node_change_master_netmask(master_params.name,
4250                                                         master_params.netmask,
4251                                                         self.op.master_netmask,
4252                                                         master_params.ip,
4253                                                         master_params.netdev)
4254       if result.fail_msg:
4255         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4256         feedback_fn(msg)
4257
4258       self.cluster.master_netmask = self.op.master_netmask
4259
4260     self.cfg.Update(self.cluster, feedback_fn)
4261
4262     if self.op.master_netdev:
4263       master_params = self.cfg.GetMasterNetworkParameters()
4264       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4265                   self.op.master_netdev)
4266       ems = self.cfg.GetUseExternalMipScript()
4267       result = self.rpc.call_node_activate_master_ip(master_params.name,
4268                                                      master_params, ems)
4269       if result.fail_msg:
4270         self.LogWarning("Could not re-enable the master ip on"
4271                         " the master, please restart manually: %s",
4272                         result.fail_msg)
4273
4274
4275 def _UploadHelper(lu, nodes, fname):
4276   """Helper for uploading a file and showing warnings.
4277
4278   """
4279   if os.path.exists(fname):
4280     result = lu.rpc.call_upload_file(nodes, fname)
4281     for to_node, to_result in result.items():
4282       msg = to_result.fail_msg
4283       if msg:
4284         msg = ("Copy of file %s to node %s failed: %s" %
4285                (fname, to_node, msg))
4286         lu.proc.LogWarning(msg)
4287
4288
4289 def _ComputeAncillaryFiles(cluster, redist):
4290   """Compute files external to Ganeti which need to be consistent.
4291
4292   @type redist: boolean
4293   @param redist: Whether to include files which need to be redistributed
4294
4295   """
4296   # Compute files for all nodes
4297   files_all = set([
4298     constants.SSH_KNOWN_HOSTS_FILE,
4299     constants.CONFD_HMAC_KEY,
4300     constants.CLUSTER_DOMAIN_SECRET_FILE,
4301     constants.SPICE_CERT_FILE,
4302     constants.SPICE_CACERT_FILE,
4303     constants.RAPI_USERS_FILE,
4304     ])
4305
4306   if not redist:
4307     files_all.update(constants.ALL_CERT_FILES)
4308     files_all.update(ssconf.SimpleStore().GetFileList())
4309   else:
4310     # we need to ship at least the RAPI certificate
4311     files_all.add(constants.RAPI_CERT_FILE)
4312
4313   if cluster.modify_etc_hosts:
4314     files_all.add(constants.ETC_HOSTS)
4315
4316   if cluster.use_external_mip_script:
4317     files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4318
4319   # Files which are optional, these must:
4320   # - be present in one other category as well
4321   # - either exist or not exist on all nodes of that category (mc, vm all)
4322   files_opt = set([
4323     constants.RAPI_USERS_FILE,
4324     ])
4325
4326   # Files which should only be on master candidates
4327   files_mc = set()
4328
4329   if not redist:
4330     files_mc.add(constants.CLUSTER_CONF_FILE)
4331
4332   # Files which should only be on VM-capable nodes
4333   files_vm = set(filename
4334     for hv_name in cluster.enabled_hypervisors
4335     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4336
4337   files_opt |= set(filename
4338     for hv_name in cluster.enabled_hypervisors
4339     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4340
4341   # Filenames in each category must be unique
4342   all_files_set = files_all | files_mc | files_vm
4343   assert (len(all_files_set) ==
4344           sum(map(len, [files_all, files_mc, files_vm]))), \
4345          "Found file listed in more than one file list"
4346
4347   # Optional files must be present in one other category
4348   assert all_files_set.issuperset(files_opt), \
4349          "Optional file not in a different required list"
4350
4351   return (files_all, files_opt, files_mc, files_vm)
4352
4353
4354 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4355   """Distribute additional files which are part of the cluster configuration.
4356
4357   ConfigWriter takes care of distributing the config and ssconf files, but
4358   there are more files which should be distributed to all nodes. This function
4359   makes sure those are copied.
4360
4361   @param lu: calling logical unit
4362   @param additional_nodes: list of nodes not in the config to distribute to
4363   @type additional_vm: boolean
4364   @param additional_vm: whether the additional nodes are vm-capable or not
4365
4366   """
4367   # Gather target nodes
4368   cluster = lu.cfg.GetClusterInfo()
4369   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4370
4371   online_nodes = lu.cfg.GetOnlineNodeList()
4372   online_set = frozenset(online_nodes)
4373   vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4374
4375   if additional_nodes is not None:
4376     online_nodes.extend(additional_nodes)
4377     if additional_vm:
4378       vm_nodes.extend(additional_nodes)
4379
4380   # Never distribute to master node
4381   for nodelist in [online_nodes, vm_nodes]:
4382     if master_info.name in nodelist:
4383       nodelist.remove(master_info.name)
4384
4385   # Gather file lists
4386   (files_all, _, files_mc, files_vm) = \
4387     _ComputeAncillaryFiles(cluster, True)
4388
4389   # Never re-distribute configuration file from here
4390   assert not (constants.CLUSTER_CONF_FILE in files_all or
4391               constants.CLUSTER_CONF_FILE in files_vm)
4392   assert not files_mc, "Master candidates not handled in this function"
4393
4394   filemap = [
4395     (online_nodes, files_all),
4396     (vm_nodes, files_vm),
4397     ]
4398
4399   # Upload the files
4400   for (node_list, files) in filemap:
4401     for fname in files:
4402       _UploadHelper(lu, node_list, fname)
4403
4404
4405 class LUClusterRedistConf(NoHooksLU):
4406   """Force the redistribution of cluster configuration.
4407
4408   This is a very simple LU.
4409
4410   """
4411   REQ_BGL = False
4412
4413   def ExpandNames(self):
4414     self.needed_locks = {
4415       locking.LEVEL_NODE: locking.ALL_SET,
4416     }
4417     self.share_locks[locking.LEVEL_NODE] = 1
4418
4419   def Exec(self, feedback_fn):
4420     """Redistribute the configuration.
4421
4422     """
4423     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4424     _RedistributeAncillaryFiles(self)
4425
4426
4427 class LUClusterActivateMasterIp(NoHooksLU):
4428   """Activate the master IP on the master node.
4429
4430   """
4431   def Exec(self, feedback_fn):
4432     """Activate the master IP.
4433
4434     """
4435     master_params = self.cfg.GetMasterNetworkParameters()
4436     ems = self.cfg.GetUseExternalMipScript()
4437     result = self.rpc.call_node_activate_master_ip(master_params.name,
4438                                                    master_params, ems)
4439     result.Raise("Could not activate the master IP")
4440
4441
4442 class LUClusterDeactivateMasterIp(NoHooksLU):
4443   """Deactivate the master IP on the master node.
4444
4445   """
4446   def Exec(self, feedback_fn):
4447     """Deactivate the master IP.
4448
4449     """
4450     master_params = self.cfg.GetMasterNetworkParameters()
4451     ems = self.cfg.GetUseExternalMipScript()
4452     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4453                                                      master_params, ems)
4454     result.Raise("Could not deactivate the master IP")
4455
4456
4457 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4458   """Sleep and poll for an instance's disk to sync.
4459
4460   """
4461   if not instance.disks or disks is not None and not disks:
4462     return True
4463
4464   disks = _ExpandCheckDisks(instance, disks)
4465
4466   if not oneshot:
4467     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4468
4469   node = instance.primary_node
4470
4471   for dev in disks:
4472     lu.cfg.SetDiskID(dev, node)
4473
4474   # TODO: Convert to utils.Retry
4475
4476   retries = 0
4477   degr_retries = 10 # in seconds, as we sleep 1 second each time
4478   while True:
4479     max_time = 0
4480     done = True
4481     cumul_degraded = False
4482     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4483     msg = rstats.fail_msg
4484     if msg:
4485       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4486       retries += 1
4487       if retries >= 10:
4488         raise errors.RemoteError("Can't contact node %s for mirror data,"
4489                                  " aborting." % node)
4490       time.sleep(6)
4491       continue
4492     rstats = rstats.payload
4493     retries = 0
4494     for i, mstat in enumerate(rstats):
4495       if mstat is None:
4496         lu.LogWarning("Can't compute data for node %s/%s",
4497                            node, disks[i].iv_name)
4498         continue
4499
4500       cumul_degraded = (cumul_degraded or
4501                         (mstat.is_degraded and mstat.sync_percent is None))
4502       if mstat.sync_percent is not None:
4503         done = False
4504         if mstat.estimated_time is not None:
4505           rem_time = ("%s remaining (estimated)" %
4506                       utils.FormatSeconds(mstat.estimated_time))
4507           max_time = mstat.estimated_time
4508         else:
4509           rem_time = "no time estimate"
4510         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4511                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4512
4513     # if we're done but degraded, let's do a few small retries, to
4514     # make sure we see a stable and not transient situation; therefore
4515     # we force restart of the loop
4516     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4517       logging.info("Degraded disks found, %d retries left", degr_retries)
4518       degr_retries -= 1
4519       time.sleep(1)
4520       continue
4521
4522     if done or oneshot:
4523       break
4524
4525     time.sleep(min(60, max_time))
4526
4527   if done:
4528     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4529   return not cumul_degraded
4530
4531
4532 def _BlockdevFind(lu, node, dev, instance):
4533   """Wrapper around call_blockdev_find to annotate diskparams.
4534
4535   @param lu: A reference to the lu object
4536   @param node: The node to call out
4537   @param dev: The device to find
4538   @param instance: The instance object the device belongs to
4539   @returns The result of the rpc call
4540
4541   """
4542   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4543   return lu.rpc.call_blockdev_find(node, disk)
4544
4545
4546 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4547   """Wrapper around L{_CheckDiskConsistencyInner}.
4548
4549   """
4550   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4551   return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4552                                     ldisk=ldisk)
4553
4554
4555 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4556                                ldisk=False):
4557   """Check that mirrors are not degraded.
4558
4559   @attention: The device has to be annotated already.
4560
4561   The ldisk parameter, if True, will change the test from the
4562   is_degraded attribute (which represents overall non-ok status for
4563   the device(s)) to the ldisk (representing the local storage status).
4564
4565   """
4566   lu.cfg.SetDiskID(dev, node)
4567
4568   result = True
4569
4570   if on_primary or dev.AssembleOnSecondary():
4571     rstats = lu.rpc.call_blockdev_find(node, dev)
4572     msg = rstats.fail_msg
4573     if msg:
4574       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4575       result = False
4576     elif not rstats.payload:
4577       lu.LogWarning("Can't find disk on node %s", node)
4578       result = False
4579     else:
4580       if ldisk:
4581         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4582       else:
4583         result = result and not rstats.payload.is_degraded
4584
4585   if dev.children:
4586     for child in dev.children:
4587       result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4588                                                      on_primary)
4589
4590   return result
4591
4592
4593 class LUOobCommand(NoHooksLU):
4594   """Logical unit for OOB handling.
4595
4596   """
4597   REQ_BGL = False
4598   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4599
4600   def ExpandNames(self):
4601     """Gather locks we need.
4602
4603     """
4604     if self.op.node_names:
4605       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4606       lock_names = self.op.node_names
4607     else:
4608       lock_names = locking.ALL_SET
4609
4610     self.needed_locks = {
4611       locking.LEVEL_NODE: lock_names,
4612       }
4613
4614   def CheckPrereq(self):
4615     """Check prerequisites.
4616
4617     This checks:
4618      - the node exists in the configuration
4619      - OOB is supported
4620
4621     Any errors are signaled by raising errors.OpPrereqError.
4622
4623     """
4624     self.nodes = []
4625     self.master_node = self.cfg.GetMasterNode()
4626
4627     assert self.op.power_delay >= 0.0
4628
4629     if self.op.node_names:
4630       if (self.op.command in self._SKIP_MASTER and
4631           self.master_node in self.op.node_names):
4632         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4633         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4634
4635         if master_oob_handler:
4636           additional_text = ("run '%s %s %s' if you want to operate on the"
4637                              " master regardless") % (master_oob_handler,
4638                                                       self.op.command,
4639                                                       self.master_node)
4640         else:
4641           additional_text = "it does not support out-of-band operations"
4642
4643         raise errors.OpPrereqError(("Operating on the master node %s is not"
4644                                     " allowed for %s; %s") %
4645                                    (self.master_node, self.op.command,
4646                                     additional_text), errors.ECODE_INVAL)
4647     else:
4648       self.op.node_names = self.cfg.GetNodeList()
4649       if self.op.command in self._SKIP_MASTER:
4650         self.op.node_names.remove(self.master_node)
4651
4652     if self.op.command in self._SKIP_MASTER:
4653       assert self.master_node not in self.op.node_names
4654
4655     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4656       if node is None:
4657         raise errors.OpPrereqError("Node %s not found" % node_name,
4658                                    errors.ECODE_NOENT)
4659       else:
4660         self.nodes.append(node)
4661
4662       if (not self.op.ignore_status and
4663           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4664         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4665                                     " not marked offline") % node_name,
4666                                    errors.ECODE_STATE)
4667
4668   def Exec(self, feedback_fn):
4669     """Execute OOB and return result if we expect any.
4670
4671     """
4672     master_node = self.master_node
4673     ret = []
4674
4675     for idx, node in enumerate(utils.NiceSort(self.nodes,
4676                                               key=lambda node: node.name)):
4677       node_entry = [(constants.RS_NORMAL, node.name)]
4678       ret.append(node_entry)
4679
4680       oob_program = _SupportsOob(self.cfg, node)
4681
4682       if not oob_program:
4683         node_entry.append((constants.RS_UNAVAIL, None))
4684         continue
4685
4686       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4687                    self.op.command, oob_program, node.name)
4688       result = self.rpc.call_run_oob(master_node, oob_program,
4689                                      self.op.command, node.name,
4690                                      self.op.timeout)
4691
4692       if result.fail_msg:
4693         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4694                         node.name, result.fail_msg)
4695         node_entry.append((constants.RS_NODATA, None))
4696       else:
4697         try:
4698           self._CheckPayload(result)
4699         except errors.OpExecError, err:
4700           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4701                           node.name, err)
4702           node_entry.append((constants.RS_NODATA, None))
4703         else:
4704           if self.op.command == constants.OOB_HEALTH:
4705             # For health we should log important events
4706             for item, status in result.payload:
4707               if status in [constants.OOB_STATUS_WARNING,
4708                             constants.OOB_STATUS_CRITICAL]:
4709                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4710                                 item, node.name, status)
4711
4712           if self.op.command == constants.OOB_POWER_ON:
4713             node.powered = True
4714           elif self.op.command == constants.OOB_POWER_OFF:
4715             node.powered = False
4716           elif self.op.command == constants.OOB_POWER_STATUS:
4717             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4718             if powered != node.powered:
4719               logging.warning(("Recorded power state (%s) of node '%s' does not"
4720                                " match actual power state (%s)"), node.powered,
4721                               node.name, powered)
4722
4723           # For configuration changing commands we should update the node
4724           if self.op.command in (constants.OOB_POWER_ON,
4725                                  constants.OOB_POWER_OFF):
4726             self.cfg.Update(node, feedback_fn)
4727
4728           node_entry.append((constants.RS_NORMAL, result.payload))
4729
4730           if (self.op.command == constants.OOB_POWER_ON and
4731               idx < len(self.nodes) - 1):
4732             time.sleep(self.op.power_delay)
4733
4734     return ret
4735
4736   def _CheckPayload(self, result):
4737     """Checks if the payload is valid.
4738
4739     @param result: RPC result
4740     @raises errors.OpExecError: If payload is not valid
4741
4742     """
4743     errs = []
4744     if self.op.command == constants.OOB_HEALTH:
4745       if not isinstance(result.payload, list):
4746         errs.append("command 'health' is expected to return a list but got %s" %
4747                     type(result.payload))
4748       else:
4749         for item, status in result.payload:
4750           if status not in constants.OOB_STATUSES:
4751             errs.append("health item '%s' has invalid status '%s'" %
4752                         (item, status))
4753
4754     if self.op.command == constants.OOB_POWER_STATUS:
4755       if not isinstance(result.payload, dict):
4756         errs.append("power-status is expected to return a dict but got %s" %
4757                     type(result.payload))
4758
4759     if self.op.command in [
4760         constants.OOB_POWER_ON,
4761         constants.OOB_POWER_OFF,
4762         constants.OOB_POWER_CYCLE,
4763         ]:
4764       if result.payload is not None:
4765         errs.append("%s is expected to not return payload but got '%s'" %
4766                     (self.op.command, result.payload))
4767
4768     if errs:
4769       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4770                                utils.CommaJoin(errs))
4771
4772
4773 class _OsQuery(_QueryBase):
4774   FIELDS = query.OS_FIELDS
4775
4776   def ExpandNames(self, lu):
4777     # Lock all nodes in shared mode
4778     # Temporary removal of locks, should be reverted later
4779     # TODO: reintroduce locks when they are lighter-weight
4780     lu.needed_locks = {}
4781     #self.share_locks[locking.LEVEL_NODE] = 1
4782     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4783
4784     # The following variables interact with _QueryBase._GetNames
4785     if self.names:
4786       self.wanted = self.names
4787     else:
4788       self.wanted = locking.ALL_SET
4789
4790     self.do_locking = self.use_locking
4791
4792   def DeclareLocks(self, lu, level):
4793     pass
4794
4795   @staticmethod
4796   def _DiagnoseByOS(rlist):
4797     """Remaps a per-node return list into an a per-os per-node dictionary
4798
4799     @param rlist: a map with node names as keys and OS objects as values
4800
4801     @rtype: dict
4802     @return: a dictionary with osnames as keys and as value another
4803         map, with nodes as keys and tuples of (path, status, diagnose,
4804         variants, parameters, api_versions) as values, eg::
4805
4806           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4807                                      (/srv/..., False, "invalid api")],
4808                            "node2": [(/srv/..., True, "", [], [])]}
4809           }
4810
4811     """
4812     all_os = {}
4813     # we build here the list of nodes that didn't fail the RPC (at RPC
4814     # level), so that nodes with a non-responding node daemon don't
4815     # make all OSes invalid
4816     good_nodes = [node_name for node_name in rlist
4817                   if not rlist[node_name].fail_msg]
4818     for node_name, nr in rlist.items():
4819       if nr.fail_msg or not nr.payload:
4820         continue
4821       for (name, path, status, diagnose, variants,
4822            params, api_versions) in nr.payload:
4823         if name not in all_os:
4824           # build a list of nodes for this os containing empty lists
4825           # for each node in node_list
4826           all_os[name] = {}
4827           for nname in good_nodes:
4828             all_os[name][nname] = []
4829         # convert params from [name, help] to (name, help)
4830         params = [tuple(v) for v in params]
4831         all_os[name][node_name].append((path, status, diagnose,
4832                                         variants, params, api_versions))
4833     return all_os
4834
4835   def _GetQueryData(self, lu):
4836     """Computes the list of nodes and their attributes.
4837
4838     """
4839     # Locking is not used
4840     assert not (compat.any(lu.glm.is_owned(level)
4841                            for level in locking.LEVELS
4842                            if level != locking.LEVEL_CLUSTER) or
4843                 self.do_locking or self.use_locking)
4844
4845     valid_nodes = [node.name
4846                    for node in lu.cfg.GetAllNodesInfo().values()
4847                    if not node.offline and node.vm_capable]
4848     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4849     cluster = lu.cfg.GetClusterInfo()
4850
4851     data = {}
4852
4853     for (os_name, os_data) in pol.items():
4854       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4855                           hidden=(os_name in cluster.hidden_os),
4856                           blacklisted=(os_name in cluster.blacklisted_os))
4857
4858       variants = set()
4859       parameters = set()
4860       api_versions = set()
4861
4862       for idx, osl in enumerate(os_data.values()):
4863         info.valid = bool(info.valid and osl and osl[0][1])
4864         if not info.valid:
4865           break
4866
4867         (node_variants, node_params, node_api) = osl[0][3:6]
4868         if idx == 0:
4869           # First entry
4870           variants.update(node_variants)
4871           parameters.update(node_params)
4872           api_versions.update(node_api)
4873         else:
4874           # Filter out inconsistent values
4875           variants.intersection_update(node_variants)
4876           parameters.intersection_update(node_params)
4877           api_versions.intersection_update(node_api)
4878
4879       info.variants = list(variants)
4880       info.parameters = list(parameters)
4881       info.api_versions = list(api_versions)
4882
4883       data[os_name] = info
4884
4885     # Prepare data in requested order
4886     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4887             if name in data]
4888
4889
4890 class LUOsDiagnose(NoHooksLU):
4891   """Logical unit for OS diagnose/query.
4892
4893   """
4894   REQ_BGL = False
4895
4896   @staticmethod
4897   def _BuildFilter(fields, names):
4898     """Builds a filter for querying OSes.
4899
4900     """
4901     name_filter = qlang.MakeSimpleFilter("name", names)
4902
4903     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4904     # respective field is not requested
4905     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4906                      for fname in ["hidden", "blacklisted"]
4907                      if fname not in fields]
4908     if "valid" not in fields:
4909       status_filter.append([qlang.OP_TRUE, "valid"])
4910
4911     if status_filter:
4912       status_filter.insert(0, qlang.OP_AND)
4913     else:
4914       status_filter = None
4915
4916     if name_filter and status_filter:
4917       return [qlang.OP_AND, name_filter, status_filter]
4918     elif name_filter:
4919       return name_filter
4920     else:
4921       return status_filter
4922
4923   def CheckArguments(self):
4924     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4925                        self.op.output_fields, False)
4926
4927   def ExpandNames(self):
4928     self.oq.ExpandNames(self)
4929
4930   def Exec(self, feedback_fn):
4931     return self.oq.OldStyleQuery(self)
4932
4933
4934 class LUNodeRemove(LogicalUnit):
4935   """Logical unit for removing a node.
4936
4937   """
4938   HPATH = "node-remove"
4939   HTYPE = constants.HTYPE_NODE
4940
4941   def BuildHooksEnv(self):
4942     """Build hooks env.
4943
4944     """
4945     return {
4946       "OP_TARGET": self.op.node_name,
4947       "NODE_NAME": self.op.node_name,
4948       }
4949
4950   def BuildHooksNodes(self):
4951     """Build hooks nodes.
4952
4953     This doesn't run on the target node in the pre phase as a failed
4954     node would then be impossible to remove.
4955
4956     """
4957     all_nodes = self.cfg.GetNodeList()
4958     try:
4959       all_nodes.remove(self.op.node_name)
4960     except ValueError:
4961       pass
4962     return (all_nodes, all_nodes)
4963
4964   def CheckPrereq(self):
4965     """Check prerequisites.
4966
4967     This checks:
4968      - the node exists in the configuration
4969      - it does not have primary or secondary instances
4970      - it's not the master
4971
4972     Any errors are signaled by raising errors.OpPrereqError.
4973
4974     """
4975     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4976     node = self.cfg.GetNodeInfo(self.op.node_name)
4977     assert node is not None
4978
4979     masternode = self.cfg.GetMasterNode()
4980     if node.name == masternode:
4981       raise errors.OpPrereqError("Node is the master node, failover to another"
4982                                  " node is required", errors.ECODE_INVAL)
4983
4984     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4985       if node.name in instance.all_nodes:
4986         raise errors.OpPrereqError("Instance %s is still running on the node,"
4987                                    " please remove first" % instance_name,
4988                                    errors.ECODE_INVAL)
4989     self.op.node_name = node.name
4990     self.node = node
4991
4992   def Exec(self, feedback_fn):
4993     """Removes the node from the cluster.
4994
4995     """
4996     node = self.node
4997     logging.info("Stopping the node daemon and removing configs from node %s",
4998                  node.name)
4999
5000     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5001
5002     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5003       "Not owning BGL"
5004
5005     # Promote nodes to master candidate as needed
5006     _AdjustCandidatePool(self, exceptions=[node.name])
5007     self.context.RemoveNode(node.name)
5008
5009     # Run post hooks on the node before it's removed
5010     _RunPostHook(self, node.name)
5011
5012     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5013     msg = result.fail_msg
5014     if msg:
5015       self.LogWarning("Errors encountered on the remote node while leaving"
5016                       " the cluster: %s", msg)
5017
5018     # Remove node from our /etc/hosts
5019     if self.cfg.GetClusterInfo().modify_etc_hosts:
5020       master_node = self.cfg.GetMasterNode()
5021       result = self.rpc.call_etc_hosts_modify(master_node,
5022                                               constants.ETC_HOSTS_REMOVE,
5023                                               node.name, None)
5024       result.Raise("Can't update hosts file with new host data")
5025       _RedistributeAncillaryFiles(self)
5026
5027
5028 class _NodeQuery(_QueryBase):
5029   FIELDS = query.NODE_FIELDS
5030
5031   def ExpandNames(self, lu):
5032     lu.needed_locks = {}
5033     lu.share_locks = _ShareAll()
5034
5035     if self.names:
5036       self.wanted = _GetWantedNodes(lu, self.names)
5037     else:
5038       self.wanted = locking.ALL_SET
5039
5040     self.do_locking = (self.use_locking and
5041                        query.NQ_LIVE in self.requested_data)
5042
5043     if self.do_locking:
5044       # If any non-static field is requested we need to lock the nodes
5045       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5046
5047   def DeclareLocks(self, lu, level):
5048     pass
5049
5050   def _GetQueryData(self, lu):
5051     """Computes the list of nodes and their attributes.
5052
5053     """
5054     all_info = lu.cfg.GetAllNodesInfo()
5055
5056     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5057
5058     # Gather data as requested
5059     if query.NQ_LIVE in self.requested_data:
5060       # filter out non-vm_capable nodes
5061       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5062
5063       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5064                                         [lu.cfg.GetHypervisorType()])
5065       live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5066                        for (name, nresult) in node_data.items()
5067                        if not nresult.fail_msg and nresult.payload)
5068     else:
5069       live_data = None
5070
5071     if query.NQ_INST in self.requested_data:
5072       node_to_primary = dict([(name, set()) for name in nodenames])
5073       node_to_secondary = dict([(name, set()) for name in nodenames])
5074
5075       inst_data = lu.cfg.GetAllInstancesInfo()
5076
5077       for inst in inst_data.values():
5078         if inst.primary_node in node_to_primary:
5079           node_to_primary[inst.primary_node].add(inst.name)
5080         for secnode in inst.secondary_nodes:
5081           if secnode in node_to_secondary:
5082             node_to_secondary[secnode].add(inst.name)
5083     else:
5084       node_to_primary = None
5085       node_to_secondary = None
5086
5087     if query.NQ_OOB in self.requested_data:
5088       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5089                          for name, node in all_info.iteritems())
5090     else:
5091       oob_support = None
5092
5093     if query.NQ_GROUP in self.requested_data:
5094       groups = lu.cfg.GetAllNodeGroupsInfo()
5095     else:
5096       groups = {}
5097
5098     return query.NodeQueryData([all_info[name] for name in nodenames],
5099                                live_data, lu.cfg.GetMasterNode(),
5100                                node_to_primary, node_to_secondary, groups,
5101                                oob_support, lu.cfg.GetClusterInfo())
5102
5103
5104 class LUNodeQuery(NoHooksLU):
5105   """Logical unit for querying nodes.
5106
5107   """
5108   # pylint: disable=W0142
5109   REQ_BGL = False
5110
5111   def CheckArguments(self):
5112     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5113                          self.op.output_fields, self.op.use_locking)
5114
5115   def ExpandNames(self):
5116     self.nq.ExpandNames(self)
5117
5118   def DeclareLocks(self, level):
5119     self.nq.DeclareLocks(self, level)
5120
5121   def Exec(self, feedback_fn):
5122     return self.nq.OldStyleQuery(self)
5123
5124
5125 class LUNodeQueryvols(NoHooksLU):
5126   """Logical unit for getting volumes on node(s).
5127
5128   """
5129   REQ_BGL = False
5130   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5131   _FIELDS_STATIC = utils.FieldSet("node")
5132
5133   def CheckArguments(self):
5134     _CheckOutputFields(static=self._FIELDS_STATIC,
5135                        dynamic=self._FIELDS_DYNAMIC,
5136                        selected=self.op.output_fields)
5137
5138   def ExpandNames(self):
5139     self.share_locks = _ShareAll()
5140     self.needed_locks = {}
5141
5142     if not self.op.nodes:
5143       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5144     else:
5145       self.needed_locks[locking.LEVEL_NODE] = \
5146         _GetWantedNodes(self, self.op.nodes)
5147
5148   def Exec(self, feedback_fn):
5149     """Computes the list of nodes and their attributes.
5150
5151     """
5152     nodenames = self.owned_locks(locking.LEVEL_NODE)
5153     volumes = self.rpc.call_node_volumes(nodenames)
5154
5155     ilist = self.cfg.GetAllInstancesInfo()
5156     vol2inst = _MapInstanceDisksToNodes(ilist.values())
5157
5158     output = []
5159     for node in nodenames:
5160       nresult = volumes[node]
5161       if nresult.offline:
5162         continue
5163       msg = nresult.fail_msg
5164       if msg:
5165         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5166         continue
5167
5168       node_vols = sorted(nresult.payload,
5169                          key=operator.itemgetter("dev"))
5170
5171       for vol in node_vols:
5172         node_output = []
5173         for field in self.op.output_fields:
5174           if field == "node":
5175             val = node
5176           elif field == "phys":
5177             val = vol["dev"]
5178           elif field == "vg":
5179             val = vol["vg"]
5180           elif field == "name":
5181             val = vol["name"]
5182           elif field == "size":
5183             val = int(float(vol["size"]))
5184           elif field == "instance":
5185             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5186           else:
5187             raise errors.ParameterError(field)
5188           node_output.append(str(val))
5189
5190         output.append(node_output)
5191
5192     return output
5193
5194
5195 class LUNodeQueryStorage(NoHooksLU):
5196   """Logical unit for getting information on storage units on node(s).
5197
5198   """
5199   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5200   REQ_BGL = False
5201
5202   def CheckArguments(self):
5203     _CheckOutputFields(static=self._FIELDS_STATIC,
5204                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5205                        selected=self.op.output_fields)
5206
5207   def ExpandNames(self):
5208     self.share_locks = _ShareAll()
5209     self.needed_locks = {}
5210
5211     if self.op.nodes:
5212       self.needed_locks[locking.LEVEL_NODE] = \
5213         _GetWantedNodes(self, self.op.nodes)
5214     else:
5215       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5216
5217   def Exec(self, feedback_fn):
5218     """Computes the list of nodes and their attributes.
5219
5220     """
5221     self.nodes = self.owned_locks(locking.LEVEL_NODE)
5222
5223     # Always get name to sort by
5224     if constants.SF_NAME in self.op.output_fields:
5225       fields = self.op.output_fields[:]
5226     else:
5227       fields = [constants.SF_NAME] + self.op.output_fields
5228
5229     # Never ask for node or type as it's only known to the LU
5230     for extra in [constants.SF_NODE, constants.SF_TYPE]:
5231       while extra in fields:
5232         fields.remove(extra)
5233
5234     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5235     name_idx = field_idx[constants.SF_NAME]
5236
5237     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5238     data = self.rpc.call_storage_list(self.nodes,
5239                                       self.op.storage_type, st_args,
5240                                       self.op.name, fields)
5241
5242     result = []
5243
5244     for node in utils.NiceSort(self.nodes):
5245       nresult = data[node]
5246       if nresult.offline:
5247         continue
5248
5249       msg = nresult.fail_msg
5250       if msg:
5251         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5252         continue
5253
5254       rows = dict([(row[name_idx], row) for row in nresult.payload])
5255
5256       for name in utils.NiceSort(rows.keys()):
5257         row = rows[name]
5258
5259         out = []
5260
5261         for field in self.op.output_fields:
5262           if field == constants.SF_NODE:
5263             val = node
5264           elif field == constants.SF_TYPE:
5265             val = self.op.storage_type
5266           elif field in field_idx:
5267             val = row[field_idx[field]]
5268           else:
5269             raise errors.ParameterError(field)
5270
5271           out.append(val)
5272
5273         result.append(out)
5274
5275     return result
5276
5277
5278 class _InstanceQuery(_QueryBase):
5279   FIELDS = query.INSTANCE_FIELDS
5280
5281   def ExpandNames(self, lu):
5282     lu.needed_locks = {}
5283     lu.share_locks = _ShareAll()
5284
5285     if self.names:
5286       self.wanted = _GetWantedInstances(lu, self.names)
5287     else:
5288       self.wanted = locking.ALL_SET
5289
5290     self.do_locking = (self.use_locking and
5291                        query.IQ_LIVE in self.requested_data)
5292     if self.do_locking:
5293       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5294       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5295       lu.needed_locks[locking.LEVEL_NODE] = []
5296       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5297
5298     self.do_grouplocks = (self.do_locking and
5299                           query.IQ_NODES in self.requested_data)
5300
5301   def DeclareLocks(self, lu, level):
5302     if self.do_locking:
5303       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5304         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5305
5306         # Lock all groups used by instances optimistically; this requires going
5307         # via the node before it's locked, requiring verification later on
5308         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5309           set(group_uuid
5310               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5311               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5312       elif level == locking.LEVEL_NODE:
5313         lu._LockInstancesNodes() # pylint: disable=W0212
5314
5315   @staticmethod
5316   def _CheckGroupLocks(lu):
5317     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5318     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5319
5320     # Check if node groups for locked instances are still correct
5321     for instance_name in owned_instances:
5322       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5323
5324   def _GetQueryData(self, lu):
5325     """Computes the list of instances and their attributes.
5326
5327     """
5328     if self.do_grouplocks:
5329       self._CheckGroupLocks(lu)
5330
5331     cluster = lu.cfg.GetClusterInfo()
5332     all_info = lu.cfg.GetAllInstancesInfo()
5333
5334     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5335
5336     instance_list = [all_info[name] for name in instance_names]
5337     nodes = frozenset(itertools.chain(*(inst.all_nodes
5338                                         for inst in instance_list)))
5339     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5340     bad_nodes = []
5341     offline_nodes = []
5342     wrongnode_inst = set()
5343
5344     # Gather data as requested
5345     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5346       live_data = {}
5347       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5348       for name in nodes:
5349         result = node_data[name]
5350         if result.offline:
5351           # offline nodes will be in both lists
5352           assert result.fail_msg
5353           offline_nodes.append(name)
5354         if result.fail_msg:
5355           bad_nodes.append(name)
5356         elif result.payload:
5357           for inst in result.payload:
5358             if inst in all_info:
5359               if all_info[inst].primary_node == name:
5360                 live_data.update(result.payload)
5361               else:
5362                 wrongnode_inst.add(inst)
5363             else:
5364               # orphan instance; we don't list it here as we don't
5365               # handle this case yet in the output of instance listing
5366               logging.warning("Orphan instance '%s' found on node %s",
5367                               inst, name)
5368         # else no instance is alive
5369     else:
5370       live_data = {}
5371
5372     if query.IQ_DISKUSAGE in self.requested_data:
5373       disk_usage = dict((inst.name,
5374                          _ComputeDiskSize(inst.disk_template,
5375                                           [{constants.IDISK_SIZE: disk.size}
5376                                            for disk in inst.disks]))
5377                         for inst in instance_list)
5378     else:
5379       disk_usage = None
5380
5381     if query.IQ_CONSOLE in self.requested_data:
5382       consinfo = {}
5383       for inst in instance_list:
5384         if inst.name in live_data:
5385           # Instance is running
5386           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5387         else:
5388           consinfo[inst.name] = None
5389       assert set(consinfo.keys()) == set(instance_names)
5390     else:
5391       consinfo = None
5392
5393     if query.IQ_NODES in self.requested_data:
5394       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5395                                             instance_list)))
5396       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5397       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5398                     for uuid in set(map(operator.attrgetter("group"),
5399                                         nodes.values())))
5400     else:
5401       nodes = None
5402       groups = None
5403
5404     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5405                                    disk_usage, offline_nodes, bad_nodes,
5406                                    live_data, wrongnode_inst, consinfo,
5407                                    nodes, groups)
5408
5409
5410 class LUQuery(NoHooksLU):
5411   """Query for resources/items of a certain kind.
5412
5413   """
5414   # pylint: disable=W0142
5415   REQ_BGL = False
5416
5417   def CheckArguments(self):
5418     qcls = _GetQueryImplementation(self.op.what)
5419
5420     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5421
5422   def ExpandNames(self):
5423     self.impl.ExpandNames(self)
5424
5425   def DeclareLocks(self, level):
5426     self.impl.DeclareLocks(self, level)
5427
5428   def Exec(self, feedback_fn):
5429     return self.impl.NewStyleQuery(self)
5430
5431
5432 class LUQueryFields(NoHooksLU):
5433   """Query for resources/items of a certain kind.
5434
5435   """
5436   # pylint: disable=W0142
5437   REQ_BGL = False
5438
5439   def CheckArguments(self):
5440     self.qcls = _GetQueryImplementation(self.op.what)
5441
5442   def ExpandNames(self):
5443     self.needed_locks = {}
5444
5445   def Exec(self, feedback_fn):
5446     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5447
5448
5449 class LUNodeModifyStorage(NoHooksLU):
5450   """Logical unit for modifying a storage volume on a node.
5451
5452   """
5453   REQ_BGL = False
5454
5455   def CheckArguments(self):
5456     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5457
5458     storage_type = self.op.storage_type
5459
5460     try:
5461       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5462     except KeyError:
5463       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5464                                  " modified" % storage_type,
5465                                  errors.ECODE_INVAL)
5466
5467     diff = set(self.op.changes.keys()) - modifiable
5468     if diff:
5469       raise errors.OpPrereqError("The following fields can not be modified for"
5470                                  " storage units of type '%s': %r" %
5471                                  (storage_type, list(diff)),
5472                                  errors.ECODE_INVAL)
5473
5474   def ExpandNames(self):
5475     self.needed_locks = {
5476       locking.LEVEL_NODE: self.op.node_name,
5477       }
5478
5479   def Exec(self, feedback_fn):
5480     """Computes the list of nodes and their attributes.
5481
5482     """
5483     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5484     result = self.rpc.call_storage_modify(self.op.node_name,
5485                                           self.op.storage_type, st_args,
5486                                           self.op.name, self.op.changes)
5487     result.Raise("Failed to modify storage unit '%s' on %s" %
5488                  (self.op.name, self.op.node_name))
5489
5490
5491 class LUNodeAdd(LogicalUnit):
5492   """Logical unit for adding node to the cluster.
5493
5494   """
5495   HPATH = "node-add"
5496   HTYPE = constants.HTYPE_NODE
5497   _NFLAGS = ["master_capable", "vm_capable"]
5498
5499   def CheckArguments(self):
5500     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5501     # validate/normalize the node name
5502     self.hostname = netutils.GetHostname(name=self.op.node_name,
5503                                          family=self.primary_ip_family)
5504     self.op.node_name = self.hostname.name
5505
5506     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5507       raise errors.OpPrereqError("Cannot readd the master node",
5508                                  errors.ECODE_STATE)
5509
5510     if self.op.readd and self.op.group:
5511       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5512                                  " being readded", errors.ECODE_INVAL)
5513
5514   def BuildHooksEnv(self):
5515     """Build hooks env.
5516
5517     This will run on all nodes before, and on all nodes + the new node after.
5518
5519     """
5520     return {
5521       "OP_TARGET": self.op.node_name,
5522       "NODE_NAME": self.op.node_name,
5523       "NODE_PIP": self.op.primary_ip,
5524       "NODE_SIP": self.op.secondary_ip,
5525       "MASTER_CAPABLE": str(self.op.master_capable),
5526       "VM_CAPABLE": str(self.op.vm_capable),
5527       }
5528
5529   def BuildHooksNodes(self):
5530     """Build hooks nodes.
5531
5532     """
5533     # Exclude added node
5534     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5535     post_nodes = pre_nodes + [self.op.node_name, ]
5536
5537     return (pre_nodes, post_nodes)
5538
5539   def CheckPrereq(self):
5540     """Check prerequisites.
5541
5542     This checks:
5543      - the new node is not already in the config
5544      - it is resolvable
5545      - its parameters (single/dual homed) matches the cluster
5546
5547     Any errors are signaled by raising errors.OpPrereqError.
5548
5549     """
5550     cfg = self.cfg
5551     hostname = self.hostname
5552     node = hostname.name
5553     primary_ip = self.op.primary_ip = hostname.ip
5554     if self.op.secondary_ip is None:
5555       if self.primary_ip_family == netutils.IP6Address.family:
5556         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5557                                    " IPv4 address must be given as secondary",
5558                                    errors.ECODE_INVAL)
5559       self.op.secondary_ip = primary_ip
5560
5561     secondary_ip = self.op.secondary_ip
5562     if not netutils.IP4Address.IsValid(secondary_ip):
5563       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5564                                  " address" % secondary_ip, errors.ECODE_INVAL)
5565
5566     node_list = cfg.GetNodeList()
5567     if not self.op.readd and node in node_list:
5568       raise errors.OpPrereqError("Node %s is already in the configuration" %
5569                                  node, errors.ECODE_EXISTS)
5570     elif self.op.readd and node not in node_list:
5571       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5572                                  errors.ECODE_NOENT)
5573
5574     self.changed_primary_ip = False
5575
5576     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5577       if self.op.readd and node == existing_node_name:
5578         if existing_node.secondary_ip != secondary_ip:
5579           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5580                                      " address configuration as before",
5581                                      errors.ECODE_INVAL)
5582         if existing_node.primary_ip != primary_ip:
5583           self.changed_primary_ip = True
5584
5585         continue
5586
5587       if (existing_node.primary_ip == primary_ip or
5588           existing_node.secondary_ip == primary_ip or
5589           existing_node.primary_ip == secondary_ip or
5590           existing_node.secondary_ip == secondary_ip):
5591         raise errors.OpPrereqError("New node ip address(es) conflict with"
5592                                    " existing node %s" % existing_node.name,
5593                                    errors.ECODE_NOTUNIQUE)
5594
5595     # After this 'if' block, None is no longer a valid value for the
5596     # _capable op attributes
5597     if self.op.readd:
5598       old_node = self.cfg.GetNodeInfo(node)
5599       assert old_node is not None, "Can't retrieve locked node %s" % node
5600       for attr in self._NFLAGS:
5601         if getattr(self.op, attr) is None:
5602           setattr(self.op, attr, getattr(old_node, attr))
5603     else:
5604       for attr in self._NFLAGS:
5605         if getattr(self.op, attr) is None:
5606           setattr(self.op, attr, True)
5607
5608     if self.op.readd and not self.op.vm_capable:
5609       pri, sec = cfg.GetNodeInstances(node)
5610       if pri or sec:
5611         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5612                                    " flag set to false, but it already holds"
5613                                    " instances" % node,
5614                                    errors.ECODE_STATE)
5615
5616     # check that the type of the node (single versus dual homed) is the
5617     # same as for the master
5618     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5619     master_singlehomed = myself.secondary_ip == myself.primary_ip
5620     newbie_singlehomed = secondary_ip == primary_ip
5621     if master_singlehomed != newbie_singlehomed:
5622       if master_singlehomed:
5623         raise errors.OpPrereqError("The master has no secondary ip but the"
5624                                    " new node has one",
5625                                    errors.ECODE_INVAL)
5626       else:
5627         raise errors.OpPrereqError("The master has a secondary ip but the"
5628                                    " new node doesn't have one",
5629                                    errors.ECODE_INVAL)
5630
5631     # checks reachability
5632     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5633       raise errors.OpPrereqError("Node not reachable by ping",
5634                                  errors.ECODE_ENVIRON)
5635
5636     if not newbie_singlehomed:
5637       # check reachability from my secondary ip to newbie's secondary ip
5638       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5639                            source=myself.secondary_ip):
5640         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5641                                    " based ping to node daemon port",
5642                                    errors.ECODE_ENVIRON)
5643
5644     if self.op.readd:
5645       exceptions = [node]
5646     else:
5647       exceptions = []
5648
5649     if self.op.master_capable:
5650       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5651     else:
5652       self.master_candidate = False
5653
5654     if self.op.readd:
5655       self.new_node = old_node
5656     else:
5657       node_group = cfg.LookupNodeGroup(self.op.group)
5658       self.new_node = objects.Node(name=node,
5659                                    primary_ip=primary_ip,
5660                                    secondary_ip=secondary_ip,
5661                                    master_candidate=self.master_candidate,
5662                                    offline=False, drained=False,
5663                                    group=node_group)
5664
5665     if self.op.ndparams:
5666       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5667
5668     if self.op.hv_state:
5669       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5670
5671     if self.op.disk_state:
5672       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5673
5674     # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5675     #       it a property on the base class.
5676     result = rpc.DnsOnlyRunner().call_version([node])[node]
5677     result.Raise("Can't get version information from node %s" % node)
5678     if constants.PROTOCOL_VERSION == result.payload:
5679       logging.info("Communication to node %s fine, sw version %s match",
5680                    node, result.payload)
5681     else:
5682       raise errors.OpPrereqError("Version mismatch master version %s,"
5683                                  " node version %s" %
5684                                  (constants.PROTOCOL_VERSION, result.payload),
5685                                  errors.ECODE_ENVIRON)
5686
5687   def Exec(self, feedback_fn):
5688     """Adds the new node to the cluster.
5689
5690     """
5691     new_node = self.new_node
5692     node = new_node.name
5693
5694     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5695       "Not owning BGL"
5696
5697     # We adding a new node so we assume it's powered
5698     new_node.powered = True
5699
5700     # for re-adds, reset the offline/drained/master-candidate flags;
5701     # we need to reset here, otherwise offline would prevent RPC calls
5702     # later in the procedure; this also means that if the re-add
5703     # fails, we are left with a non-offlined, broken node
5704     if self.op.readd:
5705       new_node.drained = new_node.offline = False # pylint: disable=W0201
5706       self.LogInfo("Readding a node, the offline/drained flags were reset")
5707       # if we demote the node, we do cleanup later in the procedure
5708       new_node.master_candidate = self.master_candidate
5709       if self.changed_primary_ip:
5710         new_node.primary_ip = self.op.primary_ip
5711
5712     # copy the master/vm_capable flags
5713     for attr in self._NFLAGS:
5714       setattr(new_node, attr, getattr(self.op, attr))
5715
5716     # notify the user about any possible mc promotion
5717     if new_node.master_candidate:
5718       self.LogInfo("Node will be a master candidate")
5719
5720     if self.op.ndparams:
5721       new_node.ndparams = self.op.ndparams
5722     else:
5723       new_node.ndparams = {}
5724
5725     if self.op.hv_state:
5726       new_node.hv_state_static = self.new_hv_state
5727
5728     if self.op.disk_state:
5729       new_node.disk_state_static = self.new_disk_state
5730
5731     # Add node to our /etc/hosts, and add key to known_hosts
5732     if self.cfg.GetClusterInfo().modify_etc_hosts:
5733       master_node = self.cfg.GetMasterNode()
5734       result = self.rpc.call_etc_hosts_modify(master_node,
5735                                               constants.ETC_HOSTS_ADD,
5736                                               self.hostname.name,
5737                                               self.hostname.ip)
5738       result.Raise("Can't update hosts file with new host data")
5739
5740     if new_node.secondary_ip != new_node.primary_ip:
5741       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5742                                False)
5743
5744     node_verify_list = [self.cfg.GetMasterNode()]
5745     node_verify_param = {
5746       constants.NV_NODELIST: ([node], {}),
5747       # TODO: do a node-net-test as well?
5748     }
5749
5750     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5751                                        self.cfg.GetClusterName())
5752     for verifier in node_verify_list:
5753       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5754       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5755       if nl_payload:
5756         for failed in nl_payload:
5757           feedback_fn("ssh/hostname verification failed"
5758                       " (checking from %s): %s" %
5759                       (verifier, nl_payload[failed]))
5760         raise errors.OpExecError("ssh/hostname verification failed")
5761
5762     if self.op.readd:
5763       _RedistributeAncillaryFiles(self)
5764       self.context.ReaddNode(new_node)
5765       # make sure we redistribute the config
5766       self.cfg.Update(new_node, feedback_fn)
5767       # and make sure the new node will not have old files around
5768       if not new_node.master_candidate:
5769         result = self.rpc.call_node_demote_from_mc(new_node.name)
5770         msg = result.fail_msg
5771         if msg:
5772           self.LogWarning("Node failed to demote itself from master"
5773                           " candidate status: %s" % msg)
5774     else:
5775       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5776                                   additional_vm=self.op.vm_capable)
5777       self.context.AddNode(new_node, self.proc.GetECId())
5778
5779
5780 class LUNodeSetParams(LogicalUnit):
5781   """Modifies the parameters of a node.
5782
5783   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5784       to the node role (as _ROLE_*)
5785   @cvar _R2F: a dictionary from node role to tuples of flags
5786   @cvar _FLAGS: a list of attribute names corresponding to the flags
5787
5788   """
5789   HPATH = "node-modify"
5790   HTYPE = constants.HTYPE_NODE
5791   REQ_BGL = False
5792   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5793   _F2R = {
5794     (True, False, False): _ROLE_CANDIDATE,
5795     (False, True, False): _ROLE_DRAINED,
5796     (False, False, True): _ROLE_OFFLINE,
5797     (False, False, False): _ROLE_REGULAR,
5798     }
5799   _R2F = dict((v, k) for k, v in _F2R.items())
5800   _FLAGS = ["master_candidate", "drained", "offline"]
5801
5802   def CheckArguments(self):
5803     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5804     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5805                 self.op.master_capable, self.op.vm_capable,
5806                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5807                 self.op.disk_state]
5808     if all_mods.count(None) == len(all_mods):
5809       raise errors.OpPrereqError("Please pass at least one modification",
5810                                  errors.ECODE_INVAL)
5811     if all_mods.count(True) > 1:
5812       raise errors.OpPrereqError("Can't set the node into more than one"
5813                                  " state at the same time",
5814                                  errors.ECODE_INVAL)
5815
5816     # Boolean value that tells us whether we might be demoting from MC
5817     self.might_demote = (self.op.master_candidate == False or
5818                          self.op.offline == True or
5819                          self.op.drained == True or
5820                          self.op.master_capable == False)
5821
5822     if self.op.secondary_ip:
5823       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5824         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5825                                    " address" % self.op.secondary_ip,
5826                                    errors.ECODE_INVAL)
5827
5828     self.lock_all = self.op.auto_promote and self.might_demote
5829     self.lock_instances = self.op.secondary_ip is not None
5830
5831   def _InstanceFilter(self, instance):
5832     """Filter for getting affected instances.
5833
5834     """
5835     return (instance.disk_template in constants.DTS_INT_MIRROR and
5836             self.op.node_name in instance.all_nodes)
5837
5838   def ExpandNames(self):
5839     if self.lock_all:
5840       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5841     else:
5842       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5843
5844     # Since modifying a node can have severe effects on currently running
5845     # operations the resource lock is at least acquired in shared mode
5846     self.needed_locks[locking.LEVEL_NODE_RES] = \
5847       self.needed_locks[locking.LEVEL_NODE]
5848
5849     # Get node resource and instance locks in shared mode; they are not used
5850     # for anything but read-only access
5851     self.share_locks[locking.LEVEL_NODE_RES] = 1
5852     self.share_locks[locking.LEVEL_INSTANCE] = 1
5853
5854     if self.lock_instances:
5855       self.needed_locks[locking.LEVEL_INSTANCE] = \
5856         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5857
5858   def BuildHooksEnv(self):
5859     """Build hooks env.
5860
5861     This runs on the master node.
5862
5863     """
5864     return {
5865       "OP_TARGET": self.op.node_name,
5866       "MASTER_CANDIDATE": str(self.op.master_candidate),
5867       "OFFLINE": str(self.op.offline),
5868       "DRAINED": str(self.op.drained),
5869       "MASTER_CAPABLE": str(self.op.master_capable),
5870       "VM_CAPABLE": str(self.op.vm_capable),
5871       }
5872
5873   def BuildHooksNodes(self):
5874     """Build hooks nodes.
5875
5876     """
5877     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5878     return (nl, nl)
5879
5880   def CheckPrereq(self):
5881     """Check prerequisites.
5882
5883     This only checks the instance list against the existing names.
5884
5885     """
5886     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5887
5888     if self.lock_instances:
5889       affected_instances = \
5890         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5891
5892       # Verify instance locks
5893       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5894       wanted_instances = frozenset(affected_instances.keys())
5895       if wanted_instances - owned_instances:
5896         raise errors.OpPrereqError("Instances affected by changing node %s's"
5897                                    " secondary IP address have changed since"
5898                                    " locks were acquired, wanted '%s', have"
5899                                    " '%s'; retry the operation" %
5900                                    (self.op.node_name,
5901                                     utils.CommaJoin(wanted_instances),
5902                                     utils.CommaJoin(owned_instances)),
5903                                    errors.ECODE_STATE)
5904     else:
5905       affected_instances = None
5906
5907     if (self.op.master_candidate is not None or
5908         self.op.drained is not None or
5909         self.op.offline is not None):
5910       # we can't change the master's node flags
5911       if self.op.node_name == self.cfg.GetMasterNode():
5912         raise errors.OpPrereqError("The master role can be changed"
5913                                    " only via master-failover",
5914                                    errors.ECODE_INVAL)
5915
5916     if self.op.master_candidate and not node.master_capable:
5917       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5918                                  " it a master candidate" % node.name,
5919                                  errors.ECODE_STATE)
5920
5921     if self.op.vm_capable == False:
5922       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5923       if ipri or isec:
5924         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5925                                    " the vm_capable flag" % node.name,
5926                                    errors.ECODE_STATE)
5927
5928     if node.master_candidate and self.might_demote and not self.lock_all:
5929       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5930       # check if after removing the current node, we're missing master
5931       # candidates
5932       (mc_remaining, mc_should, _) = \
5933           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5934       if mc_remaining < mc_should:
5935         raise errors.OpPrereqError("Not enough master candidates, please"
5936                                    " pass auto promote option to allow"
5937                                    " promotion (--auto-promote or RAPI"
5938                                    " auto_promote=True)", errors.ECODE_STATE)
5939
5940     self.old_flags = old_flags = (node.master_candidate,
5941                                   node.drained, node.offline)
5942     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5943     self.old_role = old_role = self._F2R[old_flags]
5944
5945     # Check for ineffective changes
5946     for attr in self._FLAGS:
5947       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5948         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5949         setattr(self.op, attr, None)
5950
5951     # Past this point, any flag change to False means a transition
5952     # away from the respective state, as only real changes are kept
5953
5954     # TODO: We might query the real power state if it supports OOB
5955     if _SupportsOob(self.cfg, node):
5956       if self.op.offline is False and not (node.powered or
5957                                            self.op.powered == True):
5958         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5959                                     " offline status can be reset") %
5960                                    self.op.node_name)
5961     elif self.op.powered is not None:
5962       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5963                                   " as it does not support out-of-band"
5964                                   " handling") % self.op.node_name)
5965
5966     # If we're being deofflined/drained, we'll MC ourself if needed
5967     if (self.op.drained == False or self.op.offline == False or
5968         (self.op.master_capable and not node.master_capable)):
5969       if _DecideSelfPromotion(self):
5970         self.op.master_candidate = True
5971         self.LogInfo("Auto-promoting node to master candidate")
5972
5973     # If we're no longer master capable, we'll demote ourselves from MC
5974     if self.op.master_capable == False and node.master_candidate:
5975       self.LogInfo("Demoting from master candidate")
5976       self.op.master_candidate = False
5977
5978     # Compute new role
5979     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5980     if self.op.master_candidate:
5981       new_role = self._ROLE_CANDIDATE
5982     elif self.op.drained:
5983       new_role = self._ROLE_DRAINED
5984     elif self.op.offline:
5985       new_role = self._ROLE_OFFLINE
5986     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5987       # False is still in new flags, which means we're un-setting (the
5988       # only) True flag
5989       new_role = self._ROLE_REGULAR
5990     else: # no new flags, nothing, keep old role
5991       new_role = old_role
5992
5993     self.new_role = new_role
5994
5995     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5996       # Trying to transition out of offline status
5997       result = self.rpc.call_version([node.name])[node.name]
5998       if result.fail_msg:
5999         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6000                                    " to report its version: %s" %
6001                                    (node.name, result.fail_msg),
6002                                    errors.ECODE_STATE)
6003       else:
6004         self.LogWarning("Transitioning node from offline to online state"
6005                         " without using re-add. Please make sure the node"
6006                         " is healthy!")
6007
6008     # When changing the secondary ip, verify if this is a single-homed to
6009     # multi-homed transition or vice versa, and apply the relevant
6010     # restrictions.
6011     if self.op.secondary_ip:
6012       # Ok even without locking, because this can't be changed by any LU
6013       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6014       master_singlehomed = master.secondary_ip == master.primary_ip
6015       if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6016         if self.op.force and node.name == master.name:
6017           self.LogWarning("Transitioning from single-homed to multi-homed"
6018                           " cluster. All nodes will require a secondary ip.")
6019         else:
6020           raise errors.OpPrereqError("Changing the secondary ip on a"
6021                                      " single-homed cluster requires the"
6022                                      " --force option to be passed, and the"
6023                                      " target node to be the master",
6024                                      errors.ECODE_INVAL)
6025       elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6026         if self.op.force and node.name == master.name:
6027           self.LogWarning("Transitioning from multi-homed to single-homed"
6028                           " cluster. Secondary IPs will have to be removed.")
6029         else:
6030           raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6031                                      " same as the primary IP on a multi-homed"
6032                                      " cluster, unless the --force option is"
6033                                      " passed, and the target node is the"
6034                                      " master", errors.ECODE_INVAL)
6035
6036       assert not (frozenset(affected_instances) -
6037                   self.owned_locks(locking.LEVEL_INSTANCE))
6038
6039       if node.offline:
6040         if affected_instances:
6041           raise errors.OpPrereqError("Cannot change secondary IP address:"
6042                                      " offline node has instances (%s)"
6043                                      " configured to use it" %
6044                                      utils.CommaJoin(affected_instances.keys()))
6045       else:
6046         # On online nodes, check that no instances are running, and that
6047         # the node has the new ip and we can reach it.
6048         for instance in affected_instances.values():
6049           _CheckInstanceState(self, instance, INSTANCE_DOWN,
6050                               msg="cannot change secondary ip")
6051
6052         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6053         if master.name != node.name:
6054           # check reachability from master secondary ip to new secondary ip
6055           if not netutils.TcpPing(self.op.secondary_ip,
6056                                   constants.DEFAULT_NODED_PORT,
6057                                   source=master.secondary_ip):
6058             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6059                                        " based ping to node daemon port",
6060                                        errors.ECODE_ENVIRON)
6061
6062     if self.op.ndparams:
6063       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6064       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6065       self.new_ndparams = new_ndparams
6066
6067     if self.op.hv_state:
6068       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6069                                                  self.node.hv_state_static)
6070
6071     if self.op.disk_state:
6072       self.new_disk_state = \
6073         _MergeAndVerifyDiskState(self.op.disk_state,
6074                                  self.node.disk_state_static)
6075
6076   def Exec(self, feedback_fn):
6077     """Modifies a node.
6078
6079     """
6080     node = self.node
6081     old_role = self.old_role
6082     new_role = self.new_role
6083
6084     result = []
6085
6086     if self.op.ndparams:
6087       node.ndparams = self.new_ndparams
6088
6089     if self.op.powered is not None:
6090       node.powered = self.op.powered
6091
6092     if self.op.hv_state:
6093       node.hv_state_static = self.new_hv_state
6094
6095     if self.op.disk_state:
6096       node.disk_state_static = self.new_disk_state
6097
6098     for attr in ["master_capable", "vm_capable"]:
6099       val = getattr(self.op, attr)
6100       if val is not None:
6101         setattr(node, attr, val)
6102         result.append((attr, str(val)))
6103
6104     if new_role != old_role:
6105       # Tell the node to demote itself, if no longer MC and not offline
6106       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6107         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6108         if msg:
6109           self.LogWarning("Node failed to demote itself: %s", msg)
6110
6111       new_flags = self._R2F[new_role]
6112       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6113         if of != nf:
6114           result.append((desc, str(nf)))
6115       (node.master_candidate, node.drained, node.offline) = new_flags
6116
6117       # we locked all nodes, we adjust the CP before updating this node
6118       if self.lock_all:
6119         _AdjustCandidatePool(self, [node.name])
6120
6121     if self.op.secondary_ip:
6122       node.secondary_ip = self.op.secondary_ip
6123       result.append(("secondary_ip", self.op.secondary_ip))
6124
6125     # this will trigger configuration file update, if needed
6126     self.cfg.Update(node, feedback_fn)
6127
6128     # this will trigger job queue propagation or cleanup if the mc
6129     # flag changed
6130     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6131       self.context.ReaddNode(node)
6132
6133     return result
6134
6135
6136 class LUNodePowercycle(NoHooksLU):
6137   """Powercycles a node.
6138
6139   """
6140   REQ_BGL = False
6141
6142   def CheckArguments(self):
6143     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6144     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6145       raise errors.OpPrereqError("The node is the master and the force"
6146                                  " parameter was not set",
6147                                  errors.ECODE_INVAL)
6148
6149   def ExpandNames(self):
6150     """Locking for PowercycleNode.
6151
6152     This is a last-resort option and shouldn't block on other
6153     jobs. Therefore, we grab no locks.
6154
6155     """
6156     self.needed_locks = {}
6157
6158   def Exec(self, feedback_fn):
6159     """Reboots a node.
6160
6161     """
6162     result = self.rpc.call_node_powercycle(self.op.node_name,
6163                                            self.cfg.GetHypervisorType())
6164     result.Raise("Failed to schedule the reboot")
6165     return result.payload
6166
6167
6168 class LUClusterQuery(NoHooksLU):
6169   """Query cluster configuration.
6170
6171   """
6172   REQ_BGL = False
6173
6174   def ExpandNames(self):
6175     self.needed_locks = {}
6176
6177   def Exec(self, feedback_fn):
6178     """Return cluster config.
6179
6180     """
6181     cluster = self.cfg.GetClusterInfo()
6182     os_hvp = {}
6183
6184     # Filter just for enabled hypervisors
6185     for os_name, hv_dict in cluster.os_hvp.items():
6186       os_hvp[os_name] = {}
6187       for hv_name, hv_params in hv_dict.items():
6188         if hv_name in cluster.enabled_hypervisors:
6189           os_hvp[os_name][hv_name] = hv_params
6190
6191     # Convert ip_family to ip_version
6192     primary_ip_version = constants.IP4_VERSION
6193     if cluster.primary_ip_family == netutils.IP6Address.family:
6194       primary_ip_version = constants.IP6_VERSION
6195
6196     result = {
6197       "software_version": constants.RELEASE_VERSION,
6198       "protocol_version": constants.PROTOCOL_VERSION,
6199       "config_version": constants.CONFIG_VERSION,
6200       "os_api_version": max(constants.OS_API_VERSIONS),
6201       "export_version": constants.EXPORT_VERSION,
6202       "architecture": runtime.GetArchInfo(),
6203       "name": cluster.cluster_name,
6204       "master": cluster.master_node,
6205       "default_hypervisor": cluster.primary_hypervisor,
6206       "enabled_hypervisors": cluster.enabled_hypervisors,
6207       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6208                         for hypervisor_name in cluster.enabled_hypervisors]),
6209       "os_hvp": os_hvp,
6210       "beparams": cluster.beparams,
6211       "osparams": cluster.osparams,
6212       "ipolicy": cluster.ipolicy,
6213       "nicparams": cluster.nicparams,
6214       "ndparams": cluster.ndparams,
6215       "diskparams": cluster.diskparams,
6216       "candidate_pool_size": cluster.candidate_pool_size,
6217       "master_netdev": cluster.master_netdev,
6218       "master_netmask": cluster.master_netmask,
6219       "use_external_mip_script": cluster.use_external_mip_script,
6220       "volume_group_name": cluster.volume_group_name,
6221       "drbd_usermode_helper": cluster.drbd_usermode_helper,
6222       "file_storage_dir": cluster.file_storage_dir,
6223       "shared_file_storage_dir": cluster.shared_file_storage_dir,
6224       "maintain_node_health": cluster.maintain_node_health,
6225       "ctime": cluster.ctime,
6226       "mtime": cluster.mtime,
6227       "uuid": cluster.uuid,
6228       "tags": list(cluster.GetTags()),
6229       "uid_pool": cluster.uid_pool,
6230       "default_iallocator": cluster.default_iallocator,
6231       "reserved_lvs": cluster.reserved_lvs,
6232       "primary_ip_version": primary_ip_version,
6233       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6234       "hidden_os": cluster.hidden_os,
6235       "blacklisted_os": cluster.blacklisted_os,
6236       }
6237
6238     return result
6239
6240
6241 class LUClusterConfigQuery(NoHooksLU):
6242   """Return configuration values.
6243
6244   """
6245   REQ_BGL = False
6246
6247   def CheckArguments(self):
6248     self.cq = _ClusterQuery(None, self.op.output_fields, False)
6249
6250   def ExpandNames(self):
6251     self.cq.ExpandNames(self)
6252
6253   def DeclareLocks(self, level):
6254     self.cq.DeclareLocks(self, level)
6255
6256   def Exec(self, feedback_fn):
6257     result = self.cq.OldStyleQuery(self)
6258
6259     assert len(result) == 1
6260
6261     return result[0]
6262
6263
6264 class _ClusterQuery(_QueryBase):
6265   FIELDS = query.CLUSTER_FIELDS
6266
6267   #: Do not sort (there is only one item)
6268   SORT_FIELD = None
6269
6270   def ExpandNames(self, lu):
6271     lu.needed_locks = {}
6272
6273     # The following variables interact with _QueryBase._GetNames
6274     self.wanted = locking.ALL_SET
6275     self.do_locking = self.use_locking
6276
6277     if self.do_locking:
6278       raise errors.OpPrereqError("Can not use locking for cluster queries",
6279                                  errors.ECODE_INVAL)
6280
6281   def DeclareLocks(self, lu, level):
6282     pass
6283
6284   def _GetQueryData(self, lu):
6285     """Computes the list of nodes and their attributes.
6286
6287     """
6288     # Locking is not used
6289     assert not (compat.any(lu.glm.is_owned(level)
6290                            for level in locking.LEVELS
6291                            if level != locking.LEVEL_CLUSTER) or
6292                 self.do_locking or self.use_locking)
6293
6294     if query.CQ_CONFIG in self.requested_data:
6295       cluster = lu.cfg.GetClusterInfo()
6296     else:
6297       cluster = NotImplemented
6298
6299     if query.CQ_QUEUE_DRAINED in self.requested_data:
6300       drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6301     else:
6302       drain_flag = NotImplemented
6303
6304     if query.CQ_WATCHER_PAUSE in self.requested_data:
6305       watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6306     else:
6307       watcher_pause = NotImplemented
6308
6309     return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6310
6311
6312 class LUInstanceActivateDisks(NoHooksLU):
6313   """Bring up an instance's disks.
6314
6315   """
6316   REQ_BGL = False
6317
6318   def ExpandNames(self):
6319     self._ExpandAndLockInstance()
6320     self.needed_locks[locking.LEVEL_NODE] = []
6321     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6322
6323   def DeclareLocks(self, level):
6324     if level == locking.LEVEL_NODE:
6325       self._LockInstancesNodes()
6326
6327   def CheckPrereq(self):
6328     """Check prerequisites.
6329
6330     This checks that the instance is in the cluster.
6331
6332     """
6333     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6334     assert self.instance is not None, \
6335       "Cannot retrieve locked instance %s" % self.op.instance_name
6336     _CheckNodeOnline(self, self.instance.primary_node)
6337
6338   def Exec(self, feedback_fn):
6339     """Activate the disks.
6340
6341     """
6342     disks_ok, disks_info = \
6343               _AssembleInstanceDisks(self, self.instance,
6344                                      ignore_size=self.op.ignore_size)
6345     if not disks_ok:
6346       raise errors.OpExecError("Cannot activate block devices")
6347
6348     if self.op.wait_for_sync:
6349       if not _WaitForSync(self, self.instance):
6350         raise errors.OpExecError("Some disks of the instance are degraded!")
6351
6352     return disks_info
6353
6354
6355 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6356                            ignore_size=False):
6357   """Prepare the block devices for an instance.
6358
6359   This sets up the block devices on all nodes.
6360
6361   @type lu: L{LogicalUnit}
6362   @param lu: the logical unit on whose behalf we execute
6363   @type instance: L{objects.Instance}
6364   @param instance: the instance for whose disks we assemble
6365   @type disks: list of L{objects.Disk} or None
6366   @param disks: which disks to assemble (or all, if None)
6367   @type ignore_secondaries: boolean
6368   @param ignore_secondaries: if true, errors on secondary nodes
6369       won't result in an error return from the function
6370   @type ignore_size: boolean
6371   @param ignore_size: if true, the current known size of the disk
6372       will not be used during the disk activation, useful for cases
6373       when the size is wrong
6374   @return: False if the operation failed, otherwise a list of
6375       (host, instance_visible_name, node_visible_name)
6376       with the mapping from node devices to instance devices
6377
6378   """
6379   device_info = []
6380   disks_ok = True
6381   iname = instance.name
6382   disks = _ExpandCheckDisks(instance, disks)
6383
6384   # With the two passes mechanism we try to reduce the window of
6385   # opportunity for the race condition of switching DRBD to primary
6386   # before handshaking occured, but we do not eliminate it
6387
6388   # The proper fix would be to wait (with some limits) until the
6389   # connection has been made and drbd transitions from WFConnection
6390   # into any other network-connected state (Connected, SyncTarget,
6391   # SyncSource, etc.)
6392
6393   # 1st pass, assemble on all nodes in secondary mode
6394   for idx, inst_disk in enumerate(disks):
6395     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6396       if ignore_size:
6397         node_disk = node_disk.Copy()
6398         node_disk.UnsetSize()
6399       lu.cfg.SetDiskID(node_disk, node)
6400       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6401                                              False, idx)
6402       msg = result.fail_msg
6403       if msg:
6404         is_offline_secondary = (node in instance.secondary_nodes and
6405                                 result.offline)
6406         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6407                            " (is_primary=False, pass=1): %s",
6408                            inst_disk.iv_name, node, msg)
6409         if not (ignore_secondaries or is_offline_secondary):
6410           disks_ok = False
6411
6412   # FIXME: race condition on drbd migration to primary
6413
6414   # 2nd pass, do only the primary node
6415   for idx, inst_disk in enumerate(disks):
6416     dev_path = None
6417
6418     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6419       if node != instance.primary_node:
6420         continue
6421       if ignore_size:
6422         node_disk = node_disk.Copy()
6423         node_disk.UnsetSize()
6424       lu.cfg.SetDiskID(node_disk, node)
6425       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6426                                              True, idx)
6427       msg = result.fail_msg
6428       if msg:
6429         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6430                            " (is_primary=True, pass=2): %s",
6431                            inst_disk.iv_name, node, msg)
6432         disks_ok = False
6433       else:
6434         dev_path = result.payload
6435
6436     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6437
6438   # leave the disks configured for the primary node
6439   # this is a workaround that would be fixed better by
6440   # improving the logical/physical id handling
6441   for disk in disks:
6442     lu.cfg.SetDiskID(disk, instance.primary_node)
6443
6444   return disks_ok, device_info
6445
6446
6447 def _StartInstanceDisks(lu, instance, force):
6448   """Start the disks of an instance.
6449
6450   """
6451   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6452                                            ignore_secondaries=force)
6453   if not disks_ok:
6454     _ShutdownInstanceDisks(lu, instance)
6455     if force is not None and not force:
6456       lu.proc.LogWarning("", hint="If the message above refers to a"
6457                          " secondary node,"
6458                          " you can retry the operation using '--force'.")
6459     raise errors.OpExecError("Disk consistency error")
6460
6461
6462 class LUInstanceDeactivateDisks(NoHooksLU):
6463   """Shutdown an instance's disks.
6464
6465   """
6466   REQ_BGL = False
6467
6468   def ExpandNames(self):
6469     self._ExpandAndLockInstance()
6470     self.needed_locks[locking.LEVEL_NODE] = []
6471     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6472
6473   def DeclareLocks(self, level):
6474     if level == locking.LEVEL_NODE:
6475       self._LockInstancesNodes()
6476
6477   def CheckPrereq(self):
6478     """Check prerequisites.
6479
6480     This checks that the instance is in the cluster.
6481
6482     """
6483     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6484     assert self.instance is not None, \
6485       "Cannot retrieve locked instance %s" % self.op.instance_name
6486
6487   def Exec(self, feedback_fn):
6488     """Deactivate the disks
6489
6490     """
6491     instance = self.instance
6492     if self.op.force:
6493       _ShutdownInstanceDisks(self, instance)
6494     else:
6495       _SafeShutdownInstanceDisks(self, instance)
6496
6497
6498 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6499   """Shutdown block devices of an instance.
6500
6501   This function checks if an instance is running, before calling
6502   _ShutdownInstanceDisks.
6503
6504   """
6505   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6506   _ShutdownInstanceDisks(lu, instance, disks=disks)
6507
6508
6509 def _ExpandCheckDisks(instance, disks):
6510   """Return the instance disks selected by the disks list
6511
6512   @type disks: list of L{objects.Disk} or None
6513   @param disks: selected disks
6514   @rtype: list of L{objects.Disk}
6515   @return: selected instance disks to act on
6516
6517   """
6518   if disks is None:
6519     return instance.disks
6520   else:
6521     if not set(disks).issubset(instance.disks):
6522       raise errors.ProgrammerError("Can only act on disks belonging to the"
6523                                    " target instance")
6524     return disks
6525
6526
6527 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6528   """Shutdown block devices of an instance.
6529
6530   This does the shutdown on all nodes of the instance.
6531
6532   If the ignore_primary is false, errors on the primary node are
6533   ignored.
6534
6535   """
6536   all_result = True
6537   disks = _ExpandCheckDisks(instance, disks)
6538
6539   for disk in disks:
6540     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6541       lu.cfg.SetDiskID(top_disk, node)
6542       result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6543       msg = result.fail_msg
6544       if msg:
6545         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6546                       disk.iv_name, node, msg)
6547         if ((node == instance.primary_node and not ignore_primary) or
6548             (node != instance.primary_node and not result.offline)):
6549           all_result = False
6550   return all_result
6551
6552
6553 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6554   """Checks if a node has enough free memory.
6555
6556   This function check if a given node has the needed amount of free
6557   memory. In case the node has less memory or we cannot get the
6558   information from the node, this function raise an OpPrereqError
6559   exception.
6560
6561   @type lu: C{LogicalUnit}
6562   @param lu: a logical unit from which we get configuration data
6563   @type node: C{str}
6564   @param node: the node to check
6565   @type reason: C{str}
6566   @param reason: string to use in the error message
6567   @type requested: C{int}
6568   @param requested: the amount of memory in MiB to check for
6569   @type hypervisor_name: C{str}
6570   @param hypervisor_name: the hypervisor to ask for memory stats
6571   @rtype: integer
6572   @return: node current free memory
6573   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6574       we cannot check the node
6575
6576   """
6577   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6578   nodeinfo[node].Raise("Can't get data from node %s" % node,
6579                        prereq=True, ecode=errors.ECODE_ENVIRON)
6580   (_, _, (hv_info, )) = nodeinfo[node].payload
6581
6582   free_mem = hv_info.get("memory_free", None)
6583   if not isinstance(free_mem, int):
6584     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6585                                " was '%s'" % (node, free_mem),
6586                                errors.ECODE_ENVIRON)
6587   if requested > free_mem:
6588     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6589                                " needed %s MiB, available %s MiB" %
6590                                (node, reason, requested, free_mem),
6591                                errors.ECODE_NORES)
6592   return free_mem
6593
6594
6595 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6596   """Checks if nodes have enough free disk space in the all VGs.
6597
6598   This function check if all given nodes have the needed amount of
6599   free disk. In case any node has less disk or we cannot get the
6600   information from the node, this function raise an OpPrereqError
6601   exception.
6602
6603   @type lu: C{LogicalUnit}
6604   @param lu: a logical unit from which we get configuration data
6605   @type nodenames: C{list}
6606   @param nodenames: the list of node names to check
6607   @type req_sizes: C{dict}
6608   @param req_sizes: the hash of vg and corresponding amount of disk in
6609       MiB to check for
6610   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6611       or we cannot check the node
6612
6613   """
6614   for vg, req_size in req_sizes.items():
6615     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6616
6617
6618 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6619   """Checks if nodes have enough free disk space in the specified VG.
6620
6621   This function check if all given nodes have the needed amount of
6622   free disk. In case any node has less disk or we cannot get the
6623   information from the node, this function raise an OpPrereqError
6624   exception.
6625
6626   @type lu: C{LogicalUnit}
6627   @param lu: a logical unit from which we get configuration data
6628   @type nodenames: C{list}
6629   @param nodenames: the list of node names to check
6630   @type vg: C{str}
6631   @param vg: the volume group to check
6632   @type requested: C{int}
6633   @param requested: the amount of disk in MiB to check for
6634   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6635       or we cannot check the node
6636
6637   """
6638   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6639   for node in nodenames:
6640     info = nodeinfo[node]
6641     info.Raise("Cannot get current information from node %s" % node,
6642                prereq=True, ecode=errors.ECODE_ENVIRON)
6643     (_, (vg_info, ), _) = info.payload
6644     vg_free = vg_info.get("vg_free", None)
6645     if not isinstance(vg_free, int):
6646       raise errors.OpPrereqError("Can't compute free disk space on node"
6647                                  " %s for vg %s, result was '%s'" %
6648                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6649     if requested > vg_free:
6650       raise errors.OpPrereqError("Not enough disk space on target node %s"
6651                                  " vg %s: required %d MiB, available %d MiB" %
6652                                  (node, vg, requested, vg_free),
6653                                  errors.ECODE_NORES)
6654
6655
6656 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6657   """Checks if nodes have enough physical CPUs
6658
6659   This function checks if all given nodes have the needed number of
6660   physical CPUs. In case any node has less CPUs or we cannot get the
6661   information from the node, this function raises an OpPrereqError
6662   exception.
6663
6664   @type lu: C{LogicalUnit}
6665   @param lu: a logical unit from which we get configuration data
6666   @type nodenames: C{list}
6667   @param nodenames: the list of node names to check
6668   @type requested: C{int}
6669   @param requested: the minimum acceptable number of physical CPUs
6670   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6671       or we cannot check the node
6672
6673   """
6674   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6675   for node in nodenames:
6676     info = nodeinfo[node]
6677     info.Raise("Cannot get current information from node %s" % node,
6678                prereq=True, ecode=errors.ECODE_ENVIRON)
6679     (_, _, (hv_info, )) = info.payload
6680     num_cpus = hv_info.get("cpu_total", None)
6681     if not isinstance(num_cpus, int):
6682       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6683                                  " on node %s, result was '%s'" %
6684                                  (node, num_cpus), errors.ECODE_ENVIRON)
6685     if requested > num_cpus:
6686       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6687                                  "required" % (node, num_cpus, requested),
6688                                  errors.ECODE_NORES)
6689
6690
6691 class LUInstanceStartup(LogicalUnit):
6692   """Starts an instance.
6693
6694   """
6695   HPATH = "instance-start"
6696   HTYPE = constants.HTYPE_INSTANCE
6697   REQ_BGL = False
6698
6699   def CheckArguments(self):
6700     # extra beparams
6701     if self.op.beparams:
6702       # fill the beparams dict
6703       objects.UpgradeBeParams(self.op.beparams)
6704       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6705
6706   def ExpandNames(self):
6707     self._ExpandAndLockInstance()
6708     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6709
6710   def DeclareLocks(self, level):
6711     if level == locking.LEVEL_NODE_RES:
6712       self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6713
6714   def BuildHooksEnv(self):
6715     """Build hooks env.
6716
6717     This runs on master, primary and secondary nodes of the instance.
6718
6719     """
6720     env = {
6721       "FORCE": self.op.force,
6722       }
6723
6724     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6725
6726     return env
6727
6728   def BuildHooksNodes(self):
6729     """Build hooks nodes.
6730
6731     """
6732     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6733     return (nl, nl)
6734
6735   def CheckPrereq(self):
6736     """Check prerequisites.
6737
6738     This checks that the instance is in the cluster.
6739
6740     """
6741     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6742     assert self.instance is not None, \
6743       "Cannot retrieve locked instance %s" % self.op.instance_name
6744
6745     # extra hvparams
6746     if self.op.hvparams:
6747       # check hypervisor parameter syntax (locally)
6748       cluster = self.cfg.GetClusterInfo()
6749       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6750       filled_hvp = cluster.FillHV(instance)
6751       filled_hvp.update(self.op.hvparams)
6752       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6753       hv_type.CheckParameterSyntax(filled_hvp)
6754       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6755
6756     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6757
6758     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6759
6760     if self.primary_offline and self.op.ignore_offline_nodes:
6761       self.proc.LogWarning("Ignoring offline primary node")
6762
6763       if self.op.hvparams or self.op.beparams:
6764         self.proc.LogWarning("Overridden parameters are ignored")
6765     else:
6766       _CheckNodeOnline(self, instance.primary_node)
6767
6768       bep = self.cfg.GetClusterInfo().FillBE(instance)
6769       bep.update(self.op.beparams)
6770
6771       # check bridges existence
6772       _CheckInstanceBridgesExist(self, instance)
6773
6774       remote_info = self.rpc.call_instance_info(instance.primary_node,
6775                                                 instance.name,
6776                                                 instance.hypervisor)
6777       remote_info.Raise("Error checking node %s" % instance.primary_node,
6778                         prereq=True, ecode=errors.ECODE_ENVIRON)
6779       if not remote_info.payload: # not running already
6780         _CheckNodeFreeMemory(self, instance.primary_node,
6781                              "starting instance %s" % instance.name,
6782                              bep[constants.BE_MINMEM], instance.hypervisor)
6783
6784   def Exec(self, feedback_fn):
6785     """Start the instance.
6786
6787     """
6788     instance = self.instance
6789     force = self.op.force
6790
6791     if not self.op.no_remember:
6792       self.cfg.MarkInstanceUp(instance.name)
6793
6794     if self.primary_offline:
6795       assert self.op.ignore_offline_nodes
6796       self.proc.LogInfo("Primary node offline, marked instance as started")
6797     else:
6798       node_current = instance.primary_node
6799
6800       _StartInstanceDisks(self, instance, force)
6801
6802       result = \
6803         self.rpc.call_instance_start(node_current,
6804                                      (instance, self.op.hvparams,
6805                                       self.op.beparams),
6806                                      self.op.startup_paused)
6807       msg = result.fail_msg
6808       if msg:
6809         _ShutdownInstanceDisks(self, instance)
6810         raise errors.OpExecError("Could not start instance: %s" % msg)
6811
6812
6813 class LUInstanceReboot(LogicalUnit):
6814   """Reboot an instance.
6815
6816   """
6817   HPATH = "instance-reboot"
6818   HTYPE = constants.HTYPE_INSTANCE
6819   REQ_BGL = False
6820
6821   def ExpandNames(self):
6822     self._ExpandAndLockInstance()
6823
6824   def BuildHooksEnv(self):
6825     """Build hooks env.
6826
6827     This runs on master, primary and secondary nodes of the instance.
6828
6829     """
6830     env = {
6831       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6832       "REBOOT_TYPE": self.op.reboot_type,
6833       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6834       }
6835
6836     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6837
6838     return env
6839
6840   def BuildHooksNodes(self):
6841     """Build hooks nodes.
6842
6843     """
6844     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6845     return (nl, nl)
6846
6847   def CheckPrereq(self):
6848     """Check prerequisites.
6849
6850     This checks that the instance is in the cluster.
6851
6852     """
6853     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6854     assert self.instance is not None, \
6855       "Cannot retrieve locked instance %s" % self.op.instance_name
6856     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6857     _CheckNodeOnline(self, instance.primary_node)
6858
6859     # check bridges existence
6860     _CheckInstanceBridgesExist(self, instance)
6861
6862   def Exec(self, feedback_fn):
6863     """Reboot the instance.
6864
6865     """
6866     instance = self.instance
6867     ignore_secondaries = self.op.ignore_secondaries
6868     reboot_type = self.op.reboot_type
6869
6870     remote_info = self.rpc.call_instance_info(instance.primary_node,
6871                                               instance.name,
6872                                               instance.hypervisor)
6873     remote_info.Raise("Error checking node %s" % instance.primary_node)
6874     instance_running = bool(remote_info.payload)
6875
6876     node_current = instance.primary_node
6877
6878     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6879                                             constants.INSTANCE_REBOOT_HARD]:
6880       for disk in instance.disks:
6881         self.cfg.SetDiskID(disk, node_current)
6882       result = self.rpc.call_instance_reboot(node_current, instance,
6883                                              reboot_type,
6884                                              self.op.shutdown_timeout)
6885       result.Raise("Could not reboot instance")
6886     else:
6887       if instance_running:
6888         result = self.rpc.call_instance_shutdown(node_current, instance,
6889                                                  self.op.shutdown_timeout)
6890         result.Raise("Could not shutdown instance for full reboot")
6891         _ShutdownInstanceDisks(self, instance)
6892       else:
6893         self.LogInfo("Instance %s was already stopped, starting now",
6894                      instance.name)
6895       _StartInstanceDisks(self, instance, ignore_secondaries)
6896       result = self.rpc.call_instance_start(node_current,
6897                                             (instance, None, None), False)
6898       msg = result.fail_msg
6899       if msg:
6900         _ShutdownInstanceDisks(self, instance)
6901         raise errors.OpExecError("Could not start instance for"
6902                                  " full reboot: %s" % msg)
6903
6904     self.cfg.MarkInstanceUp(instance.name)
6905
6906
6907 class LUInstanceShutdown(LogicalUnit):
6908   """Shutdown an instance.
6909
6910   """
6911   HPATH = "instance-stop"
6912   HTYPE = constants.HTYPE_INSTANCE
6913   REQ_BGL = False
6914
6915   def ExpandNames(self):
6916     self._ExpandAndLockInstance()
6917
6918   def BuildHooksEnv(self):
6919     """Build hooks env.
6920
6921     This runs on master, primary and secondary nodes of the instance.
6922
6923     """
6924     env = _BuildInstanceHookEnvByObject(self, self.instance)
6925     env["TIMEOUT"] = self.op.timeout
6926     return env
6927
6928   def BuildHooksNodes(self):
6929     """Build hooks nodes.
6930
6931     """
6932     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6933     return (nl, nl)
6934
6935   def CheckPrereq(self):
6936     """Check prerequisites.
6937
6938     This checks that the instance is in the cluster.
6939
6940     """
6941     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6942     assert self.instance is not None, \
6943       "Cannot retrieve locked instance %s" % self.op.instance_name
6944
6945     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6946
6947     self.primary_offline = \
6948       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6949
6950     if self.primary_offline and self.op.ignore_offline_nodes:
6951       self.proc.LogWarning("Ignoring offline primary node")
6952     else:
6953       _CheckNodeOnline(self, self.instance.primary_node)
6954
6955   def Exec(self, feedback_fn):
6956     """Shutdown the instance.
6957
6958     """
6959     instance = self.instance
6960     node_current = instance.primary_node
6961     timeout = self.op.timeout
6962
6963     if not self.op.no_remember:
6964       self.cfg.MarkInstanceDown(instance.name)
6965
6966     if self.primary_offline:
6967       assert self.op.ignore_offline_nodes
6968       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6969     else:
6970       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6971       msg = result.fail_msg
6972       if msg:
6973         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6974
6975       _ShutdownInstanceDisks(self, instance)
6976
6977
6978 class LUInstanceReinstall(LogicalUnit):
6979   """Reinstall an instance.
6980
6981   """
6982   HPATH = "instance-reinstall"
6983   HTYPE = constants.HTYPE_INSTANCE
6984   REQ_BGL = False
6985
6986   def ExpandNames(self):
6987     self._ExpandAndLockInstance()
6988
6989   def BuildHooksEnv(self):
6990     """Build hooks env.
6991
6992     This runs on master, primary and secondary nodes of the instance.
6993
6994     """
6995     return _BuildInstanceHookEnvByObject(self, self.instance)
6996
6997   def BuildHooksNodes(self):
6998     """Build hooks nodes.
6999
7000     """
7001     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7002     return (nl, nl)
7003
7004   def CheckPrereq(self):
7005     """Check prerequisites.
7006
7007     This checks that the instance is in the cluster and is not running.
7008
7009     """
7010     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7011     assert instance is not None, \
7012       "Cannot retrieve locked instance %s" % self.op.instance_name
7013     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7014                      " offline, cannot reinstall")
7015
7016     if instance.disk_template == constants.DT_DISKLESS:
7017       raise errors.OpPrereqError("Instance '%s' has no disks" %
7018                                  self.op.instance_name,
7019                                  errors.ECODE_INVAL)
7020     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7021
7022     if self.op.os_type is not None:
7023       # OS verification
7024       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7025       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7026       instance_os = self.op.os_type
7027     else:
7028       instance_os = instance.os
7029
7030     nodelist = list(instance.all_nodes)
7031
7032     if self.op.osparams:
7033       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7034       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7035       self.os_inst = i_osdict # the new dict (without defaults)
7036     else:
7037       self.os_inst = None
7038
7039     self.instance = instance
7040
7041   def Exec(self, feedback_fn):
7042     """Reinstall the instance.
7043
7044     """
7045     inst = self.instance
7046
7047     if self.op.os_type is not None:
7048       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7049       inst.os = self.op.os_type
7050       # Write to configuration
7051       self.cfg.Update(inst, feedback_fn)
7052
7053     _StartInstanceDisks(self, inst, None)
7054     try:
7055       feedback_fn("Running the instance OS create scripts...")
7056       # FIXME: pass debug option from opcode to backend
7057       result = self.rpc.call_instance_os_add(inst.primary_node,
7058                                              (inst, self.os_inst), True,
7059                                              self.op.debug_level)
7060       result.Raise("Could not install OS for instance %s on node %s" %
7061                    (inst.name, inst.primary_node))
7062     finally:
7063       _ShutdownInstanceDisks(self, inst)
7064
7065
7066 class LUInstanceRecreateDisks(LogicalUnit):
7067   """Recreate an instance's missing disks.
7068
7069   """
7070   HPATH = "instance-recreate-disks"
7071   HTYPE = constants.HTYPE_INSTANCE
7072   REQ_BGL = False
7073
7074   _MODIFYABLE = frozenset([
7075     constants.IDISK_SIZE,
7076     constants.IDISK_MODE,
7077     ])
7078
7079   # New or changed disk parameters may have different semantics
7080   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7081     constants.IDISK_ADOPT,
7082
7083     # TODO: Implement support changing VG while recreating
7084     constants.IDISK_VG,
7085     constants.IDISK_METAVG,
7086     ]))
7087
7088   def CheckArguments(self):
7089     if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7090       # Normalize and convert deprecated list of disk indices
7091       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7092
7093     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7094     if duplicates:
7095       raise errors.OpPrereqError("Some disks have been specified more than"
7096                                  " once: %s" % utils.CommaJoin(duplicates),
7097                                  errors.ECODE_INVAL)
7098
7099     for (idx, params) in self.op.disks:
7100       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7101       unsupported = frozenset(params.keys()) - self._MODIFYABLE
7102       if unsupported:
7103         raise errors.OpPrereqError("Parameters for disk %s try to change"
7104                                    " unmodifyable parameter(s): %s" %
7105                                    (idx, utils.CommaJoin(unsupported)),
7106                                    errors.ECODE_INVAL)
7107
7108   def ExpandNames(self):
7109     self._ExpandAndLockInstance()
7110     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7111     if self.op.nodes:
7112       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7113       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7114     else:
7115       self.needed_locks[locking.LEVEL_NODE] = []
7116     self.needed_locks[locking.LEVEL_NODE_RES] = []
7117
7118   def DeclareLocks(self, level):
7119     if level == locking.LEVEL_NODE:
7120       # if we replace the nodes, we only need to lock the old primary,
7121       # otherwise we need to lock all nodes for disk re-creation
7122       primary_only = bool(self.op.nodes)
7123       self._LockInstancesNodes(primary_only=primary_only)
7124     elif level == locking.LEVEL_NODE_RES:
7125       # Copy node locks
7126       self.needed_locks[locking.LEVEL_NODE_RES] = \
7127         self.needed_locks[locking.LEVEL_NODE][:]
7128
7129   def BuildHooksEnv(self):
7130     """Build hooks env.
7131
7132     This runs on master, primary and secondary nodes of the instance.
7133
7134     """
7135     return _BuildInstanceHookEnvByObject(self, self.instance)
7136
7137   def BuildHooksNodes(self):
7138     """Build hooks nodes.
7139
7140     """
7141     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7142     return (nl, nl)
7143
7144   def CheckPrereq(self):
7145     """Check prerequisites.
7146
7147     This checks that the instance is in the cluster and is not running.
7148
7149     """
7150     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7151     assert instance is not None, \
7152       "Cannot retrieve locked instance %s" % self.op.instance_name
7153     if self.op.nodes:
7154       if len(self.op.nodes) != len(instance.all_nodes):
7155         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7156                                    " %d replacement nodes were specified" %
7157                                    (instance.name, len(instance.all_nodes),
7158                                     len(self.op.nodes)),
7159                                    errors.ECODE_INVAL)
7160       assert instance.disk_template != constants.DT_DRBD8 or \
7161           len(self.op.nodes) == 2
7162       assert instance.disk_template != constants.DT_PLAIN or \
7163           len(self.op.nodes) == 1
7164       primary_node = self.op.nodes[0]
7165     else:
7166       primary_node = instance.primary_node
7167     _CheckNodeOnline(self, primary_node)
7168
7169     if instance.disk_template == constants.DT_DISKLESS:
7170       raise errors.OpPrereqError("Instance '%s' has no disks" %
7171                                  self.op.instance_name, errors.ECODE_INVAL)
7172
7173     # if we replace nodes *and* the old primary is offline, we don't
7174     # check
7175     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7176     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7177     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7178     if not (self.op.nodes and old_pnode.offline):
7179       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7180                           msg="cannot recreate disks")
7181
7182     if self.op.disks:
7183       self.disks = dict(self.op.disks)
7184     else:
7185       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7186
7187     maxidx = max(self.disks.keys())
7188     if maxidx >= len(instance.disks):
7189       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7190                                  errors.ECODE_INVAL)
7191
7192     if (self.op.nodes and
7193         sorted(self.disks.keys()) != range(len(instance.disks))):
7194       raise errors.OpPrereqError("Can't recreate disks partially and"
7195                                  " change the nodes at the same time",
7196                                  errors.ECODE_INVAL)
7197
7198     self.instance = instance
7199
7200   def Exec(self, feedback_fn):
7201     """Recreate the disks.
7202
7203     """
7204     instance = self.instance
7205
7206     assert (self.owned_locks(locking.LEVEL_NODE) ==
7207             self.owned_locks(locking.LEVEL_NODE_RES))
7208
7209     to_skip = []
7210     mods = [] # keeps track of needed changes
7211
7212     for idx, disk in enumerate(instance.disks):
7213       try:
7214         changes = self.disks[idx]
7215       except KeyError:
7216         # Disk should not be recreated
7217         to_skip.append(idx)
7218         continue
7219
7220       # update secondaries for disks, if needed
7221       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7222         # need to update the nodes and minors
7223         assert len(self.op.nodes) == 2
7224         assert len(disk.logical_id) == 6 # otherwise disk internals
7225                                          # have changed
7226         (_, _, old_port, _, _, old_secret) = disk.logical_id
7227         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7228         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7229                   new_minors[0], new_minors[1], old_secret)
7230         assert len(disk.logical_id) == len(new_id)
7231       else:
7232         new_id = None
7233
7234       mods.append((idx, new_id, changes))
7235
7236     # now that we have passed all asserts above, we can apply the mods
7237     # in a single run (to avoid partial changes)
7238     for idx, new_id, changes in mods:
7239       disk = instance.disks[idx]
7240       if new_id is not None:
7241         assert disk.dev_type == constants.LD_DRBD8
7242         disk.logical_id = new_id
7243       if changes:
7244         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7245                     mode=changes.get(constants.IDISK_MODE, None))
7246
7247     # change primary node, if needed
7248     if self.op.nodes:
7249       instance.primary_node = self.op.nodes[0]
7250       self.LogWarning("Changing the instance's nodes, you will have to"
7251                       " remove any disks left on the older nodes manually")
7252
7253     if self.op.nodes:
7254       self.cfg.Update(instance, feedback_fn)
7255
7256     _CreateDisks(self, instance, to_skip=to_skip)
7257
7258
7259 class LUInstanceRename(LogicalUnit):
7260   """Rename an instance.
7261
7262   """
7263   HPATH = "instance-rename"
7264   HTYPE = constants.HTYPE_INSTANCE
7265
7266   def CheckArguments(self):
7267     """Check arguments.
7268
7269     """
7270     if self.op.ip_check and not self.op.name_check:
7271       # TODO: make the ip check more flexible and not depend on the name check
7272       raise errors.OpPrereqError("IP address check requires a name check",
7273                                  errors.ECODE_INVAL)
7274
7275   def BuildHooksEnv(self):
7276     """Build hooks env.
7277
7278     This runs on master, primary and secondary nodes of the instance.
7279
7280     """
7281     env = _BuildInstanceHookEnvByObject(self, self.instance)
7282     env["INSTANCE_NEW_NAME"] = self.op.new_name
7283     return env
7284
7285   def BuildHooksNodes(self):
7286     """Build hooks nodes.
7287
7288     """
7289     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7290     return (nl, nl)
7291
7292   def CheckPrereq(self):
7293     """Check prerequisites.
7294
7295     This checks that the instance is in the cluster and is not running.
7296
7297     """
7298     self.op.instance_name = _ExpandInstanceName(self.cfg,
7299                                                 self.op.instance_name)
7300     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7301     assert instance is not None
7302     _CheckNodeOnline(self, instance.primary_node)
7303     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7304                         msg="cannot rename")
7305     self.instance = instance
7306
7307     new_name = self.op.new_name
7308     if self.op.name_check:
7309       hostname = netutils.GetHostname(name=new_name)
7310       if hostname.name != new_name:
7311         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7312                      hostname.name)
7313       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7314         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7315                                     " same as given hostname '%s'") %
7316                                     (hostname.name, self.op.new_name),
7317                                     errors.ECODE_INVAL)
7318       new_name = self.op.new_name = hostname.name
7319       if (self.op.ip_check and
7320           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7321         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7322                                    (hostname.ip, new_name),
7323                                    errors.ECODE_NOTUNIQUE)
7324
7325     instance_list = self.cfg.GetInstanceList()
7326     if new_name in instance_list and new_name != instance.name:
7327       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7328                                  new_name, errors.ECODE_EXISTS)
7329
7330   def Exec(self, feedback_fn):
7331     """Rename the instance.
7332
7333     """
7334     inst = self.instance
7335     old_name = inst.name
7336
7337     rename_file_storage = False
7338     if (inst.disk_template in constants.DTS_FILEBASED and
7339         self.op.new_name != inst.name):
7340       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7341       rename_file_storage = True
7342
7343     self.cfg.RenameInstance(inst.name, self.op.new_name)
7344     # Change the instance lock. This is definitely safe while we hold the BGL.
7345     # Otherwise the new lock would have to be added in acquired mode.
7346     assert self.REQ_BGL
7347     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7348     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7349
7350     # re-read the instance from the configuration after rename
7351     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7352
7353     if rename_file_storage:
7354       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7355       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7356                                                      old_file_storage_dir,
7357                                                      new_file_storage_dir)
7358       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7359                    " (but the instance has been renamed in Ganeti)" %
7360                    (inst.primary_node, old_file_storage_dir,
7361                     new_file_storage_dir))
7362
7363     _StartInstanceDisks(self, inst, None)
7364     try:
7365       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7366                                                  old_name, self.op.debug_level)
7367       msg = result.fail_msg
7368       if msg:
7369         msg = ("Could not run OS rename script for instance %s on node %s"
7370                " (but the instance has been renamed in Ganeti): %s" %
7371                (inst.name, inst.primary_node, msg))
7372         self.proc.LogWarning(msg)
7373     finally:
7374       _ShutdownInstanceDisks(self, inst)
7375
7376     return inst.name
7377
7378
7379 class LUInstanceRemove(LogicalUnit):
7380   """Remove an instance.
7381
7382   """
7383   HPATH = "instance-remove"
7384   HTYPE = constants.HTYPE_INSTANCE
7385   REQ_BGL = False
7386
7387   def ExpandNames(self):
7388     self._ExpandAndLockInstance()
7389     self.needed_locks[locking.LEVEL_NODE] = []
7390     self.needed_locks[locking.LEVEL_NODE_RES] = []
7391     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7392
7393   def DeclareLocks(self, level):
7394     if level == locking.LEVEL_NODE:
7395       self._LockInstancesNodes()
7396     elif level == locking.LEVEL_NODE_RES:
7397       # Copy node locks
7398       self.needed_locks[locking.LEVEL_NODE_RES] = \
7399         self.needed_locks[locking.LEVEL_NODE][:]
7400
7401   def BuildHooksEnv(self):
7402     """Build hooks env.
7403
7404     This runs on master, primary and secondary nodes of the instance.
7405
7406     """
7407     env = _BuildInstanceHookEnvByObject(self, self.instance)
7408     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7409     return env
7410
7411   def BuildHooksNodes(self):
7412     """Build hooks nodes.
7413
7414     """
7415     nl = [self.cfg.GetMasterNode()]
7416     nl_post = list(self.instance.all_nodes) + nl
7417     return (nl, nl_post)
7418
7419   def CheckPrereq(self):
7420     """Check prerequisites.
7421
7422     This checks that the instance is in the cluster.
7423
7424     """
7425     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7426     assert self.instance is not None, \
7427       "Cannot retrieve locked instance %s" % self.op.instance_name
7428
7429   def Exec(self, feedback_fn):
7430     """Remove the instance.
7431
7432     """
7433     instance = self.instance
7434     logging.info("Shutting down instance %s on node %s",
7435                  instance.name, instance.primary_node)
7436
7437     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7438                                              self.op.shutdown_timeout)
7439     msg = result.fail_msg
7440     if msg:
7441       if self.op.ignore_failures:
7442         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7443       else:
7444         raise errors.OpExecError("Could not shutdown instance %s on"
7445                                  " node %s: %s" %
7446                                  (instance.name, instance.primary_node, msg))
7447
7448     assert (self.owned_locks(locking.LEVEL_NODE) ==
7449             self.owned_locks(locking.LEVEL_NODE_RES))
7450     assert not (set(instance.all_nodes) -
7451                 self.owned_locks(locking.LEVEL_NODE)), \
7452       "Not owning correct locks"
7453
7454     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7455
7456
7457 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7458   """Utility function to remove an instance.
7459
7460   """
7461   logging.info("Removing block devices for instance %s", instance.name)
7462
7463   if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7464     if not ignore_failures:
7465       raise errors.OpExecError("Can't remove instance's disks")
7466     feedback_fn("Warning: can't remove instance's disks")
7467
7468   logging.info("Removing instance %s out of cluster config", instance.name)
7469
7470   lu.cfg.RemoveInstance(instance.name)
7471
7472   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7473     "Instance lock removal conflict"
7474
7475   # Remove lock for the instance
7476   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7477
7478
7479 class LUInstanceQuery(NoHooksLU):
7480   """Logical unit for querying instances.
7481
7482   """
7483   # pylint: disable=W0142
7484   REQ_BGL = False
7485
7486   def CheckArguments(self):
7487     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7488                              self.op.output_fields, self.op.use_locking)
7489
7490   def ExpandNames(self):
7491     self.iq.ExpandNames(self)
7492
7493   def DeclareLocks(self, level):
7494     self.iq.DeclareLocks(self, level)
7495
7496   def Exec(self, feedback_fn):
7497     return self.iq.OldStyleQuery(self)
7498
7499
7500 class LUInstanceFailover(LogicalUnit):
7501   """Failover an instance.
7502
7503   """
7504   HPATH = "instance-failover"
7505   HTYPE = constants.HTYPE_INSTANCE
7506   REQ_BGL = False
7507
7508   def CheckArguments(self):
7509     """Check the arguments.
7510
7511     """
7512     self.iallocator = getattr(self.op, "iallocator", None)
7513     self.target_node = getattr(self.op, "target_node", None)
7514
7515   def ExpandNames(self):
7516     self._ExpandAndLockInstance()
7517
7518     if self.op.target_node is not None:
7519       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7520
7521     self.needed_locks[locking.LEVEL_NODE] = []
7522     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7523
7524     self.needed_locks[locking.LEVEL_NODE_RES] = []
7525     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7526
7527     ignore_consistency = self.op.ignore_consistency
7528     shutdown_timeout = self.op.shutdown_timeout
7529     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7530                                        cleanup=False,
7531                                        failover=True,
7532                                        ignore_consistency=ignore_consistency,
7533                                        shutdown_timeout=shutdown_timeout,
7534                                        ignore_ipolicy=self.op.ignore_ipolicy)
7535     self.tasklets = [self._migrater]
7536
7537   def DeclareLocks(self, level):
7538     if level == locking.LEVEL_NODE:
7539       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7540       if instance.disk_template in constants.DTS_EXT_MIRROR:
7541         if self.op.target_node is None:
7542           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7543         else:
7544           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7545                                                    self.op.target_node]
7546         del self.recalculate_locks[locking.LEVEL_NODE]
7547       else:
7548         self._LockInstancesNodes()
7549     elif level == locking.LEVEL_NODE_RES:
7550       # Copy node locks
7551       self.needed_locks[locking.LEVEL_NODE_RES] = \
7552         self.needed_locks[locking.LEVEL_NODE][:]
7553
7554   def BuildHooksEnv(self):
7555     """Build hooks env.
7556
7557     This runs on master, primary and secondary nodes of the instance.
7558
7559     """
7560     instance = self._migrater.instance
7561     source_node = instance.primary_node
7562     target_node = self.op.target_node
7563     env = {
7564       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7565       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7566       "OLD_PRIMARY": source_node,
7567       "NEW_PRIMARY": target_node,
7568       }
7569
7570     if instance.disk_template in constants.DTS_INT_MIRROR:
7571       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7572       env["NEW_SECONDARY"] = source_node
7573     else:
7574       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7575
7576     env.update(_BuildInstanceHookEnvByObject(self, instance))
7577
7578     return env
7579
7580   def BuildHooksNodes(self):
7581     """Build hooks nodes.
7582
7583     """
7584     instance = self._migrater.instance
7585     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7586     return (nl, nl + [instance.primary_node])
7587
7588
7589 class LUInstanceMigrate(LogicalUnit):
7590   """Migrate an instance.
7591
7592   This is migration without shutting down, compared to the failover,
7593   which is done with shutdown.
7594
7595   """
7596   HPATH = "instance-migrate"
7597   HTYPE = constants.HTYPE_INSTANCE
7598   REQ_BGL = False
7599
7600   def ExpandNames(self):
7601     self._ExpandAndLockInstance()
7602
7603     if self.op.target_node is not None:
7604       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7605
7606     self.needed_locks[locking.LEVEL_NODE] = []
7607     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7608
7609     self.needed_locks[locking.LEVEL_NODE] = []
7610     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7611
7612     self._migrater = \
7613       TLMigrateInstance(self, self.op.instance_name,
7614                         cleanup=self.op.cleanup,
7615                         failover=False,
7616                         fallback=self.op.allow_failover,
7617                         allow_runtime_changes=self.op.allow_runtime_changes,
7618                         ignore_ipolicy=self.op.ignore_ipolicy)
7619     self.tasklets = [self._migrater]
7620
7621   def DeclareLocks(self, level):
7622     if level == locking.LEVEL_NODE:
7623       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7624       if instance.disk_template in constants.DTS_EXT_MIRROR:
7625         if self.op.target_node is None:
7626           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7627         else:
7628           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7629                                                    self.op.target_node]
7630         del self.recalculate_locks[locking.LEVEL_NODE]
7631       else:
7632         self._LockInstancesNodes()
7633     elif level == locking.LEVEL_NODE_RES:
7634       # Copy node locks
7635       self.needed_locks[locking.LEVEL_NODE_RES] = \
7636         self.needed_locks[locking.LEVEL_NODE][:]
7637
7638   def BuildHooksEnv(self):
7639     """Build hooks env.
7640
7641     This runs on master, primary and secondary nodes of the instance.
7642
7643     """
7644     instance = self._migrater.instance
7645     source_node = instance.primary_node
7646     target_node = self.op.target_node
7647     env = _BuildInstanceHookEnvByObject(self, instance)
7648     env.update({
7649       "MIGRATE_LIVE": self._migrater.live,
7650       "MIGRATE_CLEANUP": self.op.cleanup,
7651       "OLD_PRIMARY": source_node,
7652       "NEW_PRIMARY": target_node,
7653       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7654       })
7655
7656     if instance.disk_template in constants.DTS_INT_MIRROR:
7657       env["OLD_SECONDARY"] = target_node
7658       env["NEW_SECONDARY"] = source_node
7659     else:
7660       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7661
7662     return env
7663
7664   def BuildHooksNodes(self):
7665     """Build hooks nodes.
7666
7667     """
7668     instance = self._migrater.instance
7669     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7670     return (nl, nl + [instance.primary_node])
7671
7672
7673 class LUInstanceMove(LogicalUnit):
7674   """Move an instance by data-copying.
7675
7676   """
7677   HPATH = "instance-move"
7678   HTYPE = constants.HTYPE_INSTANCE
7679   REQ_BGL = False
7680
7681   def ExpandNames(self):
7682     self._ExpandAndLockInstance()
7683     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7684     self.op.target_node = target_node
7685     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7686     self.needed_locks[locking.LEVEL_NODE_RES] = []
7687     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7688
7689   def DeclareLocks(self, level):
7690     if level == locking.LEVEL_NODE:
7691       self._LockInstancesNodes(primary_only=True)
7692     elif level == locking.LEVEL_NODE_RES:
7693       # Copy node locks
7694       self.needed_locks[locking.LEVEL_NODE_RES] = \
7695         self.needed_locks[locking.LEVEL_NODE][:]
7696
7697   def BuildHooksEnv(self):
7698     """Build hooks env.
7699
7700     This runs on master, primary and secondary nodes of the instance.
7701
7702     """
7703     env = {
7704       "TARGET_NODE": self.op.target_node,
7705       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7706       }
7707     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7708     return env
7709
7710   def BuildHooksNodes(self):
7711     """Build hooks nodes.
7712
7713     """
7714     nl = [
7715       self.cfg.GetMasterNode(),
7716       self.instance.primary_node,
7717       self.op.target_node,
7718       ]
7719     return (nl, nl)
7720
7721   def CheckPrereq(self):
7722     """Check prerequisites.
7723
7724     This checks that the instance is in the cluster.
7725
7726     """
7727     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7728     assert self.instance is not None, \
7729       "Cannot retrieve locked instance %s" % self.op.instance_name
7730
7731     node = self.cfg.GetNodeInfo(self.op.target_node)
7732     assert node is not None, \
7733       "Cannot retrieve locked node %s" % self.op.target_node
7734
7735     self.target_node = target_node = node.name
7736
7737     if target_node == instance.primary_node:
7738       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7739                                  (instance.name, target_node),
7740                                  errors.ECODE_STATE)
7741
7742     bep = self.cfg.GetClusterInfo().FillBE(instance)
7743
7744     for idx, dsk in enumerate(instance.disks):
7745       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7746         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7747                                    " cannot copy" % idx, errors.ECODE_STATE)
7748
7749     _CheckNodeOnline(self, target_node)
7750     _CheckNodeNotDrained(self, target_node)
7751     _CheckNodeVmCapable(self, target_node)
7752     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7753                                      self.cfg.GetNodeGroup(node.group))
7754     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7755                             ignore=self.op.ignore_ipolicy)
7756
7757     if instance.admin_state == constants.ADMINST_UP:
7758       # check memory requirements on the secondary node
7759       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7760                            instance.name, bep[constants.BE_MAXMEM],
7761                            instance.hypervisor)
7762     else:
7763       self.LogInfo("Not checking memory on the secondary node as"
7764                    " instance will not be started")
7765
7766     # check bridge existance
7767     _CheckInstanceBridgesExist(self, instance, node=target_node)
7768
7769   def Exec(self, feedback_fn):
7770     """Move an instance.
7771
7772     The move is done by shutting it down on its present node, copying
7773     the data over (slow) and starting it on the new node.
7774
7775     """
7776     instance = self.instance
7777
7778     source_node = instance.primary_node
7779     target_node = self.target_node
7780
7781     self.LogInfo("Shutting down instance %s on source node %s",
7782                  instance.name, source_node)
7783
7784     assert (self.owned_locks(locking.LEVEL_NODE) ==
7785             self.owned_locks(locking.LEVEL_NODE_RES))
7786
7787     result = self.rpc.call_instance_shutdown(source_node, instance,
7788                                              self.op.shutdown_timeout)
7789     msg = result.fail_msg
7790     if msg:
7791       if self.op.ignore_consistency:
7792         self.proc.LogWarning("Could not shutdown instance %s on node %s."
7793                              " Proceeding anyway. Please make sure node"
7794                              " %s is down. Error details: %s",
7795                              instance.name, source_node, source_node, msg)
7796       else:
7797         raise errors.OpExecError("Could not shutdown instance %s on"
7798                                  " node %s: %s" %
7799                                  (instance.name, source_node, msg))
7800
7801     # create the target disks
7802     try:
7803       _CreateDisks(self, instance, target_node=target_node)
7804     except errors.OpExecError:
7805       self.LogWarning("Device creation failed, reverting...")
7806       try:
7807         _RemoveDisks(self, instance, target_node=target_node)
7808       finally:
7809         self.cfg.ReleaseDRBDMinors(instance.name)
7810         raise
7811
7812     cluster_name = self.cfg.GetClusterInfo().cluster_name
7813
7814     errs = []
7815     # activate, get path, copy the data over
7816     for idx, disk in enumerate(instance.disks):
7817       self.LogInfo("Copying data for disk %d", idx)
7818       result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7819                                                instance.name, True, idx)
7820       if result.fail_msg:
7821         self.LogWarning("Can't assemble newly created disk %d: %s",
7822                         idx, result.fail_msg)
7823         errs.append(result.fail_msg)
7824         break
7825       dev_path = result.payload
7826       result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7827                                              target_node, dev_path,
7828                                              cluster_name)
7829       if result.fail_msg:
7830         self.LogWarning("Can't copy data over for disk %d: %s",
7831                         idx, result.fail_msg)
7832         errs.append(result.fail_msg)
7833         break
7834
7835     if errs:
7836       self.LogWarning("Some disks failed to copy, aborting")
7837       try:
7838         _RemoveDisks(self, instance, target_node=target_node)
7839       finally:
7840         self.cfg.ReleaseDRBDMinors(instance.name)
7841         raise errors.OpExecError("Errors during disk copy: %s" %
7842                                  (",".join(errs),))
7843
7844     instance.primary_node = target_node
7845     self.cfg.Update(instance, feedback_fn)
7846
7847     self.LogInfo("Removing the disks on the original node")
7848     _RemoveDisks(self, instance, target_node=source_node)
7849
7850     # Only start the instance if it's marked as up
7851     if instance.admin_state == constants.ADMINST_UP:
7852       self.LogInfo("Starting instance %s on node %s",
7853                    instance.name, target_node)
7854
7855       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7856                                            ignore_secondaries=True)
7857       if not disks_ok:
7858         _ShutdownInstanceDisks(self, instance)
7859         raise errors.OpExecError("Can't activate the instance's disks")
7860
7861       result = self.rpc.call_instance_start(target_node,
7862                                             (instance, None, None), False)
7863       msg = result.fail_msg
7864       if msg:
7865         _ShutdownInstanceDisks(self, instance)
7866         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7867                                  (instance.name, target_node, msg))
7868
7869
7870 class LUNodeMigrate(LogicalUnit):
7871   """Migrate all instances from a node.
7872
7873   """
7874   HPATH = "node-migrate"
7875   HTYPE = constants.HTYPE_NODE
7876   REQ_BGL = False
7877
7878   def CheckArguments(self):
7879     pass
7880
7881   def ExpandNames(self):
7882     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7883
7884     self.share_locks = _ShareAll()
7885     self.needed_locks = {
7886       locking.LEVEL_NODE: [self.op.node_name],
7887       }
7888
7889   def BuildHooksEnv(self):
7890     """Build hooks env.
7891
7892     This runs on the master, the primary and all the secondaries.
7893
7894     """
7895     return {
7896       "NODE_NAME": self.op.node_name,
7897       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7898       }
7899
7900   def BuildHooksNodes(self):
7901     """Build hooks nodes.
7902
7903     """
7904     nl = [self.cfg.GetMasterNode()]
7905     return (nl, nl)
7906
7907   def CheckPrereq(self):
7908     pass
7909
7910   def Exec(self, feedback_fn):
7911     # Prepare jobs for migration instances
7912     allow_runtime_changes = self.op.allow_runtime_changes
7913     jobs = [
7914       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7915                                  mode=self.op.mode,
7916                                  live=self.op.live,
7917                                  iallocator=self.op.iallocator,
7918                                  target_node=self.op.target_node,
7919                                  allow_runtime_changes=allow_runtime_changes,
7920                                  ignore_ipolicy=self.op.ignore_ipolicy)]
7921       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7922       ]
7923
7924     # TODO: Run iallocator in this opcode and pass correct placement options to
7925     # OpInstanceMigrate. Since other jobs can modify the cluster between
7926     # running the iallocator and the actual migration, a good consistency model
7927     # will have to be found.
7928
7929     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7930             frozenset([self.op.node_name]))
7931
7932     return ResultWithJobs(jobs)
7933
7934
7935 class TLMigrateInstance(Tasklet):
7936   """Tasklet class for instance migration.
7937
7938   @type live: boolean
7939   @ivar live: whether the migration will be done live or non-live;
7940       this variable is initalized only after CheckPrereq has run
7941   @type cleanup: boolean
7942   @ivar cleanup: Wheater we cleanup from a failed migration
7943   @type iallocator: string
7944   @ivar iallocator: The iallocator used to determine target_node
7945   @type target_node: string
7946   @ivar target_node: If given, the target_node to reallocate the instance to
7947   @type failover: boolean
7948   @ivar failover: Whether operation results in failover or migration
7949   @type fallback: boolean
7950   @ivar fallback: Whether fallback to failover is allowed if migration not
7951                   possible
7952   @type ignore_consistency: boolean
7953   @ivar ignore_consistency: Wheter we should ignore consistency between source
7954                             and target node
7955   @type shutdown_timeout: int
7956   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7957   @type ignore_ipolicy: bool
7958   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7959
7960   """
7961
7962   # Constants
7963   _MIGRATION_POLL_INTERVAL = 1      # seconds
7964   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7965
7966   def __init__(self, lu, instance_name, cleanup=False,
7967                failover=False, fallback=False,
7968                ignore_consistency=False,
7969                allow_runtime_changes=True,
7970                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7971                ignore_ipolicy=False):
7972     """Initializes this class.
7973
7974     """
7975     Tasklet.__init__(self, lu)
7976
7977     # Parameters
7978     self.instance_name = instance_name
7979     self.cleanup = cleanup
7980     self.live = False # will be overridden later
7981     self.failover = failover
7982     self.fallback = fallback
7983     self.ignore_consistency = ignore_consistency
7984     self.shutdown_timeout = shutdown_timeout
7985     self.ignore_ipolicy = ignore_ipolicy
7986     self.allow_runtime_changes = allow_runtime_changes
7987
7988   def CheckPrereq(self):
7989     """Check prerequisites.
7990
7991     This checks that the instance is in the cluster.
7992
7993     """
7994     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7995     instance = self.cfg.GetInstanceInfo(instance_name)
7996     assert instance is not None
7997     self.instance = instance
7998     cluster = self.cfg.GetClusterInfo()
7999
8000     if (not self.cleanup and
8001         not instance.admin_state == constants.ADMINST_UP and
8002         not self.failover and self.fallback):
8003       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8004                       " switching to failover")
8005       self.failover = True
8006
8007     if instance.disk_template not in constants.DTS_MIRRORED:
8008       if self.failover:
8009         text = "failovers"
8010       else:
8011         text = "migrations"
8012       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8013                                  " %s" % (instance.disk_template, text),
8014                                  errors.ECODE_STATE)
8015
8016     if instance.disk_template in constants.DTS_EXT_MIRROR:
8017       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8018
8019       if self.lu.op.iallocator:
8020         self._RunAllocator()
8021       else:
8022         # We set set self.target_node as it is required by
8023         # BuildHooksEnv
8024         self.target_node = self.lu.op.target_node
8025
8026       # Check that the target node is correct in terms of instance policy
8027       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8028       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8029       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8030       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8031                               ignore=self.ignore_ipolicy)
8032
8033       # self.target_node is already populated, either directly or by the
8034       # iallocator run
8035       target_node = self.target_node
8036       if self.target_node == instance.primary_node:
8037         raise errors.OpPrereqError("Cannot migrate instance %s"
8038                                    " to its primary (%s)" %
8039                                    (instance.name, instance.primary_node))
8040
8041       if len(self.lu.tasklets) == 1:
8042         # It is safe to release locks only when we're the only tasklet
8043         # in the LU
8044         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8045                       keep=[instance.primary_node, self.target_node])
8046
8047     else:
8048       secondary_nodes = instance.secondary_nodes
8049       if not secondary_nodes:
8050         raise errors.ConfigurationError("No secondary node but using"
8051                                         " %s disk template" %
8052                                         instance.disk_template)
8053       target_node = secondary_nodes[0]
8054       if self.lu.op.iallocator or (self.lu.op.target_node and
8055                                    self.lu.op.target_node != target_node):
8056         if self.failover:
8057           text = "failed over"
8058         else:
8059           text = "migrated"
8060         raise errors.OpPrereqError("Instances with disk template %s cannot"
8061                                    " be %s to arbitrary nodes"
8062                                    " (neither an iallocator nor a target"
8063                                    " node can be passed)" %
8064                                    (instance.disk_template, text),
8065                                    errors.ECODE_INVAL)
8066       nodeinfo = self.cfg.GetNodeInfo(target_node)
8067       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8068       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8069       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8070                               ignore=self.ignore_ipolicy)
8071
8072     i_be = cluster.FillBE(instance)
8073
8074     # check memory requirements on the secondary node
8075     if (not self.cleanup and
8076          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8077       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8078                                                "migrating instance %s" %
8079                                                instance.name,
8080                                                i_be[constants.BE_MINMEM],
8081                                                instance.hypervisor)
8082     else:
8083       self.lu.LogInfo("Not checking memory on the secondary node as"
8084                       " instance will not be started")
8085
8086     # check if failover must be forced instead of migration
8087     if (not self.cleanup and not self.failover and
8088         i_be[constants.BE_ALWAYS_FAILOVER]):
8089       if self.fallback:
8090         self.lu.LogInfo("Instance configured to always failover; fallback"
8091                         " to failover")
8092         self.failover = True
8093       else:
8094         raise errors.OpPrereqError("This instance has been configured to"
8095                                    " always failover, please allow failover",
8096                                    errors.ECODE_STATE)
8097
8098     # check bridge existance
8099     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8100
8101     if not self.cleanup:
8102       _CheckNodeNotDrained(self.lu, target_node)
8103       if not self.failover:
8104         result = self.rpc.call_instance_migratable(instance.primary_node,
8105                                                    instance)
8106         if result.fail_msg and self.fallback:
8107           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8108                           " failover")
8109           self.failover = True
8110         else:
8111           result.Raise("Can't migrate, please use failover",
8112                        prereq=True, ecode=errors.ECODE_STATE)
8113
8114     assert not (self.failover and self.cleanup)
8115
8116     if not self.failover:
8117       if self.lu.op.live is not None and self.lu.op.mode is not None:
8118         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8119                                    " parameters are accepted",
8120                                    errors.ECODE_INVAL)
8121       if self.lu.op.live is not None:
8122         if self.lu.op.live:
8123           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8124         else:
8125           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8126         # reset the 'live' parameter to None so that repeated
8127         # invocations of CheckPrereq do not raise an exception
8128         self.lu.op.live = None
8129       elif self.lu.op.mode is None:
8130         # read the default value from the hypervisor
8131         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8132         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8133
8134       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8135     else:
8136       # Failover is never live
8137       self.live = False
8138
8139     if not (self.failover or self.cleanup):
8140       remote_info = self.rpc.call_instance_info(instance.primary_node,
8141                                                 instance.name,
8142                                                 instance.hypervisor)
8143       remote_info.Raise("Error checking instance on node %s" %
8144                         instance.primary_node)
8145       instance_running = bool(remote_info.payload)
8146       if instance_running:
8147         self.current_mem = int(remote_info.payload["memory"])
8148
8149   def _RunAllocator(self):
8150     """Run the allocator based on input opcode.
8151
8152     """
8153     # FIXME: add a self.ignore_ipolicy option
8154     ial = IAllocator(self.cfg, self.rpc,
8155                      mode=constants.IALLOCATOR_MODE_RELOC,
8156                      name=self.instance_name,
8157                      relocate_from=[self.instance.primary_node],
8158                      )
8159
8160     ial.Run(self.lu.op.iallocator)
8161
8162     if not ial.success:
8163       raise errors.OpPrereqError("Can't compute nodes using"
8164                                  " iallocator '%s': %s" %
8165                                  (self.lu.op.iallocator, ial.info),
8166                                  errors.ECODE_NORES)
8167     if len(ial.result) != ial.required_nodes:
8168       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8169                                  " of nodes (%s), required %s" %
8170                                  (self.lu.op.iallocator, len(ial.result),
8171                                   ial.required_nodes), errors.ECODE_FAULT)
8172     self.target_node = ial.result[0]
8173     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8174                  self.instance_name, self.lu.op.iallocator,
8175                  utils.CommaJoin(ial.result))
8176
8177   def _WaitUntilSync(self):
8178     """Poll with custom rpc for disk sync.
8179
8180     This uses our own step-based rpc call.
8181
8182     """
8183     self.feedback_fn("* wait until resync is done")
8184     all_done = False
8185     while not all_done:
8186       all_done = True
8187       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8188                                             self.nodes_ip,
8189                                             (self.instance.disks,
8190                                              self.instance))
8191       min_percent = 100
8192       for node, nres in result.items():
8193         nres.Raise("Cannot resync disks on node %s" % node)
8194         node_done, node_percent = nres.payload
8195         all_done = all_done and node_done
8196         if node_percent is not None:
8197           min_percent = min(min_percent, node_percent)
8198       if not all_done:
8199         if min_percent < 100:
8200           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8201         time.sleep(2)
8202
8203   def _EnsureSecondary(self, node):
8204     """Demote a node to secondary.
8205
8206     """
8207     self.feedback_fn("* switching node %s to secondary mode" % node)
8208
8209     for dev in self.instance.disks:
8210       self.cfg.SetDiskID(dev, node)
8211
8212     result = self.rpc.call_blockdev_close(node, self.instance.name,
8213                                           self.instance.disks)
8214     result.Raise("Cannot change disk to secondary on node %s" % node)
8215
8216   def _GoStandalone(self):
8217     """Disconnect from the network.
8218
8219     """
8220     self.feedback_fn("* changing into standalone mode")
8221     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8222                                                self.instance.disks)
8223     for node, nres in result.items():
8224       nres.Raise("Cannot disconnect disks node %s" % node)
8225
8226   def _GoReconnect(self, multimaster):
8227     """Reconnect to the network.
8228
8229     """
8230     if multimaster:
8231       msg = "dual-master"
8232     else:
8233       msg = "single-master"
8234     self.feedback_fn("* changing disks into %s mode" % msg)
8235     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8236                                            (self.instance.disks, self.instance),
8237                                            self.instance.name, multimaster)
8238     for node, nres in result.items():
8239       nres.Raise("Cannot change disks config on node %s" % node)
8240
8241   def _ExecCleanup(self):
8242     """Try to cleanup after a failed migration.
8243
8244     The cleanup is done by:
8245       - check that the instance is running only on one node
8246         (and update the config if needed)
8247       - change disks on its secondary node to secondary
8248       - wait until disks are fully synchronized
8249       - disconnect from the network
8250       - change disks into single-master mode
8251       - wait again until disks are fully synchronized
8252
8253     """
8254     instance = self.instance
8255     target_node = self.target_node
8256     source_node = self.source_node
8257
8258     # check running on only one node
8259     self.feedback_fn("* checking where the instance actually runs"
8260                      " (if this hangs, the hypervisor might be in"
8261                      " a bad state)")
8262     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8263     for node, result in ins_l.items():
8264       result.Raise("Can't contact node %s" % node)
8265
8266     runningon_source = instance.name in ins_l[source_node].payload
8267     runningon_target = instance.name in ins_l[target_node].payload
8268
8269     if runningon_source and runningon_target:
8270       raise errors.OpExecError("Instance seems to be running on two nodes,"
8271                                " or the hypervisor is confused; you will have"
8272                                " to ensure manually that it runs only on one"
8273                                " and restart this operation")
8274
8275     if not (runningon_source or runningon_target):
8276       raise errors.OpExecError("Instance does not seem to be running at all;"
8277                                " in this case it's safer to repair by"
8278                                " running 'gnt-instance stop' to ensure disk"
8279                                " shutdown, and then restarting it")
8280
8281     if runningon_target:
8282       # the migration has actually succeeded, we need to update the config
8283       self.feedback_fn("* instance running on secondary node (%s),"
8284                        " updating config" % target_node)
8285       instance.primary_node = target_node
8286       self.cfg.Update(instance, self.feedback_fn)
8287       demoted_node = source_node
8288     else:
8289       self.feedback_fn("* instance confirmed to be running on its"
8290                        " primary node (%s)" % source_node)
8291       demoted_node = target_node
8292
8293     if instance.disk_template in constants.DTS_INT_MIRROR:
8294       self._EnsureSecondary(demoted_node)
8295       try:
8296         self._WaitUntilSync()
8297       except errors.OpExecError:
8298         # we ignore here errors, since if the device is standalone, it
8299         # won't be able to sync
8300         pass
8301       self._GoStandalone()
8302       self._GoReconnect(False)
8303       self._WaitUntilSync()
8304
8305     self.feedback_fn("* done")
8306
8307   def _RevertDiskStatus(self):
8308     """Try to revert the disk status after a failed migration.
8309
8310     """
8311     target_node = self.target_node
8312     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8313       return
8314
8315     try:
8316       self._EnsureSecondary(target_node)
8317       self._GoStandalone()
8318       self._GoReconnect(False)
8319       self._WaitUntilSync()
8320     except errors.OpExecError, err:
8321       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8322                          " please try to recover the instance manually;"
8323                          " error '%s'" % str(err))
8324
8325   def _AbortMigration(self):
8326     """Call the hypervisor code to abort a started migration.
8327
8328     """
8329     instance = self.instance
8330     target_node = self.target_node
8331     source_node = self.source_node
8332     migration_info = self.migration_info
8333
8334     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8335                                                                  instance,
8336                                                                  migration_info,
8337                                                                  False)
8338     abort_msg = abort_result.fail_msg
8339     if abort_msg:
8340       logging.error("Aborting migration failed on target node %s: %s",
8341                     target_node, abort_msg)
8342       # Don't raise an exception here, as we stil have to try to revert the
8343       # disk status, even if this step failed.
8344
8345     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8346         instance, False, self.live)
8347     abort_msg = abort_result.fail_msg
8348     if abort_msg:
8349       logging.error("Aborting migration failed on source node %s: %s",
8350                     source_node, abort_msg)
8351
8352   def _ExecMigration(self):
8353     """Migrate an instance.
8354
8355     The migrate is done by:
8356       - change the disks into dual-master mode
8357       - wait until disks are fully synchronized again
8358       - migrate the instance
8359       - change disks on the new secondary node (the old primary) to secondary
8360       - wait until disks are fully synchronized
8361       - change disks into single-master mode
8362
8363     """
8364     instance = self.instance
8365     target_node = self.target_node
8366     source_node = self.source_node
8367
8368     # Check for hypervisor version mismatch and warn the user.
8369     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8370                                        None, [self.instance.hypervisor])
8371     for ninfo in nodeinfo.values():
8372       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8373                   ninfo.node)
8374     (_, _, (src_info, )) = nodeinfo[source_node].payload
8375     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8376
8377     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8378         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8379       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8380       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8381       if src_version != dst_version:
8382         self.feedback_fn("* warning: hypervisor version mismatch between"
8383                          " source (%s) and target (%s) node" %
8384                          (src_version, dst_version))
8385
8386     self.feedback_fn("* checking disk consistency between source and target")
8387     for (idx, dev) in enumerate(instance.disks):
8388       if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8389         raise errors.OpExecError("Disk %s is degraded or not fully"
8390                                  " synchronized on target node,"
8391                                  " aborting migration" % idx)
8392
8393     if self.current_mem > self.tgt_free_mem:
8394       if not self.allow_runtime_changes:
8395         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8396                                  " free memory to fit instance %s on target"
8397                                  " node %s (have %dMB, need %dMB)" %
8398                                  (instance.name, target_node,
8399                                   self.tgt_free_mem, self.current_mem))
8400       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8401       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8402                                                      instance,
8403                                                      self.tgt_free_mem)
8404       rpcres.Raise("Cannot modify instance runtime memory")
8405
8406     # First get the migration information from the remote node
8407     result = self.rpc.call_migration_info(source_node, instance)
8408     msg = result.fail_msg
8409     if msg:
8410       log_err = ("Failed fetching source migration information from %s: %s" %
8411                  (source_node, msg))
8412       logging.error(log_err)
8413       raise errors.OpExecError(log_err)
8414
8415     self.migration_info = migration_info = result.payload
8416
8417     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8418       # Then switch the disks to master/master mode
8419       self._EnsureSecondary(target_node)
8420       self._GoStandalone()
8421       self._GoReconnect(True)
8422       self._WaitUntilSync()
8423
8424     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8425     result = self.rpc.call_accept_instance(target_node,
8426                                            instance,
8427                                            migration_info,
8428                                            self.nodes_ip[target_node])
8429
8430     msg = result.fail_msg
8431     if msg:
8432       logging.error("Instance pre-migration failed, trying to revert"
8433                     " disk status: %s", msg)
8434       self.feedback_fn("Pre-migration failed, aborting")
8435       self._AbortMigration()
8436       self._RevertDiskStatus()
8437       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8438                                (instance.name, msg))
8439
8440     self.feedback_fn("* migrating instance to %s" % target_node)
8441     result = self.rpc.call_instance_migrate(source_node, instance,
8442                                             self.nodes_ip[target_node],
8443                                             self.live)
8444     msg = result.fail_msg
8445     if msg:
8446       logging.error("Instance migration failed, trying to revert"
8447                     " disk status: %s", msg)
8448       self.feedback_fn("Migration failed, aborting")
8449       self._AbortMigration()
8450       self._RevertDiskStatus()
8451       raise errors.OpExecError("Could not migrate instance %s: %s" %
8452                                (instance.name, msg))
8453
8454     self.feedback_fn("* starting memory transfer")
8455     last_feedback = time.time()
8456     while True:
8457       result = self.rpc.call_instance_get_migration_status(source_node,
8458                                                            instance)
8459       msg = result.fail_msg
8460       ms = result.payload   # MigrationStatus instance
8461       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8462         logging.error("Instance migration failed, trying to revert"
8463                       " disk status: %s", msg)
8464         self.feedback_fn("Migration failed, aborting")
8465         self._AbortMigration()
8466         self._RevertDiskStatus()
8467         raise errors.OpExecError("Could not migrate instance %s: %s" %
8468                                  (instance.name, msg))
8469
8470       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8471         self.feedback_fn("* memory transfer complete")
8472         break
8473
8474       if (utils.TimeoutExpired(last_feedback,
8475                                self._MIGRATION_FEEDBACK_INTERVAL) and
8476           ms.transferred_ram is not None):
8477         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8478         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8479         last_feedback = time.time()
8480
8481       time.sleep(self._MIGRATION_POLL_INTERVAL)
8482
8483     result = self.rpc.call_instance_finalize_migration_src(source_node,
8484                                                            instance,
8485                                                            True,
8486                                                            self.live)
8487     msg = result.fail_msg
8488     if msg:
8489       logging.error("Instance migration succeeded, but finalization failed"
8490                     " on the source node: %s", msg)
8491       raise errors.OpExecError("Could not finalize instance migration: %s" %
8492                                msg)
8493
8494     instance.primary_node = target_node
8495
8496     # distribute new instance config to the other nodes
8497     self.cfg.Update(instance, self.feedback_fn)
8498
8499     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8500                                                            instance,
8501                                                            migration_info,
8502                                                            True)
8503     msg = result.fail_msg
8504     if msg:
8505       logging.error("Instance migration succeeded, but finalization failed"
8506                     " on the target node: %s", msg)
8507       raise errors.OpExecError("Could not finalize instance migration: %s" %
8508                                msg)
8509
8510     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8511       self._EnsureSecondary(source_node)
8512       self._WaitUntilSync()
8513       self._GoStandalone()
8514       self._GoReconnect(False)
8515       self._WaitUntilSync()
8516
8517     # If the instance's disk template is `rbd' and there was a successful
8518     # migration, unmap the device from the source node.
8519     if self.instance.disk_template == constants.DT_RBD:
8520       disks = _ExpandCheckDisks(instance, instance.disks)
8521       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8522       for disk in disks:
8523         result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8524         msg = result.fail_msg
8525         if msg:
8526           logging.error("Migration was successful, but couldn't unmap the"
8527                         " block device %s on source node %s: %s",
8528                         disk.iv_name, source_node, msg)
8529           logging.error("You need to unmap the device %s manually on %s",
8530                         disk.iv_name, source_node)
8531
8532     self.feedback_fn("* done")
8533
8534   def _ExecFailover(self):
8535     """Failover an instance.
8536
8537     The failover is done by shutting it down on its present node and
8538     starting it on the secondary.
8539
8540     """
8541     instance = self.instance
8542     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8543
8544     source_node = instance.primary_node
8545     target_node = self.target_node
8546
8547     if instance.admin_state == constants.ADMINST_UP:
8548       self.feedback_fn("* checking disk consistency between source and target")
8549       for (idx, dev) in enumerate(instance.disks):
8550         # for drbd, these are drbd over lvm
8551         if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8552                                      False):
8553           if primary_node.offline:
8554             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8555                              " target node %s" %
8556                              (primary_node.name, idx, target_node))
8557           elif not self.ignore_consistency:
8558             raise errors.OpExecError("Disk %s is degraded on target node,"
8559                                      " aborting failover" % idx)
8560     else:
8561       self.feedback_fn("* not checking disk consistency as instance is not"
8562                        " running")
8563
8564     self.feedback_fn("* shutting down instance on source node")
8565     logging.info("Shutting down instance %s on node %s",
8566                  instance.name, source_node)
8567
8568     result = self.rpc.call_instance_shutdown(source_node, instance,
8569                                              self.shutdown_timeout)
8570     msg = result.fail_msg
8571     if msg:
8572       if self.ignore_consistency or primary_node.offline:
8573         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8574                            " proceeding anyway; please make sure node"
8575                            " %s is down; error details: %s",
8576                            instance.name, source_node, source_node, msg)
8577       else:
8578         raise errors.OpExecError("Could not shutdown instance %s on"
8579                                  " node %s: %s" %
8580                                  (instance.name, source_node, msg))
8581
8582     self.feedback_fn("* deactivating the instance's disks on source node")
8583     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8584       raise errors.OpExecError("Can't shut down the instance's disks")
8585
8586     instance.primary_node = target_node
8587     # distribute new instance config to the other nodes
8588     self.cfg.Update(instance, self.feedback_fn)
8589
8590     # Only start the instance if it's marked as up
8591     if instance.admin_state == constants.ADMINST_UP:
8592       self.feedback_fn("* activating the instance's disks on target node %s" %
8593                        target_node)
8594       logging.info("Starting instance %s on node %s",
8595                    instance.name, target_node)
8596
8597       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8598                                            ignore_secondaries=True)
8599       if not disks_ok:
8600         _ShutdownInstanceDisks(self.lu, instance)
8601         raise errors.OpExecError("Can't activate the instance's disks")
8602
8603       self.feedback_fn("* starting the instance on the target node %s" %
8604                        target_node)
8605       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8606                                             False)
8607       msg = result.fail_msg
8608       if msg:
8609         _ShutdownInstanceDisks(self.lu, instance)
8610         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8611                                  (instance.name, target_node, msg))
8612
8613   def Exec(self, feedback_fn):
8614     """Perform the migration.
8615
8616     """
8617     self.feedback_fn = feedback_fn
8618     self.source_node = self.instance.primary_node
8619
8620     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8621     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8622       self.target_node = self.instance.secondary_nodes[0]
8623       # Otherwise self.target_node has been populated either
8624       # directly, or through an iallocator.
8625
8626     self.all_nodes = [self.source_node, self.target_node]
8627     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8628                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8629
8630     if self.failover:
8631       feedback_fn("Failover instance %s" % self.instance.name)
8632       self._ExecFailover()
8633     else:
8634       feedback_fn("Migrating instance %s" % self.instance.name)
8635
8636       if self.cleanup:
8637         return self._ExecCleanup()
8638       else:
8639         return self._ExecMigration()
8640
8641
8642 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8643                     force_open):
8644   """Wrapper around L{_CreateBlockDevInner}.
8645
8646   This method annotates the root device first.
8647
8648   """
8649   (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8650   return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8651                               force_open)
8652
8653
8654 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8655                          info, force_open):
8656   """Create a tree of block devices on a given node.
8657
8658   If this device type has to be created on secondaries, create it and
8659   all its children.
8660
8661   If not, just recurse to children keeping the same 'force' value.
8662
8663   @attention: The device has to be annotated already.
8664
8665   @param lu: the lu on whose behalf we execute
8666   @param node: the node on which to create the device
8667   @type instance: L{objects.Instance}
8668   @param instance: the instance which owns the device
8669   @type device: L{objects.Disk}
8670   @param device: the device to create
8671   @type force_create: boolean
8672   @param force_create: whether to force creation of this device; this
8673       will be change to True whenever we find a device which has
8674       CreateOnSecondary() attribute
8675   @param info: the extra 'metadata' we should attach to the device
8676       (this will be represented as a LVM tag)
8677   @type force_open: boolean
8678   @param force_open: this parameter will be passes to the
8679       L{backend.BlockdevCreate} function where it specifies
8680       whether we run on primary or not, and it affects both
8681       the child assembly and the device own Open() execution
8682
8683   """
8684   if device.CreateOnSecondary():
8685     force_create = True
8686
8687   if device.children:
8688     for child in device.children:
8689       _CreateBlockDevInner(lu, node, instance, child, force_create,
8690                            info, force_open)
8691
8692   if not force_create:
8693     return
8694
8695   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8696
8697
8698 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8699   """Create a single block device on a given node.
8700
8701   This will not recurse over children of the device, so they must be
8702   created in advance.
8703
8704   @param lu: the lu on whose behalf we execute
8705   @param node: the node on which to create the device
8706   @type instance: L{objects.Instance}
8707   @param instance: the instance which owns the device
8708   @type device: L{objects.Disk}
8709   @param device: the device to create
8710   @param info: the extra 'metadata' we should attach to the device
8711       (this will be represented as a LVM tag)
8712   @type force_open: boolean
8713   @param force_open: this parameter will be passes to the
8714       L{backend.BlockdevCreate} function where it specifies
8715       whether we run on primary or not, and it affects both
8716       the child assembly and the device own Open() execution
8717
8718   """
8719   lu.cfg.SetDiskID(device, node)
8720   result = lu.rpc.call_blockdev_create(node, device, device.size,
8721                                        instance.name, force_open, info)
8722   result.Raise("Can't create block device %s on"
8723                " node %s for instance %s" % (device, node, instance.name))
8724   if device.physical_id is None:
8725     device.physical_id = result.payload
8726
8727
8728 def _GenerateUniqueNames(lu, exts):
8729   """Generate a suitable LV name.
8730
8731   This will generate a logical volume name for the given instance.
8732
8733   """
8734   results = []
8735   for val in exts:
8736     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8737     results.append("%s%s" % (new_id, val))
8738   return results
8739
8740
8741 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8742                          iv_name, p_minor, s_minor):
8743   """Generate a drbd8 device complete with its children.
8744
8745   """
8746   assert len(vgnames) == len(names) == 2
8747   port = lu.cfg.AllocatePort()
8748   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8749
8750   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8751                           logical_id=(vgnames[0], names[0]),
8752                           params={})
8753   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8754                           logical_id=(vgnames[1], names[1]),
8755                           params={})
8756   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8757                           logical_id=(primary, secondary, port,
8758                                       p_minor, s_minor,
8759                                       shared_secret),
8760                           children=[dev_data, dev_meta],
8761                           iv_name=iv_name, params={})
8762   return drbd_dev
8763
8764
8765 _DISK_TEMPLATE_NAME_PREFIX = {
8766   constants.DT_PLAIN: "",
8767   constants.DT_RBD: ".rbd",
8768   }
8769
8770
8771 _DISK_TEMPLATE_DEVICE_TYPE = {
8772   constants.DT_PLAIN: constants.LD_LV,
8773   constants.DT_FILE: constants.LD_FILE,
8774   constants.DT_SHARED_FILE: constants.LD_FILE,
8775   constants.DT_BLOCK: constants.LD_BLOCKDEV,
8776   constants.DT_RBD: constants.LD_RBD,
8777   }
8778
8779
8780 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8781     secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8782     feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8783     _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8784   """Generate the entire disk layout for a given template type.
8785
8786   """
8787   #TODO: compute space requirements
8788
8789   vgname = lu.cfg.GetVGName()
8790   disk_count = len(disk_info)
8791   disks = []
8792
8793   if template_name == constants.DT_DISKLESS:
8794     pass
8795   elif template_name == constants.DT_DRBD8:
8796     if len(secondary_nodes) != 1:
8797       raise errors.ProgrammerError("Wrong template configuration")
8798     remote_node = secondary_nodes[0]
8799     minors = lu.cfg.AllocateDRBDMinor(
8800       [primary_node, remote_node] * len(disk_info), instance_name)
8801
8802     (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8803                                                        full_disk_params)
8804     drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8805
8806     names = []
8807     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8808                                                for i in range(disk_count)]):
8809       names.append(lv_prefix + "_data")
8810       names.append(lv_prefix + "_meta")
8811     for idx, disk in enumerate(disk_info):
8812       disk_index = idx + base_index
8813       data_vg = disk.get(constants.IDISK_VG, vgname)
8814       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8815       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8816                                       disk[constants.IDISK_SIZE],
8817                                       [data_vg, meta_vg],
8818                                       names[idx * 2:idx * 2 + 2],
8819                                       "disk/%d" % disk_index,
8820                                       minors[idx * 2], minors[idx * 2 + 1])
8821       disk_dev.mode = disk[constants.IDISK_MODE]
8822       disks.append(disk_dev)
8823   else:
8824     if secondary_nodes:
8825       raise errors.ProgrammerError("Wrong template configuration")
8826
8827     if template_name == constants.DT_FILE:
8828       _req_file_storage()
8829     elif template_name == constants.DT_SHARED_FILE:
8830       _req_shr_file_storage()
8831
8832     name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8833     if name_prefix is None:
8834       names = None
8835     else:
8836       names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8837                                         (name_prefix, base_index + i)
8838                                         for i in range(disk_count)])
8839
8840     if template_name == constants.DT_PLAIN:
8841       def logical_id_fn(idx, _, disk):
8842         vg = disk.get(constants.IDISK_VG, vgname)
8843         return (vg, names[idx])
8844     elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8845       logical_id_fn = \
8846         lambda _, disk_index, disk: (file_driver,
8847                                      "%s/disk%d" % (file_storage_dir,
8848                                                     disk_index))
8849     elif template_name == constants.DT_BLOCK:
8850       logical_id_fn = \
8851         lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8852                                        disk[constants.IDISK_ADOPT])
8853     elif template_name == constants.DT_RBD:
8854       logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8855     else:
8856       raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8857
8858     dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8859
8860     for idx, disk in enumerate(disk_info):
8861       disk_index = idx + base_index
8862       size = disk[constants.IDISK_SIZE]
8863       feedback_fn("* disk %s, size %s" %
8864                   (disk_index, utils.FormatUnit(size, "h")))
8865       disks.append(objects.Disk(dev_type=dev_type, size=size,
8866                                 logical_id=logical_id_fn(idx, disk_index, disk),
8867                                 iv_name="disk/%d" % disk_index,
8868                                 mode=disk[constants.IDISK_MODE],
8869                                 params={}))
8870
8871   return disks
8872
8873
8874 def _GetInstanceInfoText(instance):
8875   """Compute that text that should be added to the disk's metadata.
8876
8877   """
8878   return "originstname+%s" % instance.name
8879
8880
8881 def _CalcEta(time_taken, written, total_size):
8882   """Calculates the ETA based on size written and total size.
8883
8884   @param time_taken: The time taken so far
8885   @param written: amount written so far
8886   @param total_size: The total size of data to be written
8887   @return: The remaining time in seconds
8888
8889   """
8890   avg_time = time_taken / float(written)
8891   return (total_size - written) * avg_time
8892
8893
8894 def _WipeDisks(lu, instance):
8895   """Wipes instance disks.
8896
8897   @type lu: L{LogicalUnit}
8898   @param lu: the logical unit on whose behalf we execute
8899   @type instance: L{objects.Instance}
8900   @param instance: the instance whose disks we should create
8901   @return: the success of the wipe
8902
8903   """
8904   node = instance.primary_node
8905
8906   for device in instance.disks:
8907     lu.cfg.SetDiskID(device, node)
8908
8909   logging.info("Pause sync of instance %s disks", instance.name)
8910   result = lu.rpc.call_blockdev_pause_resume_sync(node,
8911                                                   (instance.disks, instance),
8912                                                   True)
8913
8914   for idx, success in enumerate(result.payload):
8915     if not success:
8916       logging.warn("pause-sync of instance %s for disks %d failed",
8917                    instance.name, idx)
8918
8919   try:
8920     for idx, device in enumerate(instance.disks):
8921       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8922       # MAX_WIPE_CHUNK at max
8923       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8924                             constants.MIN_WIPE_CHUNK_PERCENT)
8925       # we _must_ make this an int, otherwise rounding errors will
8926       # occur
8927       wipe_chunk_size = int(wipe_chunk_size)
8928
8929       lu.LogInfo("* Wiping disk %d", idx)
8930       logging.info("Wiping disk %d for instance %s, node %s using"
8931                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8932
8933       offset = 0
8934       size = device.size
8935       last_output = 0
8936       start_time = time.time()
8937
8938       while offset < size:
8939         wipe_size = min(wipe_chunk_size, size - offset)
8940         logging.debug("Wiping disk %d, offset %s, chunk %s",
8941                       idx, offset, wipe_size)
8942         result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8943                                            wipe_size)
8944         result.Raise("Could not wipe disk %d at offset %d for size %d" %
8945                      (idx, offset, wipe_size))
8946         now = time.time()
8947         offset += wipe_size
8948         if now - last_output >= 60:
8949           eta = _CalcEta(now - start_time, offset, size)
8950           lu.LogInfo(" - done: %.1f%% ETA: %s" %
8951                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
8952           last_output = now
8953   finally:
8954     logging.info("Resume sync of instance %s disks", instance.name)
8955
8956     result = lu.rpc.call_blockdev_pause_resume_sync(node,
8957                                                     (instance.disks, instance),
8958                                                     False)
8959
8960     for idx, success in enumerate(result.payload):
8961       if not success:
8962         lu.LogWarning("Resume sync of disk %d failed, please have a"
8963                       " look at the status and troubleshoot the issue", idx)
8964         logging.warn("resume-sync of instance %s for disks %d failed",
8965                      instance.name, idx)
8966
8967
8968 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8969   """Create all disks for an instance.
8970
8971   This abstracts away some work from AddInstance.
8972
8973   @type lu: L{LogicalUnit}
8974   @param lu: the logical unit on whose behalf we execute
8975   @type instance: L{objects.Instance}
8976   @param instance: the instance whose disks we should create
8977   @type to_skip: list
8978   @param to_skip: list of indices to skip
8979   @type target_node: string
8980   @param target_node: if passed, overrides the target node for creation
8981   @rtype: boolean
8982   @return: the success of the creation
8983
8984   """
8985   info = _GetInstanceInfoText(instance)
8986   if target_node is None:
8987     pnode = instance.primary_node
8988     all_nodes = instance.all_nodes
8989   else:
8990     pnode = target_node
8991     all_nodes = [pnode]
8992
8993   if instance.disk_template in constants.DTS_FILEBASED:
8994     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8995     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8996
8997     result.Raise("Failed to create directory '%s' on"
8998                  " node %s" % (file_storage_dir, pnode))
8999
9000   # Note: this needs to be kept in sync with adding of disks in
9001   # LUInstanceSetParams
9002   for idx, device in enumerate(instance.disks):
9003     if to_skip and idx in to_skip:
9004       continue
9005     logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9006     #HARDCODE
9007     for node in all_nodes:
9008       f_create = node == pnode
9009       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9010
9011
9012 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9013   """Remove all disks for an instance.
9014
9015   This abstracts away some work from `AddInstance()` and
9016   `RemoveInstance()`. Note that in case some of the devices couldn't
9017   be removed, the removal will continue with the other ones (compare
9018   with `_CreateDisks()`).
9019
9020   @type lu: L{LogicalUnit}
9021   @param lu: the logical unit on whose behalf we execute
9022   @type instance: L{objects.Instance}
9023   @param instance: the instance whose disks we should remove
9024   @type target_node: string
9025   @param target_node: used to override the node on which to remove the disks
9026   @rtype: boolean
9027   @return: the success of the removal
9028
9029   """
9030   logging.info("Removing block devices for instance %s", instance.name)
9031
9032   all_result = True
9033   ports_to_release = set()
9034   anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9035   for (idx, device) in enumerate(anno_disks):
9036     if target_node:
9037       edata = [(target_node, device)]
9038     else:
9039       edata = device.ComputeNodeTree(instance.primary_node)
9040     for node, disk in edata:
9041       lu.cfg.SetDiskID(disk, node)
9042       result = lu.rpc.call_blockdev_remove(node, disk)
9043       if result.fail_msg:
9044         lu.LogWarning("Could not remove disk %s on node %s,"
9045                       " continuing anyway: %s", idx, node, result.fail_msg)
9046         if not (result.offline and node != instance.primary_node):
9047           all_result = False
9048
9049     # if this is a DRBD disk, return its port to the pool
9050     if device.dev_type in constants.LDS_DRBD:
9051       ports_to_release.add(device.logical_id[2])
9052
9053   if all_result or ignore_failures:
9054     for port in ports_to_release:
9055       lu.cfg.AddTcpUdpPort(port)
9056
9057   if instance.disk_template == constants.DT_FILE:
9058     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9059     if target_node:
9060       tgt = target_node
9061     else:
9062       tgt = instance.primary_node
9063     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9064     if result.fail_msg:
9065       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9066                     file_storage_dir, instance.primary_node, result.fail_msg)
9067       all_result = False
9068
9069   return all_result
9070
9071
9072 def _ComputeDiskSizePerVG(disk_template, disks):
9073   """Compute disk size requirements in the volume group
9074
9075   """
9076   def _compute(disks, payload):
9077     """Universal algorithm.
9078
9079     """
9080     vgs = {}
9081     for disk in disks:
9082       vgs[disk[constants.IDISK_VG]] = \
9083         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9084
9085     return vgs
9086
9087   # Required free disk space as a function of disk and swap space
9088   req_size_dict = {
9089     constants.DT_DISKLESS: {},
9090     constants.DT_PLAIN: _compute(disks, 0),
9091     # 128 MB are added for drbd metadata for each disk
9092     constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9093     constants.DT_FILE: {},
9094     constants.DT_SHARED_FILE: {},
9095   }
9096
9097   if disk_template not in req_size_dict:
9098     raise errors.ProgrammerError("Disk template '%s' size requirement"
9099                                  " is unknown" % disk_template)
9100
9101   return req_size_dict[disk_template]
9102
9103
9104 def _ComputeDiskSize(disk_template, disks):
9105   """Compute disk size requirements in the volume group
9106
9107   """
9108   # Required free disk space as a function of disk and swap space
9109   req_size_dict = {
9110     constants.DT_DISKLESS: None,
9111     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9112     # 128 MB are added for drbd metadata for each disk
9113     constants.DT_DRBD8:
9114       sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9115     constants.DT_FILE: None,
9116     constants.DT_SHARED_FILE: 0,
9117     constants.DT_BLOCK: 0,
9118     constants.DT_RBD: 0,
9119   }
9120
9121   if disk_template not in req_size_dict:
9122     raise errors.ProgrammerError("Disk template '%s' size requirement"
9123                                  " is unknown" % disk_template)
9124
9125   return req_size_dict[disk_template]
9126
9127
9128 def _FilterVmNodes(lu, nodenames):
9129   """Filters out non-vm_capable nodes from a list.
9130
9131   @type lu: L{LogicalUnit}
9132   @param lu: the logical unit for which we check
9133   @type nodenames: list
9134   @param nodenames: the list of nodes on which we should check
9135   @rtype: list
9136   @return: the list of vm-capable nodes
9137
9138   """
9139   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9140   return [name for name in nodenames if name not in vm_nodes]
9141
9142
9143 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9144   """Hypervisor parameter validation.
9145
9146   This function abstract the hypervisor parameter validation to be
9147   used in both instance create and instance modify.
9148
9149   @type lu: L{LogicalUnit}
9150   @param lu: the logical unit for which we check
9151   @type nodenames: list
9152   @param nodenames: the list of nodes on which we should check
9153   @type hvname: string
9154   @param hvname: the name of the hypervisor we should use
9155   @type hvparams: dict
9156   @param hvparams: the parameters which we need to check
9157   @raise errors.OpPrereqError: if the parameters are not valid
9158
9159   """
9160   nodenames = _FilterVmNodes(lu, nodenames)
9161
9162   cluster = lu.cfg.GetClusterInfo()
9163   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9164
9165   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9166   for node in nodenames:
9167     info = hvinfo[node]
9168     if info.offline:
9169       continue
9170     info.Raise("Hypervisor parameter validation failed on node %s" % node)
9171
9172
9173 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9174   """OS parameters validation.
9175
9176   @type lu: L{LogicalUnit}
9177   @param lu: the logical unit for which we check
9178   @type required: boolean
9179   @param required: whether the validation should fail if the OS is not
9180       found
9181   @type nodenames: list
9182   @param nodenames: the list of nodes on which we should check
9183   @type osname: string
9184   @param osname: the name of the hypervisor we should use
9185   @type osparams: dict
9186   @param osparams: the parameters which we need to check
9187   @raise errors.OpPrereqError: if the parameters are not valid
9188
9189   """
9190   nodenames = _FilterVmNodes(lu, nodenames)
9191   result = lu.rpc.call_os_validate(nodenames, required, osname,
9192                                    [constants.OS_VALIDATE_PARAMETERS],
9193                                    osparams)
9194   for node, nres in result.items():
9195     # we don't check for offline cases since this should be run only
9196     # against the master node and/or an instance's nodes
9197     nres.Raise("OS Parameters validation failed on node %s" % node)
9198     if not nres.payload:
9199       lu.LogInfo("OS %s not found on node %s, validation skipped",
9200                  osname, node)
9201
9202
9203 class LUInstanceCreate(LogicalUnit):
9204   """Create an instance.
9205
9206   """
9207   HPATH = "instance-add"
9208   HTYPE = constants.HTYPE_INSTANCE
9209   REQ_BGL = False
9210
9211   def CheckArguments(self):
9212     """Check arguments.
9213
9214     """
9215     # do not require name_check to ease forward/backward compatibility
9216     # for tools
9217     if self.op.no_install and self.op.start:
9218       self.LogInfo("No-installation mode selected, disabling startup")
9219       self.op.start = False
9220     # validate/normalize the instance name
9221     self.op.instance_name = \
9222       netutils.Hostname.GetNormalizedName(self.op.instance_name)
9223
9224     if self.op.ip_check and not self.op.name_check:
9225       # TODO: make the ip check more flexible and not depend on the name check
9226       raise errors.OpPrereqError("Cannot do IP address check without a name"
9227                                  " check", errors.ECODE_INVAL)
9228
9229     # check nics' parameter names
9230     for nic in self.op.nics:
9231       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9232
9233     # check disks. parameter names and consistent adopt/no-adopt strategy
9234     has_adopt = has_no_adopt = False
9235     for disk in self.op.disks:
9236       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9237       if constants.IDISK_ADOPT in disk:
9238         has_adopt = True
9239       else:
9240         has_no_adopt = True
9241     if has_adopt and has_no_adopt:
9242       raise errors.OpPrereqError("Either all disks are adopted or none is",
9243                                  errors.ECODE_INVAL)
9244     if has_adopt:
9245       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9246         raise errors.OpPrereqError("Disk adoption is not supported for the"
9247                                    " '%s' disk template" %
9248                                    self.op.disk_template,
9249                                    errors.ECODE_INVAL)
9250       if self.op.iallocator is not None:
9251         raise errors.OpPrereqError("Disk adoption not allowed with an"
9252                                    " iallocator script", errors.ECODE_INVAL)
9253       if self.op.mode == constants.INSTANCE_IMPORT:
9254         raise errors.OpPrereqError("Disk adoption not allowed for"
9255                                    " instance import", errors.ECODE_INVAL)
9256     else:
9257       if self.op.disk_template in constants.DTS_MUST_ADOPT:
9258         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9259                                    " but no 'adopt' parameter given" %
9260                                    self.op.disk_template,
9261                                    errors.ECODE_INVAL)
9262
9263     self.adopt_disks = has_adopt
9264
9265     # instance name verification
9266     if self.op.name_check:
9267       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9268       self.op.instance_name = self.hostname1.name
9269       # used in CheckPrereq for ip ping check
9270       self.check_ip = self.hostname1.ip
9271     else:
9272       self.check_ip = None
9273
9274     # file storage checks
9275     if (self.op.file_driver and
9276         not self.op.file_driver in constants.FILE_DRIVER):
9277       raise errors.OpPrereqError("Invalid file driver name '%s'" %
9278                                  self.op.file_driver, errors.ECODE_INVAL)
9279
9280     if self.op.disk_template == constants.DT_FILE:
9281       opcodes.RequireFileStorage()
9282     elif self.op.disk_template == constants.DT_SHARED_FILE:
9283       opcodes.RequireSharedFileStorage()
9284
9285     ### Node/iallocator related checks
9286     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9287
9288     if self.op.pnode is not None:
9289       if self.op.disk_template in constants.DTS_INT_MIRROR:
9290         if self.op.snode is None:
9291           raise errors.OpPrereqError("The networked disk templates need"
9292                                      " a mirror node", errors.ECODE_INVAL)
9293       elif self.op.snode:
9294         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9295                         " template")
9296         self.op.snode = None
9297
9298     self._cds = _GetClusterDomainSecret()
9299
9300     if self.op.mode == constants.INSTANCE_IMPORT:
9301       # On import force_variant must be True, because if we forced it at
9302       # initial install, our only chance when importing it back is that it
9303       # works again!
9304       self.op.force_variant = True
9305
9306       if self.op.no_install:
9307         self.LogInfo("No-installation mode has no effect during import")
9308
9309     elif self.op.mode == constants.INSTANCE_CREATE:
9310       if self.op.os_type is None:
9311         raise errors.OpPrereqError("No guest OS specified",
9312                                    errors.ECODE_INVAL)
9313       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9314         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9315                                    " installation" % self.op.os_type,
9316                                    errors.ECODE_STATE)
9317       if self.op.disk_template is None:
9318         raise errors.OpPrereqError("No disk template specified",
9319                                    errors.ECODE_INVAL)
9320
9321     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9322       # Check handshake to ensure both clusters have the same domain secret
9323       src_handshake = self.op.source_handshake
9324       if not src_handshake:
9325         raise errors.OpPrereqError("Missing source handshake",
9326                                    errors.ECODE_INVAL)
9327
9328       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9329                                                            src_handshake)
9330       if errmsg:
9331         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9332                                    errors.ECODE_INVAL)
9333
9334       # Load and check source CA
9335       self.source_x509_ca_pem = self.op.source_x509_ca
9336       if not self.source_x509_ca_pem:
9337         raise errors.OpPrereqError("Missing source X509 CA",
9338                                    errors.ECODE_INVAL)
9339
9340       try:
9341         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9342                                                     self._cds)
9343       except OpenSSL.crypto.Error, err:
9344         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9345                                    (err, ), errors.ECODE_INVAL)
9346
9347       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9348       if errcode is not None:
9349         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9350                                    errors.ECODE_INVAL)
9351
9352       self.source_x509_ca = cert
9353
9354       src_instance_name = self.op.source_instance_name
9355       if not src_instance_name:
9356         raise errors.OpPrereqError("Missing source instance name",
9357                                    errors.ECODE_INVAL)
9358
9359       self.source_instance_name = \
9360           netutils.GetHostname(name=src_instance_name).name
9361
9362     else:
9363       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9364                                  self.op.mode, errors.ECODE_INVAL)
9365
9366   def ExpandNames(self):
9367     """ExpandNames for CreateInstance.
9368
9369     Figure out the right locks for instance creation.
9370
9371     """
9372     self.needed_locks = {}
9373
9374     instance_name = self.op.instance_name
9375     # this is just a preventive check, but someone might still add this
9376     # instance in the meantime, and creation will fail at lock-add time
9377     if instance_name in self.cfg.GetInstanceList():
9378       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9379                                  instance_name, errors.ECODE_EXISTS)
9380
9381     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9382
9383     if self.op.iallocator:
9384       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9385       # specifying a group on instance creation and then selecting nodes from
9386       # that group
9387       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9388       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9389     else:
9390       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9391       nodelist = [self.op.pnode]
9392       if self.op.snode is not None:
9393         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9394         nodelist.append(self.op.snode)
9395       self.needed_locks[locking.LEVEL_NODE] = nodelist
9396       # Lock resources of instance's primary and secondary nodes (copy to
9397       # prevent accidential modification)
9398       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9399
9400     # in case of import lock the source node too
9401     if self.op.mode == constants.INSTANCE_IMPORT:
9402       src_node = self.op.src_node
9403       src_path = self.op.src_path
9404
9405       if src_path is None:
9406         self.op.src_path = src_path = self.op.instance_name
9407
9408       if src_node is None:
9409         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9410         self.op.src_node = None
9411         if os.path.isabs(src_path):
9412           raise errors.OpPrereqError("Importing an instance from a path"
9413                                      " requires a source node option",
9414                                      errors.ECODE_INVAL)
9415       else:
9416         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9417         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9418           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9419         if not os.path.isabs(src_path):
9420           self.op.src_path = src_path = \
9421             utils.PathJoin(constants.EXPORT_DIR, src_path)
9422
9423   def _RunAllocator(self):
9424     """Run the allocator based on input opcode.
9425
9426     """
9427     nics = [n.ToDict() for n in self.nics]
9428     ial = IAllocator(self.cfg, self.rpc,
9429                      mode=constants.IALLOCATOR_MODE_ALLOC,
9430                      name=self.op.instance_name,
9431                      disk_template=self.op.disk_template,
9432                      tags=self.op.tags,
9433                      os=self.op.os_type,
9434                      vcpus=self.be_full[constants.BE_VCPUS],
9435                      memory=self.be_full[constants.BE_MAXMEM],
9436                      spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9437                      disks=self.disks,
9438                      nics=nics,
9439                      hypervisor=self.op.hypervisor,
9440                      )
9441
9442     ial.Run(self.op.iallocator)
9443
9444     if not ial.success:
9445       raise errors.OpPrereqError("Can't compute nodes using"
9446                                  " iallocator '%s': %s" %
9447                                  (self.op.iallocator, ial.info),
9448                                  errors.ECODE_NORES)
9449     if len(ial.result) != ial.required_nodes:
9450       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9451                                  " of nodes (%s), required %s" %
9452                                  (self.op.iallocator, len(ial.result),
9453                                   ial.required_nodes), errors.ECODE_FAULT)
9454     self.op.pnode = ial.result[0]
9455     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9456                  self.op.instance_name, self.op.iallocator,
9457                  utils.CommaJoin(ial.result))
9458     if ial.required_nodes == 2:
9459       self.op.snode = ial.result[1]
9460
9461   def BuildHooksEnv(self):
9462     """Build hooks env.
9463
9464     This runs on master, primary and secondary nodes of the instance.
9465
9466     """
9467     env = {
9468       "ADD_MODE": self.op.mode,
9469       }
9470     if self.op.mode == constants.INSTANCE_IMPORT:
9471       env["SRC_NODE"] = self.op.src_node
9472       env["SRC_PATH"] = self.op.src_path
9473       env["SRC_IMAGES"] = self.src_images
9474
9475     env.update(_BuildInstanceHookEnv(
9476       name=self.op.instance_name,
9477       primary_node=self.op.pnode,
9478       secondary_nodes=self.secondaries,
9479       status=self.op.start,
9480       os_type=self.op.os_type,
9481       minmem=self.be_full[constants.BE_MINMEM],
9482       maxmem=self.be_full[constants.BE_MAXMEM],
9483       vcpus=self.be_full[constants.BE_VCPUS],
9484       nics=_NICListToTuple(self, self.nics),
9485       disk_template=self.op.disk_template,
9486       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9487              for d in self.disks],
9488       bep=self.be_full,
9489       hvp=self.hv_full,
9490       hypervisor_name=self.op.hypervisor,
9491       tags=self.op.tags,
9492     ))
9493
9494     return env
9495
9496   def BuildHooksNodes(self):
9497     """Build hooks nodes.
9498
9499     """
9500     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9501     return nl, nl
9502
9503   def _ReadExportInfo(self):
9504     """Reads the export information from disk.
9505
9506     It will override the opcode source node and path with the actual
9507     information, if these two were not specified before.
9508
9509     @return: the export information
9510
9511     """
9512     assert self.op.mode == constants.INSTANCE_IMPORT
9513
9514     src_node = self.op.src_node
9515     src_path = self.op.src_path
9516
9517     if src_node is None:
9518       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9519       exp_list = self.rpc.call_export_list(locked_nodes)
9520       found = False
9521       for node in exp_list:
9522         if exp_list[node].fail_msg:
9523           continue
9524         if src_path in exp_list[node].payload:
9525           found = True
9526           self.op.src_node = src_node = node
9527           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9528                                                        src_path)
9529           break
9530       if not found:
9531         raise errors.OpPrereqError("No export found for relative path %s" %
9532                                     src_path, errors.ECODE_INVAL)
9533
9534     _CheckNodeOnline(self, src_node)
9535     result = self.rpc.call_export_info(src_node, src_path)
9536     result.Raise("No export or invalid export found in dir %s" % src_path)
9537
9538     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9539     if not export_info.has_section(constants.INISECT_EXP):
9540       raise errors.ProgrammerError("Corrupted export config",
9541                                    errors.ECODE_ENVIRON)
9542
9543     ei_version = export_info.get(constants.INISECT_EXP, "version")
9544     if (int(ei_version) != constants.EXPORT_VERSION):
9545       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9546                                  (ei_version, constants.EXPORT_VERSION),
9547                                  errors.ECODE_ENVIRON)
9548     return export_info
9549
9550   def _ReadExportParams(self, einfo):
9551     """Use export parameters as defaults.
9552
9553     In case the opcode doesn't specify (as in override) some instance
9554     parameters, then try to use them from the export information, if
9555     that declares them.
9556
9557     """
9558     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9559
9560     if self.op.disk_template is None:
9561       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9562         self.op.disk_template = einfo.get(constants.INISECT_INS,
9563                                           "disk_template")
9564         if self.op.disk_template not in constants.DISK_TEMPLATES:
9565           raise errors.OpPrereqError("Disk template specified in configuration"
9566                                      " file is not one of the allowed values:"
9567                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9568       else:
9569         raise errors.OpPrereqError("No disk template specified and the export"
9570                                    " is missing the disk_template information",
9571                                    errors.ECODE_INVAL)
9572
9573     if not self.op.disks:
9574       disks = []
9575       # TODO: import the disk iv_name too
9576       for idx in range(constants.MAX_DISKS):
9577         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9578           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9579           disks.append({constants.IDISK_SIZE: disk_sz})
9580       self.op.disks = disks
9581       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9582         raise errors.OpPrereqError("No disk info specified and the export"
9583                                    " is missing the disk information",
9584                                    errors.ECODE_INVAL)
9585
9586     if not self.op.nics:
9587       nics = []
9588       for idx in range(constants.MAX_NICS):
9589         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9590           ndict = {}
9591           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9592             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9593             ndict[name] = v
9594           nics.append(ndict)
9595         else:
9596           break
9597       self.op.nics = nics
9598
9599     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9600       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9601
9602     if (self.op.hypervisor is None and
9603         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9604       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9605
9606     if einfo.has_section(constants.INISECT_HYP):
9607       # use the export parameters but do not override the ones
9608       # specified by the user
9609       for name, value in einfo.items(constants.INISECT_HYP):
9610         if name not in self.op.hvparams:
9611           self.op.hvparams[name] = value
9612
9613     if einfo.has_section(constants.INISECT_BEP):
9614       # use the parameters, without overriding
9615       for name, value in einfo.items(constants.INISECT_BEP):
9616         if name not in self.op.beparams:
9617           self.op.beparams[name] = value
9618         # Compatibility for the old "memory" be param
9619         if name == constants.BE_MEMORY:
9620           if constants.BE_MAXMEM not in self.op.beparams:
9621             self.op.beparams[constants.BE_MAXMEM] = value
9622           if constants.BE_MINMEM not in self.op.beparams:
9623             self.op.beparams[constants.BE_MINMEM] = value
9624     else:
9625       # try to read the parameters old style, from the main section
9626       for name in constants.BES_PARAMETERS:
9627         if (name not in self.op.beparams and
9628             einfo.has_option(constants.INISECT_INS, name)):
9629           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9630
9631     if einfo.has_section(constants.INISECT_OSP):
9632       # use the parameters, without overriding
9633       for name, value in einfo.items(constants.INISECT_OSP):
9634         if name not in self.op.osparams:
9635           self.op.osparams[name] = value
9636
9637   def _RevertToDefaults(self, cluster):
9638     """Revert the instance parameters to the default values.
9639
9640     """
9641     # hvparams
9642     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9643     for name in self.op.hvparams.keys():
9644       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9645         del self.op.hvparams[name]
9646     # beparams
9647     be_defs = cluster.SimpleFillBE({})
9648     for name in self.op.beparams.keys():
9649       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9650         del self.op.beparams[name]
9651     # nic params
9652     nic_defs = cluster.SimpleFillNIC({})
9653     for nic in self.op.nics:
9654       for name in constants.NICS_PARAMETERS:
9655         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9656           del nic[name]
9657     # osparams
9658     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9659     for name in self.op.osparams.keys():
9660       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9661         del self.op.osparams[name]
9662
9663   def _CalculateFileStorageDir(self):
9664     """Calculate final instance file storage dir.
9665
9666     """
9667     # file storage dir calculation/check
9668     self.instance_file_storage_dir = None
9669     if self.op.disk_template in constants.DTS_FILEBASED:
9670       # build the full file storage dir path
9671       joinargs = []
9672
9673       if self.op.disk_template == constants.DT_SHARED_FILE:
9674         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9675       else:
9676         get_fsd_fn = self.cfg.GetFileStorageDir
9677
9678       cfg_storagedir = get_fsd_fn()
9679       if not cfg_storagedir:
9680         raise errors.OpPrereqError("Cluster file storage dir not defined")
9681       joinargs.append(cfg_storagedir)
9682
9683       if self.op.file_storage_dir is not None:
9684         joinargs.append(self.op.file_storage_dir)
9685
9686       joinargs.append(self.op.instance_name)
9687
9688       # pylint: disable=W0142
9689       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9690
9691   def CheckPrereq(self): # pylint: disable=R0914
9692     """Check prerequisites.
9693
9694     """
9695     self._CalculateFileStorageDir()
9696
9697     if self.op.mode == constants.INSTANCE_IMPORT:
9698       export_info = self._ReadExportInfo()
9699       self._ReadExportParams(export_info)
9700       self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9701     else:
9702       self._old_instance_name = None
9703
9704     if (not self.cfg.GetVGName() and
9705         self.op.disk_template not in constants.DTS_NOT_LVM):
9706       raise errors.OpPrereqError("Cluster does not support lvm-based"
9707                                  " instances", errors.ECODE_STATE)
9708
9709     if (self.op.hypervisor is None or
9710         self.op.hypervisor == constants.VALUE_AUTO):
9711       self.op.hypervisor = self.cfg.GetHypervisorType()
9712
9713     cluster = self.cfg.GetClusterInfo()
9714     enabled_hvs = cluster.enabled_hypervisors
9715     if self.op.hypervisor not in enabled_hvs:
9716       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9717                                  " cluster (%s)" % (self.op.hypervisor,
9718                                   ",".join(enabled_hvs)),
9719                                  errors.ECODE_STATE)
9720
9721     # Check tag validity
9722     for tag in self.op.tags:
9723       objects.TaggableObject.ValidateTag(tag)
9724
9725     # check hypervisor parameter syntax (locally)
9726     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9727     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9728                                       self.op.hvparams)
9729     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9730     hv_type.CheckParameterSyntax(filled_hvp)
9731     self.hv_full = filled_hvp
9732     # check that we don't specify global parameters on an instance
9733     _CheckGlobalHvParams(self.op.hvparams)
9734
9735     # fill and remember the beparams dict
9736     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9737     for param, value in self.op.beparams.iteritems():
9738       if value == constants.VALUE_AUTO:
9739         self.op.beparams[param] = default_beparams[param]
9740     objects.UpgradeBeParams(self.op.beparams)
9741     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9742     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9743
9744     # build os parameters
9745     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9746
9747     # now that hvp/bep are in final format, let's reset to defaults,
9748     # if told to do so
9749     if self.op.identify_defaults:
9750       self._RevertToDefaults(cluster)
9751
9752     # NIC buildup
9753     self.nics = []
9754     for idx, nic in enumerate(self.op.nics):
9755       nic_mode_req = nic.get(constants.INIC_MODE, None)
9756       nic_mode = nic_mode_req
9757       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9758         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9759
9760       # in routed mode, for the first nic, the default ip is 'auto'
9761       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9762         default_ip_mode = constants.VALUE_AUTO
9763       else:
9764         default_ip_mode = constants.VALUE_NONE
9765
9766       # ip validity checks
9767       ip = nic.get(constants.INIC_IP, default_ip_mode)
9768       if ip is None or ip.lower() == constants.VALUE_NONE:
9769         nic_ip = None
9770       elif ip.lower() == constants.VALUE_AUTO:
9771         if not self.op.name_check:
9772           raise errors.OpPrereqError("IP address set to auto but name checks"
9773                                      " have been skipped",
9774                                      errors.ECODE_INVAL)
9775         nic_ip = self.hostname1.ip
9776       else:
9777         if not netutils.IPAddress.IsValid(ip):
9778           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9779                                      errors.ECODE_INVAL)
9780         nic_ip = ip
9781
9782       # TODO: check the ip address for uniqueness
9783       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9784         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9785                                    errors.ECODE_INVAL)
9786
9787       # MAC address verification
9788       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9789       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9790         mac = utils.NormalizeAndValidateMac(mac)
9791
9792         try:
9793           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9794         except errors.ReservationError:
9795           raise errors.OpPrereqError("MAC address %s already in use"
9796                                      " in cluster" % mac,
9797                                      errors.ECODE_NOTUNIQUE)
9798
9799       #  Build nic parameters
9800       link = nic.get(constants.INIC_LINK, None)
9801       if link == constants.VALUE_AUTO:
9802         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9803       nicparams = {}
9804       if nic_mode_req:
9805         nicparams[constants.NIC_MODE] = nic_mode
9806       if link:
9807         nicparams[constants.NIC_LINK] = link
9808
9809       check_params = cluster.SimpleFillNIC(nicparams)
9810       objects.NIC.CheckParameterSyntax(check_params)
9811       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9812
9813     # disk checks/pre-build
9814     default_vg = self.cfg.GetVGName()
9815     self.disks = []
9816     for disk in self.op.disks:
9817       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9818       if mode not in constants.DISK_ACCESS_SET:
9819         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9820                                    mode, errors.ECODE_INVAL)
9821       size = disk.get(constants.IDISK_SIZE, None)
9822       if size is None:
9823         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9824       try:
9825         size = int(size)
9826       except (TypeError, ValueError):
9827         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9828                                    errors.ECODE_INVAL)
9829
9830       data_vg = disk.get(constants.IDISK_VG, default_vg)
9831       new_disk = {
9832         constants.IDISK_SIZE: size,
9833         constants.IDISK_MODE: mode,
9834         constants.IDISK_VG: data_vg,
9835         }
9836       if constants.IDISK_METAVG in disk:
9837         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9838       if constants.IDISK_ADOPT in disk:
9839         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9840       self.disks.append(new_disk)
9841
9842     if self.op.mode == constants.INSTANCE_IMPORT:
9843       disk_images = []
9844       for idx in range(len(self.disks)):
9845         option = "disk%d_dump" % idx
9846         if export_info.has_option(constants.INISECT_INS, option):
9847           # FIXME: are the old os-es, disk sizes, etc. useful?
9848           export_name = export_info.get(constants.INISECT_INS, option)
9849           image = utils.PathJoin(self.op.src_path, export_name)
9850           disk_images.append(image)
9851         else:
9852           disk_images.append(False)
9853
9854       self.src_images = disk_images
9855
9856       if self.op.instance_name == self._old_instance_name:
9857         for idx, nic in enumerate(self.nics):
9858           if nic.mac == constants.VALUE_AUTO:
9859             nic_mac_ini = "nic%d_mac" % idx
9860             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9861
9862     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9863
9864     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9865     if self.op.ip_check:
9866       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9867         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9868                                    (self.check_ip, self.op.instance_name),
9869                                    errors.ECODE_NOTUNIQUE)
9870
9871     #### mac address generation
9872     # By generating here the mac address both the allocator and the hooks get
9873     # the real final mac address rather than the 'auto' or 'generate' value.
9874     # There is a race condition between the generation and the instance object
9875     # creation, which means that we know the mac is valid now, but we're not
9876     # sure it will be when we actually add the instance. If things go bad
9877     # adding the instance will abort because of a duplicate mac, and the
9878     # creation job will fail.
9879     for nic in self.nics:
9880       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9881         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9882
9883     #### allocator run
9884
9885     if self.op.iallocator is not None:
9886       self._RunAllocator()
9887
9888     # Release all unneeded node locks
9889     _ReleaseLocks(self, locking.LEVEL_NODE,
9890                   keep=filter(None, [self.op.pnode, self.op.snode,
9891                                      self.op.src_node]))
9892     _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9893                   keep=filter(None, [self.op.pnode, self.op.snode,
9894                                      self.op.src_node]))
9895
9896     #### node related checks
9897
9898     # check primary node
9899     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9900     assert self.pnode is not None, \
9901       "Cannot retrieve locked node %s" % self.op.pnode
9902     if pnode.offline:
9903       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9904                                  pnode.name, errors.ECODE_STATE)
9905     if pnode.drained:
9906       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9907                                  pnode.name, errors.ECODE_STATE)
9908     if not pnode.vm_capable:
9909       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9910                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9911
9912     self.secondaries = []
9913
9914     # mirror node verification
9915     if self.op.disk_template in constants.DTS_INT_MIRROR:
9916       if self.op.snode == pnode.name:
9917         raise errors.OpPrereqError("The secondary node cannot be the"
9918                                    " primary node", errors.ECODE_INVAL)
9919       _CheckNodeOnline(self, self.op.snode)
9920       _CheckNodeNotDrained(self, self.op.snode)
9921       _CheckNodeVmCapable(self, self.op.snode)
9922       self.secondaries.append(self.op.snode)
9923
9924       snode = self.cfg.GetNodeInfo(self.op.snode)
9925       if pnode.group != snode.group:
9926         self.LogWarning("The primary and secondary nodes are in two"
9927                         " different node groups; the disk parameters"
9928                         " from the first disk's node group will be"
9929                         " used")
9930
9931     nodenames = [pnode.name] + self.secondaries
9932
9933     # Verify instance specs
9934     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9935     ispec = {
9936       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9937       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9938       constants.ISPEC_DISK_COUNT: len(self.disks),
9939       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9940       constants.ISPEC_NIC_COUNT: len(self.nics),
9941       constants.ISPEC_SPINDLE_USE: spindle_use,
9942       }
9943
9944     group_info = self.cfg.GetNodeGroup(pnode.group)
9945     ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9946     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9947     if not self.op.ignore_ipolicy and res:
9948       raise errors.OpPrereqError(("Instance allocation to group %s violates"
9949                                   " policy: %s") % (pnode.group,
9950                                                     utils.CommaJoin(res)),
9951                                   errors.ECODE_INVAL)
9952
9953     if not self.adopt_disks:
9954       if self.op.disk_template == constants.DT_RBD:
9955         # _CheckRADOSFreeSpace() is just a placeholder.
9956         # Any function that checks prerequisites can be placed here.
9957         # Check if there is enough space on the RADOS cluster.
9958         _CheckRADOSFreeSpace()
9959       else:
9960         # Check lv size requirements, if not adopting
9961         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9962         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9963
9964     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9965       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9966                                 disk[constants.IDISK_ADOPT])
9967                      for disk in self.disks])
9968       if len(all_lvs) != len(self.disks):
9969         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9970                                    errors.ECODE_INVAL)
9971       for lv_name in all_lvs:
9972         try:
9973           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9974           # to ReserveLV uses the same syntax
9975           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9976         except errors.ReservationError:
9977           raise errors.OpPrereqError("LV named %s used by another instance" %
9978                                      lv_name, errors.ECODE_NOTUNIQUE)
9979
9980       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9981       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9982
9983       node_lvs = self.rpc.call_lv_list([pnode.name],
9984                                        vg_names.payload.keys())[pnode.name]
9985       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9986       node_lvs = node_lvs.payload
9987
9988       delta = all_lvs.difference(node_lvs.keys())
9989       if delta:
9990         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9991                                    utils.CommaJoin(delta),
9992                                    errors.ECODE_INVAL)
9993       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9994       if online_lvs:
9995         raise errors.OpPrereqError("Online logical volumes found, cannot"
9996                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9997                                    errors.ECODE_STATE)
9998       # update the size of disk based on what is found
9999       for dsk in self.disks:
10000         dsk[constants.IDISK_SIZE] = \
10001           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10002                                         dsk[constants.IDISK_ADOPT])][0]))
10003
10004     elif self.op.disk_template == constants.DT_BLOCK:
10005       # Normalize and de-duplicate device paths
10006       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10007                        for disk in self.disks])
10008       if len(all_disks) != len(self.disks):
10009         raise errors.OpPrereqError("Duplicate disk names given for adoption",
10010                                    errors.ECODE_INVAL)
10011       baddisks = [d for d in all_disks
10012                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10013       if baddisks:
10014         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10015                                    " cannot be adopted" %
10016                                    (", ".join(baddisks),
10017                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
10018                                    errors.ECODE_INVAL)
10019
10020       node_disks = self.rpc.call_bdev_sizes([pnode.name],
10021                                             list(all_disks))[pnode.name]
10022       node_disks.Raise("Cannot get block device information from node %s" %
10023                        pnode.name)
10024       node_disks = node_disks.payload
10025       delta = all_disks.difference(node_disks.keys())
10026       if delta:
10027         raise errors.OpPrereqError("Missing block device(s): %s" %
10028                                    utils.CommaJoin(delta),
10029                                    errors.ECODE_INVAL)
10030       for dsk in self.disks:
10031         dsk[constants.IDISK_SIZE] = \
10032           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10033
10034     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10035
10036     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10037     # check OS parameters (remotely)
10038     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10039
10040     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10041
10042     # memory check on primary node
10043     #TODO(dynmem): use MINMEM for checking
10044     if self.op.start:
10045       _CheckNodeFreeMemory(self, self.pnode.name,
10046                            "creating instance %s" % self.op.instance_name,
10047                            self.be_full[constants.BE_MAXMEM],
10048                            self.op.hypervisor)
10049
10050     self.dry_run_result = list(nodenames)
10051
10052   def Exec(self, feedback_fn):
10053     """Create and add the instance to the cluster.
10054
10055     """
10056     instance = self.op.instance_name
10057     pnode_name = self.pnode.name
10058
10059     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10060                 self.owned_locks(locking.LEVEL_NODE)), \
10061       "Node locks differ from node resource locks"
10062
10063     ht_kind = self.op.hypervisor
10064     if ht_kind in constants.HTS_REQ_PORT:
10065       network_port = self.cfg.AllocatePort()
10066     else:
10067       network_port = None
10068
10069     # This is ugly but we got a chicken-egg problem here
10070     # We can only take the group disk parameters, as the instance
10071     # has no disks yet (we are generating them right here).
10072     node = self.cfg.GetNodeInfo(pnode_name)
10073     nodegroup = self.cfg.GetNodeGroup(node.group)
10074     disks = _GenerateDiskTemplate(self,
10075                                   self.op.disk_template,
10076                                   instance, pnode_name,
10077                                   self.secondaries,
10078                                   self.disks,
10079                                   self.instance_file_storage_dir,
10080                                   self.op.file_driver,
10081                                   0,
10082                                   feedback_fn,
10083                                   self.cfg.GetGroupDiskParams(nodegroup))
10084
10085     iobj = objects.Instance(name=instance, os=self.op.os_type,
10086                             primary_node=pnode_name,
10087                             nics=self.nics, disks=disks,
10088                             disk_template=self.op.disk_template,
10089                             admin_state=constants.ADMINST_DOWN,
10090                             network_port=network_port,
10091                             beparams=self.op.beparams,
10092                             hvparams=self.op.hvparams,
10093                             hypervisor=self.op.hypervisor,
10094                             osparams=self.op.osparams,
10095                             )
10096
10097     if self.op.tags:
10098       for tag in self.op.tags:
10099         iobj.AddTag(tag)
10100
10101     if self.adopt_disks:
10102       if self.op.disk_template == constants.DT_PLAIN:
10103         # rename LVs to the newly-generated names; we need to construct
10104         # 'fake' LV disks with the old data, plus the new unique_id
10105         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10106         rename_to = []
10107         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10108           rename_to.append(t_dsk.logical_id)
10109           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10110           self.cfg.SetDiskID(t_dsk, pnode_name)
10111         result = self.rpc.call_blockdev_rename(pnode_name,
10112                                                zip(tmp_disks, rename_to))
10113         result.Raise("Failed to rename adoped LVs")
10114     else:
10115       feedback_fn("* creating instance disks...")
10116       try:
10117         _CreateDisks(self, iobj)
10118       except errors.OpExecError:
10119         self.LogWarning("Device creation failed, reverting...")
10120         try:
10121           _RemoveDisks(self, iobj)
10122         finally:
10123           self.cfg.ReleaseDRBDMinors(instance)
10124           raise
10125
10126     feedback_fn("adding instance %s to cluster config" % instance)
10127
10128     self.cfg.AddInstance(iobj, self.proc.GetECId())
10129
10130     # Declare that we don't want to remove the instance lock anymore, as we've
10131     # added the instance to the config
10132     del self.remove_locks[locking.LEVEL_INSTANCE]
10133
10134     if self.op.mode == constants.INSTANCE_IMPORT:
10135       # Release unused nodes
10136       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10137     else:
10138       # Release all nodes
10139       _ReleaseLocks(self, locking.LEVEL_NODE)
10140
10141     disk_abort = False
10142     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10143       feedback_fn("* wiping instance disks...")
10144       try:
10145         _WipeDisks(self, iobj)
10146       except errors.OpExecError, err:
10147         logging.exception("Wiping disks failed")
10148         self.LogWarning("Wiping instance disks failed (%s)", err)
10149         disk_abort = True
10150
10151     if disk_abort:
10152       # Something is already wrong with the disks, don't do anything else
10153       pass
10154     elif self.op.wait_for_sync:
10155       disk_abort = not _WaitForSync(self, iobj)
10156     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10157       # make sure the disks are not degraded (still sync-ing is ok)
10158       feedback_fn("* checking mirrors status")
10159       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10160     else:
10161       disk_abort = False
10162
10163     if disk_abort:
10164       _RemoveDisks(self, iobj)
10165       self.cfg.RemoveInstance(iobj.name)
10166       # Make sure the instance lock gets removed
10167       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10168       raise errors.OpExecError("There are some degraded disks for"
10169                                " this instance")
10170
10171     # Release all node resource locks
10172     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10173
10174     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10175       # we need to set the disks ID to the primary node, since the
10176       # preceding code might or might have not done it, depending on
10177       # disk template and other options
10178       for disk in iobj.disks:
10179         self.cfg.SetDiskID(disk, pnode_name)
10180       if self.op.mode == constants.INSTANCE_CREATE:
10181         if not self.op.no_install:
10182           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10183                         not self.op.wait_for_sync)
10184           if pause_sync:
10185             feedback_fn("* pausing disk sync to install instance OS")
10186             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10187                                                               (iobj.disks,
10188                                                                iobj), True)
10189             for idx, success in enumerate(result.payload):
10190               if not success:
10191                 logging.warn("pause-sync of instance %s for disk %d failed",
10192                              instance, idx)
10193
10194           feedback_fn("* running the instance OS create scripts...")
10195           # FIXME: pass debug option from opcode to backend
10196           os_add_result = \
10197             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10198                                           self.op.debug_level)
10199           if pause_sync:
10200             feedback_fn("* resuming disk sync")
10201             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10202                                                               (iobj.disks,
10203                                                                iobj), False)
10204             for idx, success in enumerate(result.payload):
10205               if not success:
10206                 logging.warn("resume-sync of instance %s for disk %d failed",
10207                              instance, idx)
10208
10209           os_add_result.Raise("Could not add os for instance %s"
10210                               " on node %s" % (instance, pnode_name))
10211
10212       else:
10213         if self.op.mode == constants.INSTANCE_IMPORT:
10214           feedback_fn("* running the instance OS import scripts...")
10215
10216           transfers = []
10217
10218           for idx, image in enumerate(self.src_images):
10219             if not image:
10220               continue
10221
10222             # FIXME: pass debug option from opcode to backend
10223             dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10224                                                constants.IEIO_FILE, (image, ),
10225                                                constants.IEIO_SCRIPT,
10226                                                (iobj.disks[idx], idx),
10227                                                None)
10228             transfers.append(dt)
10229
10230           import_result = \
10231             masterd.instance.TransferInstanceData(self, feedback_fn,
10232                                                   self.op.src_node, pnode_name,
10233                                                   self.pnode.secondary_ip,
10234                                                   iobj, transfers)
10235           if not compat.all(import_result):
10236             self.LogWarning("Some disks for instance %s on node %s were not"
10237                             " imported successfully" % (instance, pnode_name))
10238
10239           rename_from = self._old_instance_name
10240
10241         elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10242           feedback_fn("* preparing remote import...")
10243           # The source cluster will stop the instance before attempting to make
10244           # a connection. In some cases stopping an instance can take a long
10245           # time, hence the shutdown timeout is added to the connection
10246           # timeout.
10247           connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10248                              self.op.source_shutdown_timeout)
10249           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10250
10251           assert iobj.primary_node == self.pnode.name
10252           disk_results = \
10253             masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10254                                           self.source_x509_ca,
10255                                           self._cds, timeouts)
10256           if not compat.all(disk_results):
10257             # TODO: Should the instance still be started, even if some disks
10258             # failed to import (valid for local imports, too)?
10259             self.LogWarning("Some disks for instance %s on node %s were not"
10260                             " imported successfully" % (instance, pnode_name))
10261
10262           rename_from = self.source_instance_name
10263
10264         else:
10265           # also checked in the prereq part
10266           raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10267                                        % self.op.mode)
10268
10269         # Run rename script on newly imported instance
10270         assert iobj.name == instance
10271         feedback_fn("Running rename script for %s" % instance)
10272         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10273                                                    rename_from,
10274                                                    self.op.debug_level)
10275         if result.fail_msg:
10276           self.LogWarning("Failed to run rename script for %s on node"
10277                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10278
10279     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10280
10281     if self.op.start:
10282       iobj.admin_state = constants.ADMINST_UP
10283       self.cfg.Update(iobj, feedback_fn)
10284       logging.info("Starting instance %s on node %s", instance, pnode_name)
10285       feedback_fn("* starting instance...")
10286       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10287                                             False)
10288       result.Raise("Could not start instance")
10289
10290     return list(iobj.all_nodes)
10291
10292
10293 def _CheckRADOSFreeSpace():
10294   """Compute disk size requirements inside the RADOS cluster.
10295
10296   """
10297   # For the RADOS cluster we assume there is always enough space.
10298   pass
10299
10300
10301 class LUInstanceConsole(NoHooksLU):
10302   """Connect to an instance's console.
10303
10304   This is somewhat special in that it returns the command line that
10305   you need to run on the master node in order to connect to the
10306   console.
10307
10308   """
10309   REQ_BGL = False
10310
10311   def ExpandNames(self):
10312     self.share_locks = _ShareAll()
10313     self._ExpandAndLockInstance()
10314
10315   def CheckPrereq(self):
10316     """Check prerequisites.
10317
10318     This checks that the instance is in the cluster.
10319
10320     """
10321     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10322     assert self.instance is not None, \
10323       "Cannot retrieve locked instance %s" % self.op.instance_name
10324     _CheckNodeOnline(self, self.instance.primary_node)
10325
10326   def Exec(self, feedback_fn):
10327     """Connect to the console of an instance
10328
10329     """
10330     instance = self.instance
10331     node = instance.primary_node
10332
10333     node_insts = self.rpc.call_instance_list([node],
10334                                              [instance.hypervisor])[node]
10335     node_insts.Raise("Can't get node information from %s" % node)
10336
10337     if instance.name not in node_insts.payload:
10338       if instance.admin_state == constants.ADMINST_UP:
10339         state = constants.INSTST_ERRORDOWN
10340       elif instance.admin_state == constants.ADMINST_DOWN:
10341         state = constants.INSTST_ADMINDOWN
10342       else:
10343         state = constants.INSTST_ADMINOFFLINE
10344       raise errors.OpExecError("Instance %s is not running (state %s)" %
10345                                (instance.name, state))
10346
10347     logging.debug("Connecting to console of %s on %s", instance.name, node)
10348
10349     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10350
10351
10352 def _GetInstanceConsole(cluster, instance):
10353   """Returns console information for an instance.
10354
10355   @type cluster: L{objects.Cluster}
10356   @type instance: L{objects.Instance}
10357   @rtype: dict
10358
10359   """
10360   hyper = hypervisor.GetHypervisor(instance.hypervisor)
10361   # beparams and hvparams are passed separately, to avoid editing the
10362   # instance and then saving the defaults in the instance itself.
10363   hvparams = cluster.FillHV(instance)
10364   beparams = cluster.FillBE(instance)
10365   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10366
10367   assert console.instance == instance.name
10368   assert console.Validate()
10369
10370   return console.ToDict()
10371
10372
10373 class LUInstanceReplaceDisks(LogicalUnit):
10374   """Replace the disks of an instance.
10375
10376   """
10377   HPATH = "mirrors-replace"
10378   HTYPE = constants.HTYPE_INSTANCE
10379   REQ_BGL = False
10380
10381   def CheckArguments(self):
10382     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10383                                   self.op.iallocator)
10384
10385   def ExpandNames(self):
10386     self._ExpandAndLockInstance()
10387
10388     assert locking.LEVEL_NODE not in self.needed_locks
10389     assert locking.LEVEL_NODE_RES not in self.needed_locks
10390     assert locking.LEVEL_NODEGROUP not in self.needed_locks
10391
10392     assert self.op.iallocator is None or self.op.remote_node is None, \
10393       "Conflicting options"
10394
10395     if self.op.remote_node is not None:
10396       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10397
10398       # Warning: do not remove the locking of the new secondary here
10399       # unless DRBD8.AddChildren is changed to work in parallel;
10400       # currently it doesn't since parallel invocations of
10401       # FindUnusedMinor will conflict
10402       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10403       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10404     else:
10405       self.needed_locks[locking.LEVEL_NODE] = []
10406       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10407
10408       if self.op.iallocator is not None:
10409         # iallocator will select a new node in the same group
10410         self.needed_locks[locking.LEVEL_NODEGROUP] = []
10411
10412     self.needed_locks[locking.LEVEL_NODE_RES] = []
10413
10414     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10415                                    self.op.iallocator, self.op.remote_node,
10416                                    self.op.disks, False, self.op.early_release,
10417                                    self.op.ignore_ipolicy)
10418
10419     self.tasklets = [self.replacer]
10420
10421   def DeclareLocks(self, level):
10422     if level == locking.LEVEL_NODEGROUP:
10423       assert self.op.remote_node is None
10424       assert self.op.iallocator is not None
10425       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10426
10427       self.share_locks[locking.LEVEL_NODEGROUP] = 1
10428       # Lock all groups used by instance optimistically; this requires going
10429       # via the node before it's locked, requiring verification later on
10430       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10431         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10432
10433     elif level == locking.LEVEL_NODE:
10434       if self.op.iallocator is not None:
10435         assert self.op.remote_node is None
10436         assert not self.needed_locks[locking.LEVEL_NODE]
10437
10438         # Lock member nodes of all locked groups
10439         self.needed_locks[locking.LEVEL_NODE] = [node_name
10440           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10441           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10442       else:
10443         self._LockInstancesNodes()
10444     elif level == locking.LEVEL_NODE_RES:
10445       # Reuse node locks
10446       self.needed_locks[locking.LEVEL_NODE_RES] = \
10447         self.needed_locks[locking.LEVEL_NODE]
10448
10449   def BuildHooksEnv(self):
10450     """Build hooks env.
10451
10452     This runs on the master, the primary and all the secondaries.
10453
10454     """
10455     instance = self.replacer.instance
10456     env = {
10457       "MODE": self.op.mode,
10458       "NEW_SECONDARY": self.op.remote_node,
10459       "OLD_SECONDARY": instance.secondary_nodes[0],
10460       }
10461     env.update(_BuildInstanceHookEnvByObject(self, instance))
10462     return env
10463
10464   def BuildHooksNodes(self):
10465     """Build hooks nodes.
10466
10467     """
10468     instance = self.replacer.instance
10469     nl = [
10470       self.cfg.GetMasterNode(),
10471       instance.primary_node,
10472       ]
10473     if self.op.remote_node is not None:
10474       nl.append(self.op.remote_node)
10475     return nl, nl
10476
10477   def CheckPrereq(self):
10478     """Check prerequisites.
10479
10480     """
10481     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10482             self.op.iallocator is None)
10483
10484     # Verify if node group locks are still correct
10485     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10486     if owned_groups:
10487       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10488
10489     return LogicalUnit.CheckPrereq(self)
10490
10491
10492 class TLReplaceDisks(Tasklet):
10493   """Replaces disks for an instance.
10494
10495   Note: Locking is not within the scope of this class.
10496
10497   """
10498   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10499                disks, delay_iallocator, early_release, ignore_ipolicy):
10500     """Initializes this class.
10501
10502     """
10503     Tasklet.__init__(self, lu)
10504
10505     # Parameters
10506     self.instance_name = instance_name
10507     self.mode = mode
10508     self.iallocator_name = iallocator_name
10509     self.remote_node = remote_node
10510     self.disks = disks
10511     self.delay_iallocator = delay_iallocator
10512     self.early_release = early_release
10513     self.ignore_ipolicy = ignore_ipolicy
10514
10515     # Runtime data
10516     self.instance = None
10517     self.new_node = None
10518     self.target_node = None
10519     self.other_node = None
10520     self.remote_node_info = None
10521     self.node_secondary_ip = None
10522
10523   @staticmethod
10524   def CheckArguments(mode, remote_node, iallocator):
10525     """Helper function for users of this class.
10526
10527     """
10528     # check for valid parameter combination
10529     if mode == constants.REPLACE_DISK_CHG:
10530       if remote_node is None and iallocator is None:
10531         raise errors.OpPrereqError("When changing the secondary either an"
10532                                    " iallocator script must be used or the"
10533                                    " new node given", errors.ECODE_INVAL)
10534
10535       if remote_node is not None and iallocator is not None:
10536         raise errors.OpPrereqError("Give either the iallocator or the new"
10537                                    " secondary, not both", errors.ECODE_INVAL)
10538
10539     elif remote_node is not None or iallocator is not None:
10540       # Not replacing the secondary
10541       raise errors.OpPrereqError("The iallocator and new node options can"
10542                                  " only be used when changing the"
10543                                  " secondary node", errors.ECODE_INVAL)
10544
10545   @staticmethod
10546   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10547     """Compute a new secondary node using an IAllocator.
10548
10549     """
10550     ial = IAllocator(lu.cfg, lu.rpc,
10551                      mode=constants.IALLOCATOR_MODE_RELOC,
10552                      name=instance_name,
10553                      relocate_from=list(relocate_from))
10554
10555     ial.Run(iallocator_name)
10556
10557     if not ial.success:
10558       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10559                                  " %s" % (iallocator_name, ial.info),
10560                                  errors.ECODE_NORES)
10561
10562     if len(ial.result) != ial.required_nodes:
10563       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10564                                  " of nodes (%s), required %s" %
10565                                  (iallocator_name,
10566                                   len(ial.result), ial.required_nodes),
10567                                  errors.ECODE_FAULT)
10568
10569     remote_node_name = ial.result[0]
10570
10571     lu.LogInfo("Selected new secondary for instance '%s': %s",
10572                instance_name, remote_node_name)
10573
10574     return remote_node_name
10575
10576   def _FindFaultyDisks(self, node_name):
10577     """Wrapper for L{_FindFaultyInstanceDisks}.
10578
10579     """
10580     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10581                                     node_name, True)
10582
10583   def _CheckDisksActivated(self, instance):
10584     """Checks if the instance disks are activated.
10585
10586     @param instance: The instance to check disks
10587     @return: True if they are activated, False otherwise
10588
10589     """
10590     nodes = instance.all_nodes
10591
10592     for idx, dev in enumerate(instance.disks):
10593       for node in nodes:
10594         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10595         self.cfg.SetDiskID(dev, node)
10596
10597         result = _BlockdevFind(self, node, dev, instance)
10598
10599         if result.offline:
10600           continue
10601         elif result.fail_msg or not result.payload:
10602           return False
10603
10604     return True
10605
10606   def CheckPrereq(self):
10607     """Check prerequisites.
10608
10609     This checks that the instance is in the cluster.
10610
10611     """
10612     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10613     assert instance is not None, \
10614       "Cannot retrieve locked instance %s" % self.instance_name
10615
10616     if instance.disk_template != constants.DT_DRBD8:
10617       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10618                                  " instances", errors.ECODE_INVAL)
10619
10620     if len(instance.secondary_nodes) != 1:
10621       raise errors.OpPrereqError("The instance has a strange layout,"
10622                                  " expected one secondary but found %d" %
10623                                  len(instance.secondary_nodes),
10624                                  errors.ECODE_FAULT)
10625
10626     if not self.delay_iallocator:
10627       self._CheckPrereq2()
10628
10629   def _CheckPrereq2(self):
10630     """Check prerequisites, second part.
10631
10632     This function should always be part of CheckPrereq. It was separated and is
10633     now called from Exec because during node evacuation iallocator was only
10634     called with an unmodified cluster model, not taking planned changes into
10635     account.
10636
10637     """
10638     instance = self.instance
10639     secondary_node = instance.secondary_nodes[0]
10640
10641     if self.iallocator_name is None:
10642       remote_node = self.remote_node
10643     else:
10644       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10645                                        instance.name, instance.secondary_nodes)
10646
10647     if remote_node is None:
10648       self.remote_node_info = None
10649     else:
10650       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10651              "Remote node '%s' is not locked" % remote_node
10652
10653       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10654       assert self.remote_node_info is not None, \
10655         "Cannot retrieve locked node %s" % remote_node
10656
10657     if remote_node == self.instance.primary_node:
10658       raise errors.OpPrereqError("The specified node is the primary node of"
10659                                  " the instance", errors.ECODE_INVAL)
10660
10661     if remote_node == secondary_node:
10662       raise errors.OpPrereqError("The specified node is already the"
10663                                  " secondary node of the instance",
10664                                  errors.ECODE_INVAL)
10665
10666     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10667                                     constants.REPLACE_DISK_CHG):
10668       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10669                                  errors.ECODE_INVAL)
10670
10671     if self.mode == constants.REPLACE_DISK_AUTO:
10672       if not self._CheckDisksActivated(instance):
10673         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10674                                    " first" % self.instance_name,
10675                                    errors.ECODE_STATE)
10676       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10677       faulty_secondary = self._FindFaultyDisks(secondary_node)
10678
10679       if faulty_primary and faulty_secondary:
10680         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10681                                    " one node and can not be repaired"
10682                                    " automatically" % self.instance_name,
10683                                    errors.ECODE_STATE)
10684
10685       if faulty_primary:
10686         self.disks = faulty_primary
10687         self.target_node = instance.primary_node
10688         self.other_node = secondary_node
10689         check_nodes = [self.target_node, self.other_node]
10690       elif faulty_secondary:
10691         self.disks = faulty_secondary
10692         self.target_node = secondary_node
10693         self.other_node = instance.primary_node
10694         check_nodes = [self.target_node, self.other_node]
10695       else:
10696         self.disks = []
10697         check_nodes = []
10698
10699     else:
10700       # Non-automatic modes
10701       if self.mode == constants.REPLACE_DISK_PRI:
10702         self.target_node = instance.primary_node
10703         self.other_node = secondary_node
10704         check_nodes = [self.target_node, self.other_node]
10705
10706       elif self.mode == constants.REPLACE_DISK_SEC:
10707         self.target_node = secondary_node
10708         self.other_node = instance.primary_node
10709         check_nodes = [self.target_node, self.other_node]
10710
10711       elif self.mode == constants.REPLACE_DISK_CHG:
10712         self.new_node = remote_node
10713         self.other_node = instance.primary_node
10714         self.target_node = secondary_node
10715         check_nodes = [self.new_node, self.other_node]
10716
10717         _CheckNodeNotDrained(self.lu, remote_node)
10718         _CheckNodeVmCapable(self.lu, remote_node)
10719
10720         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10721         assert old_node_info is not None
10722         if old_node_info.offline and not self.early_release:
10723           # doesn't make sense to delay the release
10724           self.early_release = True
10725           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10726                           " early-release mode", secondary_node)
10727
10728       else:
10729         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10730                                      self.mode)
10731
10732       # If not specified all disks should be replaced
10733       if not self.disks:
10734         self.disks = range(len(self.instance.disks))
10735
10736     # TODO: This is ugly, but right now we can't distinguish between internal
10737     # submitted opcode and external one. We should fix that.
10738     if self.remote_node_info:
10739       # We change the node, lets verify it still meets instance policy
10740       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10741       ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10742                                        new_group_info)
10743       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10744                               ignore=self.ignore_ipolicy)
10745
10746     for node in check_nodes:
10747       _CheckNodeOnline(self.lu, node)
10748
10749     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10750                                                           self.other_node,
10751                                                           self.target_node]
10752                               if node_name is not None)
10753
10754     # Release unneeded node and node resource locks
10755     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10756     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10757
10758     # Release any owned node group
10759     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10760       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10761
10762     # Check whether disks are valid
10763     for disk_idx in self.disks:
10764       instance.FindDisk(disk_idx)
10765
10766     # Get secondary node IP addresses
10767     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10768                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10769
10770   def Exec(self, feedback_fn):
10771     """Execute disk replacement.
10772
10773     This dispatches the disk replacement to the appropriate handler.
10774
10775     """
10776     if self.delay_iallocator:
10777       self._CheckPrereq2()
10778
10779     if __debug__:
10780       # Verify owned locks before starting operation
10781       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10782       assert set(owned_nodes) == set(self.node_secondary_ip), \
10783           ("Incorrect node locks, owning %s, expected %s" %
10784            (owned_nodes, self.node_secondary_ip.keys()))
10785       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10786               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10787
10788       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10789       assert list(owned_instances) == [self.instance_name], \
10790           "Instance '%s' not locked" % self.instance_name
10791
10792       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10793           "Should not own any node group lock at this point"
10794
10795     if not self.disks:
10796       feedback_fn("No disks need replacement")
10797       return
10798
10799     feedback_fn("Replacing disk(s) %s for %s" %
10800                 (utils.CommaJoin(self.disks), self.instance.name))
10801
10802     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10803
10804     # Activate the instance disks if we're replacing them on a down instance
10805     if activate_disks:
10806       _StartInstanceDisks(self.lu, self.instance, True)
10807
10808     try:
10809       # Should we replace the secondary node?
10810       if self.new_node is not None:
10811         fn = self._ExecDrbd8Secondary
10812       else:
10813         fn = self._ExecDrbd8DiskOnly
10814
10815       result = fn(feedback_fn)
10816     finally:
10817       # Deactivate the instance disks if we're replacing them on a
10818       # down instance
10819       if activate_disks:
10820         _SafeShutdownInstanceDisks(self.lu, self.instance)
10821
10822     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10823
10824     if __debug__:
10825       # Verify owned locks
10826       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10827       nodes = frozenset(self.node_secondary_ip)
10828       assert ((self.early_release and not owned_nodes) or
10829               (not self.early_release and not (set(owned_nodes) - nodes))), \
10830         ("Not owning the correct locks, early_release=%s, owned=%r,"
10831          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10832
10833     return result
10834
10835   def _CheckVolumeGroup(self, nodes):
10836     self.lu.LogInfo("Checking volume groups")
10837
10838     vgname = self.cfg.GetVGName()
10839
10840     # Make sure volume group exists on all involved nodes
10841     results = self.rpc.call_vg_list(nodes)
10842     if not results:
10843       raise errors.OpExecError("Can't list volume groups on the nodes")
10844
10845     for node in nodes:
10846       res = results[node]
10847       res.Raise("Error checking node %s" % node)
10848       if vgname not in res.payload:
10849         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10850                                  (vgname, node))
10851
10852   def _CheckDisksExistence(self, nodes):
10853     # Check disk existence
10854     for idx, dev in enumerate(self.instance.disks):
10855       if idx not in self.disks:
10856         continue
10857
10858       for node in nodes:
10859         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10860         self.cfg.SetDiskID(dev, node)
10861
10862         result = _BlockdevFind(self, node, dev, self.instance)
10863
10864         msg = result.fail_msg
10865         if msg or not result.payload:
10866           if not msg:
10867             msg = "disk not found"
10868           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10869                                    (idx, node, msg))
10870
10871   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10872     for idx, dev in enumerate(self.instance.disks):
10873       if idx not in self.disks:
10874         continue
10875
10876       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10877                       (idx, node_name))
10878
10879       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10880                                    on_primary, ldisk=ldisk):
10881         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10882                                  " replace disks for instance %s" %
10883                                  (node_name, self.instance.name))
10884
10885   def _CreateNewStorage(self, node_name):
10886     """Create new storage on the primary or secondary node.
10887
10888     This is only used for same-node replaces, not for changing the
10889     secondary node, hence we don't want to modify the existing disk.
10890
10891     """
10892     iv_names = {}
10893
10894     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10895     for idx, dev in enumerate(disks):
10896       if idx not in self.disks:
10897         continue
10898
10899       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10900
10901       self.cfg.SetDiskID(dev, node_name)
10902
10903       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10904       names = _GenerateUniqueNames(self.lu, lv_names)
10905
10906       (data_disk, meta_disk) = dev.children
10907       vg_data = data_disk.logical_id[0]
10908       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10909                              logical_id=(vg_data, names[0]),
10910                              params=data_disk.params)
10911       vg_meta = meta_disk.logical_id[0]
10912       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10913                              logical_id=(vg_meta, names[1]),
10914                              params=meta_disk.params)
10915
10916       new_lvs = [lv_data, lv_meta]
10917       old_lvs = [child.Copy() for child in dev.children]
10918       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10919
10920       # we pass force_create=True to force the LVM creation
10921       for new_lv in new_lvs:
10922         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10923                              _GetInstanceInfoText(self.instance), False)
10924
10925     return iv_names
10926
10927   def _CheckDevices(self, node_name, iv_names):
10928     for name, (dev, _, _) in iv_names.iteritems():
10929       self.cfg.SetDiskID(dev, node_name)
10930
10931       result = _BlockdevFind(self, node_name, dev, self.instance)
10932
10933       msg = result.fail_msg
10934       if msg or not result.payload:
10935         if not msg:
10936           msg = "disk not found"
10937         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10938                                  (name, msg))
10939
10940       if result.payload.is_degraded:
10941         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10942
10943   def _RemoveOldStorage(self, node_name, iv_names):
10944     for name, (_, old_lvs, _) in iv_names.iteritems():
10945       self.lu.LogInfo("Remove logical volumes for %s" % name)
10946
10947       for lv in old_lvs:
10948         self.cfg.SetDiskID(lv, node_name)
10949
10950         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10951         if msg:
10952           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10953                              hint="remove unused LVs manually")
10954
10955   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10956     """Replace a disk on the primary or secondary for DRBD 8.
10957
10958     The algorithm for replace is quite complicated:
10959
10960       1. for each disk to be replaced:
10961
10962         1. create new LVs on the target node with unique names
10963         1. detach old LVs from the drbd device
10964         1. rename old LVs to name_replaced.<time_t>
10965         1. rename new LVs to old LVs
10966         1. attach the new LVs (with the old names now) to the drbd device
10967
10968       1. wait for sync across all devices
10969
10970       1. for each modified disk:
10971
10972         1. remove old LVs (which have the name name_replaces.<time_t>)
10973
10974     Failures are not very well handled.
10975
10976     """
10977     steps_total = 6
10978
10979     # Step: check device activation
10980     self.lu.LogStep(1, steps_total, "Check device existence")
10981     self._CheckDisksExistence([self.other_node, self.target_node])
10982     self._CheckVolumeGroup([self.target_node, self.other_node])
10983
10984     # Step: check other node consistency
10985     self.lu.LogStep(2, steps_total, "Check peer consistency")
10986     self._CheckDisksConsistency(self.other_node,
10987                                 self.other_node == self.instance.primary_node,
10988                                 False)
10989
10990     # Step: create new storage
10991     self.lu.LogStep(3, steps_total, "Allocate new storage")
10992     iv_names = self._CreateNewStorage(self.target_node)
10993
10994     # Step: for each lv, detach+rename*2+attach
10995     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10996     for dev, old_lvs, new_lvs in iv_names.itervalues():
10997       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10998
10999       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11000                                                      old_lvs)
11001       result.Raise("Can't detach drbd from local storage on node"
11002                    " %s for device %s" % (self.target_node, dev.iv_name))
11003       #dev.children = []
11004       #cfg.Update(instance)
11005
11006       # ok, we created the new LVs, so now we know we have the needed
11007       # storage; as such, we proceed on the target node to rename
11008       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11009       # using the assumption that logical_id == physical_id (which in
11010       # turn is the unique_id on that node)
11011
11012       # FIXME(iustin): use a better name for the replaced LVs
11013       temp_suffix = int(time.time())
11014       ren_fn = lambda d, suff: (d.physical_id[0],
11015                                 d.physical_id[1] + "_replaced-%s" % suff)
11016
11017       # Build the rename list based on what LVs exist on the node
11018       rename_old_to_new = []
11019       for to_ren in old_lvs:
11020         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11021         if not result.fail_msg and result.payload:
11022           # device exists
11023           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11024
11025       self.lu.LogInfo("Renaming the old LVs on the target node")
11026       result = self.rpc.call_blockdev_rename(self.target_node,
11027                                              rename_old_to_new)
11028       result.Raise("Can't rename old LVs on node %s" % self.target_node)
11029
11030       # Now we rename the new LVs to the old LVs
11031       self.lu.LogInfo("Renaming the new LVs on the target node")
11032       rename_new_to_old = [(new, old.physical_id)
11033                            for old, new in zip(old_lvs, new_lvs)]
11034       result = self.rpc.call_blockdev_rename(self.target_node,
11035                                              rename_new_to_old)
11036       result.Raise("Can't rename new LVs on node %s" % self.target_node)
11037
11038       # Intermediate steps of in memory modifications
11039       for old, new in zip(old_lvs, new_lvs):
11040         new.logical_id = old.logical_id
11041         self.cfg.SetDiskID(new, self.target_node)
11042
11043       # We need to modify old_lvs so that removal later removes the
11044       # right LVs, not the newly added ones; note that old_lvs is a
11045       # copy here
11046       for disk in old_lvs:
11047         disk.logical_id = ren_fn(disk, temp_suffix)
11048         self.cfg.SetDiskID(disk, self.target_node)
11049
11050       # Now that the new lvs have the old name, we can add them to the device
11051       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11052       result = self.rpc.call_blockdev_addchildren(self.target_node,
11053                                                   (dev, self.instance), new_lvs)
11054       msg = result.fail_msg
11055       if msg:
11056         for new_lv in new_lvs:
11057           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11058                                                new_lv).fail_msg
11059           if msg2:
11060             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11061                                hint=("cleanup manually the unused logical"
11062                                      "volumes"))
11063         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11064
11065     cstep = itertools.count(5)
11066
11067     if self.early_release:
11068       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11069       self._RemoveOldStorage(self.target_node, iv_names)
11070       # TODO: Check if releasing locks early still makes sense
11071       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11072     else:
11073       # Release all resource locks except those used by the instance
11074       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11075                     keep=self.node_secondary_ip.keys())
11076
11077     # Release all node locks while waiting for sync
11078     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11079
11080     # TODO: Can the instance lock be downgraded here? Take the optional disk
11081     # shutdown in the caller into consideration.
11082
11083     # Wait for sync
11084     # This can fail as the old devices are degraded and _WaitForSync
11085     # does a combined result over all disks, so we don't check its return value
11086     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11087     _WaitForSync(self.lu, self.instance)
11088
11089     # Check all devices manually
11090     self._CheckDevices(self.instance.primary_node, iv_names)
11091
11092     # Step: remove old storage
11093     if not self.early_release:
11094       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11095       self._RemoveOldStorage(self.target_node, iv_names)
11096
11097   def _ExecDrbd8Secondary(self, feedback_fn):
11098     """Replace the secondary node for DRBD 8.
11099
11100     The algorithm for replace is quite complicated:
11101       - for all disks of the instance:
11102         - create new LVs on the new node with same names
11103         - shutdown the drbd device on the old secondary
11104         - disconnect the drbd network on the primary
11105         - create the drbd device on the new secondary
11106         - network attach the drbd on the primary, using an artifice:
11107           the drbd code for Attach() will connect to the network if it
11108           finds a device which is connected to the good local disks but
11109           not network enabled
11110       - wait for sync across all devices
11111       - remove all disks from the old secondary
11112
11113     Failures are not very well handled.
11114
11115     """
11116     steps_total = 6
11117
11118     pnode = self.instance.primary_node
11119
11120     # Step: check device activation
11121     self.lu.LogStep(1, steps_total, "Check device existence")
11122     self._CheckDisksExistence([self.instance.primary_node])
11123     self._CheckVolumeGroup([self.instance.primary_node])
11124
11125     # Step: check other node consistency
11126     self.lu.LogStep(2, steps_total, "Check peer consistency")
11127     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11128
11129     # Step: create new storage
11130     self.lu.LogStep(3, steps_total, "Allocate new storage")
11131     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11132     for idx, dev in enumerate(disks):
11133       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11134                       (self.new_node, idx))
11135       # we pass force_create=True to force LVM creation
11136       for new_lv in dev.children:
11137         _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11138                              True, _GetInstanceInfoText(self.instance), False)
11139
11140     # Step 4: dbrd minors and drbd setups changes
11141     # after this, we must manually remove the drbd minors on both the
11142     # error and the success paths
11143     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11144     minors = self.cfg.AllocateDRBDMinor([self.new_node
11145                                          for dev in self.instance.disks],
11146                                         self.instance.name)
11147     logging.debug("Allocated minors %r", minors)
11148
11149     iv_names = {}
11150     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11151       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11152                       (self.new_node, idx))
11153       # create new devices on new_node; note that we create two IDs:
11154       # one without port, so the drbd will be activated without
11155       # networking information on the new node at this stage, and one
11156       # with network, for the latter activation in step 4
11157       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11158       if self.instance.primary_node == o_node1:
11159         p_minor = o_minor1
11160       else:
11161         assert self.instance.primary_node == o_node2, "Three-node instance?"
11162         p_minor = o_minor2
11163
11164       new_alone_id = (self.instance.primary_node, self.new_node, None,
11165                       p_minor, new_minor, o_secret)
11166       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11167                     p_minor, new_minor, o_secret)
11168
11169       iv_names[idx] = (dev, dev.children, new_net_id)
11170       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11171                     new_net_id)
11172       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11173                               logical_id=new_alone_id,
11174                               children=dev.children,
11175                               size=dev.size,
11176                               params={})
11177       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11178                                              self.cfg)
11179       try:
11180         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11181                               anno_new_drbd,
11182                               _GetInstanceInfoText(self.instance), False)
11183       except errors.GenericError:
11184         self.cfg.ReleaseDRBDMinors(self.instance.name)
11185         raise
11186
11187     # We have new devices, shutdown the drbd on the old secondary
11188     for idx, dev in enumerate(self.instance.disks):
11189       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11190       self.cfg.SetDiskID(dev, self.target_node)
11191       msg = self.rpc.call_blockdev_shutdown(self.target_node,
11192                                             (dev, self.instance)).fail_msg
11193       if msg:
11194         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11195                            "node: %s" % (idx, msg),
11196                            hint=("Please cleanup this device manually as"
11197                                  " soon as possible"))
11198
11199     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11200     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11201                                                self.instance.disks)[pnode]
11202
11203     msg = result.fail_msg
11204     if msg:
11205       # detaches didn't succeed (unlikely)
11206       self.cfg.ReleaseDRBDMinors(self.instance.name)
11207       raise errors.OpExecError("Can't detach the disks from the network on"
11208                                " old node: %s" % (msg,))
11209
11210     # if we managed to detach at least one, we update all the disks of
11211     # the instance to point to the new secondary
11212     self.lu.LogInfo("Updating instance configuration")
11213     for dev, _, new_logical_id in iv_names.itervalues():
11214       dev.logical_id = new_logical_id
11215       self.cfg.SetDiskID(dev, self.instance.primary_node)
11216
11217     self.cfg.Update(self.instance, feedback_fn)
11218
11219     # Release all node locks (the configuration has been updated)
11220     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11221
11222     # and now perform the drbd attach
11223     self.lu.LogInfo("Attaching primary drbds to new secondary"
11224                     " (standalone => connected)")
11225     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11226                                             self.new_node],
11227                                            self.node_secondary_ip,
11228                                            (self.instance.disks, self.instance),
11229                                            self.instance.name,
11230                                            False)
11231     for to_node, to_result in result.items():
11232       msg = to_result.fail_msg
11233       if msg:
11234         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11235                            to_node, msg,
11236                            hint=("please do a gnt-instance info to see the"
11237                                  " status of disks"))
11238
11239     cstep = itertools.count(5)
11240
11241     if self.early_release:
11242       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11243       self._RemoveOldStorage(self.target_node, iv_names)
11244       # TODO: Check if releasing locks early still makes sense
11245       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11246     else:
11247       # Release all resource locks except those used by the instance
11248       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11249                     keep=self.node_secondary_ip.keys())
11250
11251     # TODO: Can the instance lock be downgraded here? Take the optional disk
11252     # shutdown in the caller into consideration.
11253
11254     # Wait for sync
11255     # This can fail as the old devices are degraded and _WaitForSync
11256     # does a combined result over all disks, so we don't check its return value
11257     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11258     _WaitForSync(self.lu, self.instance)
11259
11260     # Check all devices manually
11261     self._CheckDevices(self.instance.primary_node, iv_names)
11262
11263     # Step: remove old storage
11264     if not self.early_release:
11265       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11266       self._RemoveOldStorage(self.target_node, iv_names)
11267
11268
11269 class LURepairNodeStorage(NoHooksLU):
11270   """Repairs the volume group on a node.
11271
11272   """
11273   REQ_BGL = False
11274
11275   def CheckArguments(self):
11276     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11277
11278     storage_type = self.op.storage_type
11279
11280     if (constants.SO_FIX_CONSISTENCY not in
11281         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11282       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11283                                  " repaired" % storage_type,
11284                                  errors.ECODE_INVAL)
11285
11286   def ExpandNames(self):
11287     self.needed_locks = {
11288       locking.LEVEL_NODE: [self.op.node_name],
11289       }
11290
11291   def _CheckFaultyDisks(self, instance, node_name):
11292     """Ensure faulty disks abort the opcode or at least warn."""
11293     try:
11294       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11295                                   node_name, True):
11296         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11297                                    " node '%s'" % (instance.name, node_name),
11298                                    errors.ECODE_STATE)
11299     except errors.OpPrereqError, err:
11300       if self.op.ignore_consistency:
11301         self.proc.LogWarning(str(err.args[0]))
11302       else:
11303         raise
11304
11305   def CheckPrereq(self):
11306     """Check prerequisites.
11307
11308     """
11309     # Check whether any instance on this node has faulty disks
11310     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11311       if inst.admin_state != constants.ADMINST_UP:
11312         continue
11313       check_nodes = set(inst.all_nodes)
11314       check_nodes.discard(self.op.node_name)
11315       for inst_node_name in check_nodes:
11316         self._CheckFaultyDisks(inst, inst_node_name)
11317
11318   def Exec(self, feedback_fn):
11319     feedback_fn("Repairing storage unit '%s' on %s ..." %
11320                 (self.op.name, self.op.node_name))
11321
11322     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11323     result = self.rpc.call_storage_execute(self.op.node_name,
11324                                            self.op.storage_type, st_args,
11325                                            self.op.name,
11326                                            constants.SO_FIX_CONSISTENCY)
11327     result.Raise("Failed to repair storage unit '%s' on %s" %
11328                  (self.op.name, self.op.node_name))
11329
11330
11331 class LUNodeEvacuate(NoHooksLU):
11332   """Evacuates instances off a list of nodes.
11333
11334   """
11335   REQ_BGL = False
11336
11337   _MODE2IALLOCATOR = {
11338     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11339     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11340     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11341     }
11342   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11343   assert (frozenset(_MODE2IALLOCATOR.values()) ==
11344           constants.IALLOCATOR_NEVAC_MODES)
11345
11346   def CheckArguments(self):
11347     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11348
11349   def ExpandNames(self):
11350     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11351
11352     if self.op.remote_node is not None:
11353       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11354       assert self.op.remote_node
11355
11356       if self.op.remote_node == self.op.node_name:
11357         raise errors.OpPrereqError("Can not use evacuated node as a new"
11358                                    " secondary node", errors.ECODE_INVAL)
11359
11360       if self.op.mode != constants.NODE_EVAC_SEC:
11361         raise errors.OpPrereqError("Without the use of an iallocator only"
11362                                    " secondary instances can be evacuated",
11363                                    errors.ECODE_INVAL)
11364
11365     # Declare locks
11366     self.share_locks = _ShareAll()
11367     self.needed_locks = {
11368       locking.LEVEL_INSTANCE: [],
11369       locking.LEVEL_NODEGROUP: [],
11370       locking.LEVEL_NODE: [],
11371       }
11372
11373     # Determine nodes (via group) optimistically, needs verification once locks
11374     # have been acquired
11375     self.lock_nodes = self._DetermineNodes()
11376
11377   def _DetermineNodes(self):
11378     """Gets the list of nodes to operate on.
11379
11380     """
11381     if self.op.remote_node is None:
11382       # Iallocator will choose any node(s) in the same group
11383       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11384     else:
11385       group_nodes = frozenset([self.op.remote_node])
11386
11387     # Determine nodes to be locked
11388     return set([self.op.node_name]) | group_nodes
11389
11390   def _DetermineInstances(self):
11391     """Builds list of instances to operate on.
11392
11393     """
11394     assert self.op.mode in constants.NODE_EVAC_MODES
11395
11396     if self.op.mode == constants.NODE_EVAC_PRI:
11397       # Primary instances only
11398       inst_fn = _GetNodePrimaryInstances
11399       assert self.op.remote_node is None, \
11400         "Evacuating primary instances requires iallocator"
11401     elif self.op.mode == constants.NODE_EVAC_SEC:
11402       # Secondary instances only
11403       inst_fn = _GetNodeSecondaryInstances
11404     else:
11405       # All instances
11406       assert self.op.mode == constants.NODE_EVAC_ALL
11407       inst_fn = _GetNodeInstances
11408       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11409       # per instance
11410       raise errors.OpPrereqError("Due to an issue with the iallocator"
11411                                  " interface it is not possible to evacuate"
11412                                  " all instances at once; specify explicitly"
11413                                  " whether to evacuate primary or secondary"
11414                                  " instances",
11415                                  errors.ECODE_INVAL)
11416
11417     return inst_fn(self.cfg, self.op.node_name)
11418
11419   def DeclareLocks(self, level):
11420     if level == locking.LEVEL_INSTANCE:
11421       # Lock instances optimistically, needs verification once node and group
11422       # locks have been acquired
11423       self.needed_locks[locking.LEVEL_INSTANCE] = \
11424         set(i.name for i in self._DetermineInstances())
11425
11426     elif level == locking.LEVEL_NODEGROUP:
11427       # Lock node groups for all potential target nodes optimistically, needs
11428       # verification once nodes have been acquired
11429       self.needed_locks[locking.LEVEL_NODEGROUP] = \
11430         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11431
11432     elif level == locking.LEVEL_NODE:
11433       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11434
11435   def CheckPrereq(self):
11436     # Verify locks
11437     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11438     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11439     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11440
11441     need_nodes = self._DetermineNodes()
11442
11443     if not owned_nodes.issuperset(need_nodes):
11444       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11445                                  " locks were acquired, current nodes are"
11446                                  " are '%s', used to be '%s'; retry the"
11447                                  " operation" %
11448                                  (self.op.node_name,
11449                                   utils.CommaJoin(need_nodes),
11450                                   utils.CommaJoin(owned_nodes)),
11451                                  errors.ECODE_STATE)
11452
11453     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11454     if owned_groups != wanted_groups:
11455       raise errors.OpExecError("Node groups changed since locks were acquired,"
11456                                " current groups are '%s', used to be '%s';"
11457                                " retry the operation" %
11458                                (utils.CommaJoin(wanted_groups),
11459                                 utils.CommaJoin(owned_groups)))
11460
11461     # Determine affected instances
11462     self.instances = self._DetermineInstances()
11463     self.instance_names = [i.name for i in self.instances]
11464
11465     if set(self.instance_names) != owned_instances:
11466       raise errors.OpExecError("Instances on node '%s' changed since locks"
11467                                " were acquired, current instances are '%s',"
11468                                " used to be '%s'; retry the operation" %
11469                                (self.op.node_name,
11470                                 utils.CommaJoin(self.instance_names),
11471                                 utils.CommaJoin(owned_instances)))
11472
11473     if self.instance_names:
11474       self.LogInfo("Evacuating instances from node '%s': %s",
11475                    self.op.node_name,
11476                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
11477     else:
11478       self.LogInfo("No instances to evacuate from node '%s'",
11479                    self.op.node_name)
11480
11481     if self.op.remote_node is not None:
11482       for i in self.instances:
11483         if i.primary_node == self.op.remote_node:
11484           raise errors.OpPrereqError("Node %s is the primary node of"
11485                                      " instance %s, cannot use it as"
11486                                      " secondary" %
11487                                      (self.op.remote_node, i.name),
11488                                      errors.ECODE_INVAL)
11489
11490   def Exec(self, feedback_fn):
11491     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11492
11493     if not self.instance_names:
11494       # No instances to evacuate
11495       jobs = []
11496
11497     elif self.op.iallocator is not None:
11498       # TODO: Implement relocation to other group
11499       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11500                        evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11501                        instances=list(self.instance_names))
11502
11503       ial.Run(self.op.iallocator)
11504
11505       if not ial.success:
11506         raise errors.OpPrereqError("Can't compute node evacuation using"
11507                                    " iallocator '%s': %s" %
11508                                    (self.op.iallocator, ial.info),
11509                                    errors.ECODE_NORES)
11510
11511       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11512
11513     elif self.op.remote_node is not None:
11514       assert self.op.mode == constants.NODE_EVAC_SEC
11515       jobs = [
11516         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11517                                         remote_node=self.op.remote_node,
11518                                         disks=[],
11519                                         mode=constants.REPLACE_DISK_CHG,
11520                                         early_release=self.op.early_release)]
11521         for instance_name in self.instance_names
11522         ]
11523
11524     else:
11525       raise errors.ProgrammerError("No iallocator or remote node")
11526
11527     return ResultWithJobs(jobs)
11528
11529
11530 def _SetOpEarlyRelease(early_release, op):
11531   """Sets C{early_release} flag on opcodes if available.
11532
11533   """
11534   try:
11535     op.early_release = early_release
11536   except AttributeError:
11537     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11538
11539   return op
11540
11541
11542 def _NodeEvacDest(use_nodes, group, nodes):
11543   """Returns group or nodes depending on caller's choice.
11544
11545   """
11546   if use_nodes:
11547     return utils.CommaJoin(nodes)
11548   else:
11549     return group
11550
11551
11552 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11553   """Unpacks the result of change-group and node-evacuate iallocator requests.
11554
11555   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11556   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11557
11558   @type lu: L{LogicalUnit}
11559   @param lu: Logical unit instance
11560   @type alloc_result: tuple/list
11561   @param alloc_result: Result from iallocator
11562   @type early_release: bool
11563   @param early_release: Whether to release locks early if possible
11564   @type use_nodes: bool
11565   @param use_nodes: Whether to display node names instead of groups
11566
11567   """
11568   (moved, failed, jobs) = alloc_result
11569
11570   if failed:
11571     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11572                                  for (name, reason) in failed)
11573     lu.LogWarning("Unable to evacuate instances %s", failreason)
11574     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11575
11576   if moved:
11577     lu.LogInfo("Instances to be moved: %s",
11578                utils.CommaJoin("%s (to %s)" %
11579                                (name, _NodeEvacDest(use_nodes, group, nodes))
11580                                for (name, group, nodes) in moved))
11581
11582   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11583               map(opcodes.OpCode.LoadOpCode, ops))
11584           for ops in jobs]
11585
11586
11587 class LUInstanceGrowDisk(LogicalUnit):
11588   """Grow a disk of an instance.
11589
11590   """
11591   HPATH = "disk-grow"
11592   HTYPE = constants.HTYPE_INSTANCE
11593   REQ_BGL = False
11594
11595   def ExpandNames(self):
11596     self._ExpandAndLockInstance()
11597     self.needed_locks[locking.LEVEL_NODE] = []
11598     self.needed_locks[locking.LEVEL_NODE_RES] = []
11599     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11600     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11601
11602   def DeclareLocks(self, level):
11603     if level == locking.LEVEL_NODE:
11604       self._LockInstancesNodes()
11605     elif level == locking.LEVEL_NODE_RES:
11606       # Copy node locks
11607       self.needed_locks[locking.LEVEL_NODE_RES] = \
11608         self.needed_locks[locking.LEVEL_NODE][:]
11609
11610   def BuildHooksEnv(self):
11611     """Build hooks env.
11612
11613     This runs on the master, the primary and all the secondaries.
11614
11615     """
11616     env = {
11617       "DISK": self.op.disk,
11618       "AMOUNT": self.op.amount,
11619       "ABSOLUTE": self.op.absolute,
11620       }
11621     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11622     return env
11623
11624   def BuildHooksNodes(self):
11625     """Build hooks nodes.
11626
11627     """
11628     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11629     return (nl, nl)
11630
11631   def CheckPrereq(self):
11632     """Check prerequisites.
11633
11634     This checks that the instance is in the cluster.
11635
11636     """
11637     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11638     assert instance is not None, \
11639       "Cannot retrieve locked instance %s" % self.op.instance_name
11640     nodenames = list(instance.all_nodes)
11641     for node in nodenames:
11642       _CheckNodeOnline(self, node)
11643
11644     self.instance = instance
11645
11646     if instance.disk_template not in constants.DTS_GROWABLE:
11647       raise errors.OpPrereqError("Instance's disk layout does not support"
11648                                  " growing", errors.ECODE_INVAL)
11649
11650     self.disk = instance.FindDisk(self.op.disk)
11651
11652     if self.op.absolute:
11653       self.target = self.op.amount
11654       self.delta = self.target - self.disk.size
11655       if self.delta < 0:
11656         raise errors.OpPrereqError("Requested size (%s) is smaller than "
11657                                    "current disk size (%s)" %
11658                                    (utils.FormatUnit(self.target, "h"),
11659                                     utils.FormatUnit(self.disk.size, "h")),
11660                                    errors.ECODE_STATE)
11661     else:
11662       self.delta = self.op.amount
11663       self.target = self.disk.size + self.delta
11664       if self.delta < 0:
11665         raise errors.OpPrereqError("Requested increment (%s) is negative" %
11666                                    utils.FormatUnit(self.delta, "h"),
11667                                    errors.ECODE_INVAL)
11668
11669     if instance.disk_template not in (constants.DT_FILE,
11670                                       constants.DT_SHARED_FILE,
11671                                       constants.DT_RBD):
11672       # TODO: check the free disk space for file, when that feature will be
11673       # supported
11674       _CheckNodesFreeDiskPerVG(self, nodenames,
11675                                self.disk.ComputeGrowth(self.delta))
11676
11677   def Exec(self, feedback_fn):
11678     """Execute disk grow.
11679
11680     """
11681     instance = self.instance
11682     disk = self.disk
11683
11684     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11685     assert (self.owned_locks(locking.LEVEL_NODE) ==
11686             self.owned_locks(locking.LEVEL_NODE_RES))
11687
11688     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11689     if not disks_ok:
11690       raise errors.OpExecError("Cannot activate block device to grow")
11691
11692     feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11693                 (self.op.disk, instance.name,
11694                  utils.FormatUnit(self.delta, "h"),
11695                  utils.FormatUnit(self.target, "h")))
11696
11697     # First run all grow ops in dry-run mode
11698     for node in instance.all_nodes:
11699       self.cfg.SetDiskID(disk, node)
11700       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11701                                            True, True)
11702       result.Raise("Grow request failed to node %s" % node)
11703
11704     # We know that (as far as we can test) operations across different
11705     # nodes will succeed, time to run it for real on the backing storage
11706     for node in instance.all_nodes:
11707       self.cfg.SetDiskID(disk, node)
11708       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11709                                            False, True)
11710       result.Raise("Grow request failed to node %s" % node)
11711
11712     # And now execute it for logical storage, on the primary node
11713     node = instance.primary_node
11714     self.cfg.SetDiskID(disk, node)
11715     result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11716                                          False, False)
11717     result.Raise("Grow request failed to node %s" % node)
11718
11719     disk.RecordGrow(self.delta)
11720     self.cfg.Update(instance, feedback_fn)
11721
11722     # Changes have been recorded, release node lock
11723     _ReleaseLocks(self, locking.LEVEL_NODE)
11724
11725     # Downgrade lock while waiting for sync
11726     self.glm.downgrade(locking.LEVEL_INSTANCE)
11727
11728     if self.op.wait_for_sync:
11729       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11730       if disk_abort:
11731         self.proc.LogWarning("Disk sync-ing has not returned a good"
11732                              " status; please check the instance")
11733       if instance.admin_state != constants.ADMINST_UP:
11734         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11735     elif instance.admin_state != constants.ADMINST_UP:
11736       self.proc.LogWarning("Not shutting down the disk even if the instance is"
11737                            " not supposed to be running because no wait for"
11738                            " sync mode was requested")
11739
11740     assert self.owned_locks(locking.LEVEL_NODE_RES)
11741     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11742
11743
11744 class LUInstanceQueryData(NoHooksLU):
11745   """Query runtime instance data.
11746
11747   """
11748   REQ_BGL = False
11749
11750   def ExpandNames(self):
11751     self.needed_locks = {}
11752
11753     # Use locking if requested or when non-static information is wanted
11754     if not (self.op.static or self.op.use_locking):
11755       self.LogWarning("Non-static data requested, locks need to be acquired")
11756       self.op.use_locking = True
11757
11758     if self.op.instances or not self.op.use_locking:
11759       # Expand instance names right here
11760       self.wanted_names = _GetWantedInstances(self, self.op.instances)
11761     else:
11762       # Will use acquired locks
11763       self.wanted_names = None
11764
11765     if self.op.use_locking:
11766       self.share_locks = _ShareAll()
11767
11768       if self.wanted_names is None:
11769         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11770       else:
11771         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11772
11773       self.needed_locks[locking.LEVEL_NODEGROUP] = []
11774       self.needed_locks[locking.LEVEL_NODE] = []
11775       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11776
11777   def DeclareLocks(self, level):
11778     if self.op.use_locking:
11779       if level == locking.LEVEL_NODEGROUP:
11780         owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11781
11782         # Lock all groups used by instances optimistically; this requires going
11783         # via the node before it's locked, requiring verification later on
11784         self.needed_locks[locking.LEVEL_NODEGROUP] = \
11785           frozenset(group_uuid
11786                     for instance_name in owned_instances
11787                     for group_uuid in
11788                       self.cfg.GetInstanceNodeGroups(instance_name))
11789
11790       elif level == locking.LEVEL_NODE:
11791         self._LockInstancesNodes()
11792
11793   def CheckPrereq(self):
11794     """Check prerequisites.
11795
11796     This only checks the optional instance list against the existing names.
11797
11798     """
11799     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11800     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11801     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11802
11803     if self.wanted_names is None:
11804       assert self.op.use_locking, "Locking was not used"
11805       self.wanted_names = owned_instances
11806
11807     instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11808
11809     if self.op.use_locking:
11810       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11811                                 None)
11812     else:
11813       assert not (owned_instances or owned_groups or owned_nodes)
11814
11815     self.wanted_instances = instances.values()
11816
11817   def _ComputeBlockdevStatus(self, node, instance, dev):
11818     """Returns the status of a block device
11819
11820     """
11821     if self.op.static or not node:
11822       return None
11823
11824     self.cfg.SetDiskID(dev, node)
11825
11826     result = self.rpc.call_blockdev_find(node, dev)
11827     if result.offline:
11828       return None
11829
11830     result.Raise("Can't compute disk status for %s" % instance.name)
11831
11832     status = result.payload
11833     if status is None:
11834       return None
11835
11836     return (status.dev_path, status.major, status.minor,
11837             status.sync_percent, status.estimated_time,
11838             status.is_degraded, status.ldisk_status)
11839
11840   def _ComputeDiskStatus(self, instance, snode, dev):
11841     """Compute block device status.
11842
11843     """
11844     (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11845
11846     return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11847
11848   def _ComputeDiskStatusInner(self, instance, snode, dev):
11849     """Compute block device status.
11850
11851     @attention: The device has to be annotated already.
11852
11853     """
11854     if dev.dev_type in constants.LDS_DRBD:
11855       # we change the snode then (otherwise we use the one passed in)
11856       if dev.logical_id[0] == instance.primary_node:
11857         snode = dev.logical_id[1]
11858       else:
11859         snode = dev.logical_id[0]
11860
11861     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11862                                               instance, dev)
11863     dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11864
11865     if dev.children:
11866       dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11867                                         instance, snode),
11868                          dev.children)
11869     else:
11870       dev_children = []
11871
11872     return {
11873       "iv_name": dev.iv_name,
11874       "dev_type": dev.dev_type,
11875       "logical_id": dev.logical_id,
11876       "physical_id": dev.physical_id,
11877       "pstatus": dev_pstatus,
11878       "sstatus": dev_sstatus,
11879       "children": dev_children,
11880       "mode": dev.mode,
11881       "size": dev.size,
11882       }
11883
11884   def Exec(self, feedback_fn):
11885     """Gather and return data"""
11886     result = {}
11887
11888     cluster = self.cfg.GetClusterInfo()
11889
11890     node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11891     nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11892
11893     groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11894                                                  for node in nodes.values()))
11895
11896     group2name_fn = lambda uuid: groups[uuid].name
11897
11898     for instance in self.wanted_instances:
11899       pnode = nodes[instance.primary_node]
11900
11901       if self.op.static or pnode.offline:
11902         remote_state = None
11903         if pnode.offline:
11904           self.LogWarning("Primary node %s is marked offline, returning static"
11905                           " information only for instance %s" %
11906                           (pnode.name, instance.name))
11907       else:
11908         remote_info = self.rpc.call_instance_info(instance.primary_node,
11909                                                   instance.name,
11910                                                   instance.hypervisor)
11911         remote_info.Raise("Error checking node %s" % instance.primary_node)
11912         remote_info = remote_info.payload
11913         if remote_info and "state" in remote_info:
11914           remote_state = "up"
11915         else:
11916           if instance.admin_state == constants.ADMINST_UP:
11917             remote_state = "down"
11918           else:
11919             remote_state = instance.admin_state
11920
11921       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11922                   instance.disks)
11923
11924       snodes_group_uuids = [nodes[snode_name].group
11925                             for snode_name in instance.secondary_nodes]
11926
11927       result[instance.name] = {
11928         "name": instance.name,
11929         "config_state": instance.admin_state,
11930         "run_state": remote_state,
11931         "pnode": instance.primary_node,
11932         "pnode_group_uuid": pnode.group,
11933         "pnode_group_name": group2name_fn(pnode.group),
11934         "snodes": instance.secondary_nodes,
11935         "snodes_group_uuids": snodes_group_uuids,
11936         "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11937         "os": instance.os,
11938         # this happens to be the same format used for hooks
11939         "nics": _NICListToTuple(self, instance.nics),
11940         "disk_template": instance.disk_template,
11941         "disks": disks,
11942         "hypervisor": instance.hypervisor,
11943         "network_port": instance.network_port,
11944         "hv_instance": instance.hvparams,
11945         "hv_actual": cluster.FillHV(instance, skip_globals=True),
11946         "be_instance": instance.beparams,
11947         "be_actual": cluster.FillBE(instance),
11948         "os_instance": instance.osparams,
11949         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11950         "serial_no": instance.serial_no,
11951         "mtime": instance.mtime,
11952         "ctime": instance.ctime,
11953         "uuid": instance.uuid,
11954         }
11955
11956     return result
11957
11958
11959 def PrepareContainerMods(mods, private_fn):
11960   """Prepares a list of container modifications by adding a private data field.
11961
11962   @type mods: list of tuples; (operation, index, parameters)
11963   @param mods: List of modifications
11964   @type private_fn: callable or None
11965   @param private_fn: Callable for constructing a private data field for a
11966     modification
11967   @rtype: list
11968
11969   """
11970   if private_fn is None:
11971     fn = lambda: None
11972   else:
11973     fn = private_fn
11974
11975   return [(op, idx, params, fn()) for (op, idx, params) in mods]
11976
11977
11978 #: Type description for changes as returned by L{ApplyContainerMods}'s
11979 #: callbacks
11980 _TApplyContModsCbChanges = \
11981   ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11982     ht.TNonEmptyString,
11983     ht.TAny,
11984     ])))
11985
11986
11987 def ApplyContainerMods(kind, container, chgdesc, mods,
11988                        create_fn, modify_fn, remove_fn):
11989   """Applies descriptions in C{mods} to C{container}.
11990
11991   @type kind: string
11992   @param kind: One-word item description
11993   @type container: list
11994   @param container: Container to modify
11995   @type chgdesc: None or list
11996   @param chgdesc: List of applied changes
11997   @type mods: list
11998   @param mods: Modifications as returned by L{PrepareContainerMods}
11999   @type create_fn: callable
12000   @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12001     receives absolute item index, parameters and private data object as added
12002     by L{PrepareContainerMods}, returns tuple containing new item and changes
12003     as list
12004   @type modify_fn: callable
12005   @param modify_fn: Callback for modifying an existing item
12006     (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12007     and private data object as added by L{PrepareContainerMods}, returns
12008     changes as list
12009   @type remove_fn: callable
12010   @param remove_fn: Callback on removing item; receives absolute item index,
12011     item and private data object as added by L{PrepareContainerMods}
12012
12013   """
12014   for (op, idx, params, private) in mods:
12015     if idx == -1:
12016       # Append
12017       absidx = len(container) - 1
12018     elif idx < 0:
12019       raise IndexError("Not accepting negative indices other than -1")
12020     elif idx > len(container):
12021       raise IndexError("Got %s index %s, but there are only %s" %
12022                        (kind, idx, len(container)))
12023     else:
12024       absidx = idx
12025
12026     changes = None
12027
12028     if op == constants.DDM_ADD:
12029       # Calculate where item will be added
12030       if idx == -1:
12031         addidx = len(container)
12032       else:
12033         addidx = idx
12034
12035       if create_fn is None:
12036         item = params
12037       else:
12038         (item, changes) = create_fn(addidx, params, private)
12039
12040       if idx == -1:
12041         container.append(item)
12042       else:
12043         assert idx >= 0
12044         assert idx <= len(container)
12045         # list.insert does so before the specified index
12046         container.insert(idx, item)
12047     else:
12048       # Retrieve existing item
12049       try:
12050         item = container[absidx]
12051       except IndexError:
12052         raise IndexError("Invalid %s index %s" % (kind, idx))
12053
12054       if op == constants.DDM_REMOVE:
12055         assert not params
12056
12057         if remove_fn is not None:
12058           remove_fn(absidx, item, private)
12059
12060         changes = [("%s/%s" % (kind, absidx), "remove")]
12061
12062         assert container[absidx] == item
12063         del container[absidx]
12064       elif op == constants.DDM_MODIFY:
12065         if modify_fn is not None:
12066           changes = modify_fn(absidx, item, params, private)
12067       else:
12068         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12069
12070     assert _TApplyContModsCbChanges(changes)
12071
12072     if not (chgdesc is None or changes is None):
12073       chgdesc.extend(changes)
12074
12075
12076 def _UpdateIvNames(base_index, disks):
12077   """Updates the C{iv_name} attribute of disks.
12078
12079   @type disks: list of L{objects.Disk}
12080
12081   """
12082   for (idx, disk) in enumerate(disks):
12083     disk.iv_name = "disk/%s" % (base_index + idx, )
12084
12085
12086 class _InstNicModPrivate:
12087   """Data structure for network interface modifications.
12088
12089   Used by L{LUInstanceSetParams}.
12090
12091   """
12092   def __init__(self):
12093     self.params = None
12094     self.filled = None
12095
12096
12097 class LUInstanceSetParams(LogicalUnit):
12098   """Modifies an instances's parameters.
12099
12100   """
12101   HPATH = "instance-modify"
12102   HTYPE = constants.HTYPE_INSTANCE
12103   REQ_BGL = False
12104
12105   @staticmethod
12106   def _UpgradeDiskNicMods(kind, mods, verify_fn):
12107     assert ht.TList(mods)
12108     assert not mods or len(mods[0]) in (2, 3)
12109
12110     if mods and len(mods[0]) == 2:
12111       result = []
12112
12113       addremove = 0
12114       for op, params in mods:
12115         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12116           result.append((op, -1, params))
12117           addremove += 1
12118
12119           if addremove > 1:
12120             raise errors.OpPrereqError("Only one %s add or remove operation is"
12121                                        " supported at a time" % kind,
12122                                        errors.ECODE_INVAL)
12123         else:
12124           result.append((constants.DDM_MODIFY, op, params))
12125
12126       assert verify_fn(result)
12127     else:
12128       result = mods
12129
12130     return result
12131
12132   @staticmethod
12133   def _CheckMods(kind, mods, key_types, item_fn):
12134     """Ensures requested disk/NIC modifications are valid.
12135
12136     """
12137     for (op, _, params) in mods:
12138       assert ht.TDict(params)
12139
12140       utils.ForceDictType(params, key_types)
12141
12142       if op == constants.DDM_REMOVE:
12143         if params:
12144           raise errors.OpPrereqError("No settings should be passed when"
12145                                      " removing a %s" % kind,
12146                                      errors.ECODE_INVAL)
12147       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12148         item_fn(op, params)
12149       else:
12150         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12151
12152   @staticmethod
12153   def _VerifyDiskModification(op, params):
12154     """Verifies a disk modification.
12155
12156     """
12157     if op == constants.DDM_ADD:
12158       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12159       if mode not in constants.DISK_ACCESS_SET:
12160         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12161                                    errors.ECODE_INVAL)
12162
12163       size = params.get(constants.IDISK_SIZE, None)
12164       if size is None:
12165         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12166                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12167
12168       try:
12169         size = int(size)
12170       except (TypeError, ValueError), err:
12171         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12172                                    errors.ECODE_INVAL)
12173
12174       params[constants.IDISK_SIZE] = size
12175
12176     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12177       raise errors.OpPrereqError("Disk size change not possible, use"
12178                                  " grow-disk", errors.ECODE_INVAL)
12179
12180   @staticmethod
12181   def _VerifyNicModification(op, params):
12182     """Verifies a network interface modification.
12183
12184     """
12185     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12186       ip = params.get(constants.INIC_IP, None)
12187       if ip is None:
12188         pass
12189       elif ip.lower() == constants.VALUE_NONE:
12190         params[constants.INIC_IP] = None
12191       elif not netutils.IPAddress.IsValid(ip):
12192         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12193                                    errors.ECODE_INVAL)
12194
12195       bridge = params.get("bridge", None)
12196       link = params.get(constants.INIC_LINK, None)
12197       if bridge and link:
12198         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12199                                    " at the same time", errors.ECODE_INVAL)
12200       elif bridge and bridge.lower() == constants.VALUE_NONE:
12201         params["bridge"] = None
12202       elif link and link.lower() == constants.VALUE_NONE:
12203         params[constants.INIC_LINK] = None
12204
12205       if op == constants.DDM_ADD:
12206         macaddr = params.get(constants.INIC_MAC, None)
12207         if macaddr is None:
12208           params[constants.INIC_MAC] = constants.VALUE_AUTO
12209
12210       if constants.INIC_MAC in params:
12211         macaddr = params[constants.INIC_MAC]
12212         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12213           macaddr = utils.NormalizeAndValidateMac(macaddr)
12214
12215         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12216           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12217                                      " modifying an existing NIC",
12218                                      errors.ECODE_INVAL)
12219
12220   def CheckArguments(self):
12221     if not (self.op.nics or self.op.disks or self.op.disk_template or
12222             self.op.hvparams or self.op.beparams or self.op.os_name or
12223             self.op.offline is not None or self.op.runtime_mem):
12224       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12225
12226     if self.op.hvparams:
12227       _CheckGlobalHvParams(self.op.hvparams)
12228
12229     self.op.disks = \
12230       self._UpgradeDiskNicMods("disk", self.op.disks,
12231         opcodes.OpInstanceSetParams.TestDiskModifications)
12232     self.op.nics = \
12233       self._UpgradeDiskNicMods("NIC", self.op.nics,
12234         opcodes.OpInstanceSetParams.TestNicModifications)
12235
12236     # Check disk modifications
12237     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12238                     self._VerifyDiskModification)
12239
12240     if self.op.disks and self.op.disk_template is not None:
12241       raise errors.OpPrereqError("Disk template conversion and other disk"
12242                                  " changes not supported at the same time",
12243                                  errors.ECODE_INVAL)
12244
12245     if (self.op.disk_template and
12246         self.op.disk_template in constants.DTS_INT_MIRROR and
12247         self.op.remote_node is None):
12248       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12249                                  " one requires specifying a secondary node",
12250                                  errors.ECODE_INVAL)
12251
12252     # Check NIC modifications
12253     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12254                     self._VerifyNicModification)
12255
12256   def ExpandNames(self):
12257     self._ExpandAndLockInstance()
12258     # Can't even acquire node locks in shared mode as upcoming changes in
12259     # Ganeti 2.6 will start to modify the node object on disk conversion
12260     self.needed_locks[locking.LEVEL_NODE] = []
12261     self.needed_locks[locking.LEVEL_NODE_RES] = []
12262     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12263
12264   def DeclareLocks(self, level):
12265     # TODO: Acquire group lock in shared mode (disk parameters)
12266     if level == locking.LEVEL_NODE:
12267       self._LockInstancesNodes()
12268       if self.op.disk_template and self.op.remote_node:
12269         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12270         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12271     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12272       # Copy node locks
12273       self.needed_locks[locking.LEVEL_NODE_RES] = \
12274         self.needed_locks[locking.LEVEL_NODE][:]
12275
12276   def BuildHooksEnv(self):
12277     """Build hooks env.
12278
12279     This runs on the master, primary and secondaries.
12280
12281     """
12282     args = dict()
12283     if constants.BE_MINMEM in self.be_new:
12284       args["minmem"] = self.be_new[constants.BE_MINMEM]
12285     if constants.BE_MAXMEM in self.be_new:
12286       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12287     if constants.BE_VCPUS in self.be_new:
12288       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12289     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12290     # information at all.
12291
12292     if self._new_nics is not None:
12293       nics = []
12294
12295       for nic in self._new_nics:
12296         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12297         mode = nicparams[constants.NIC_MODE]
12298         link = nicparams[constants.NIC_LINK]
12299         nics.append((nic.ip, nic.mac, mode, link))
12300
12301       args["nics"] = nics
12302
12303     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12304     if self.op.disk_template:
12305       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12306     if self.op.runtime_mem:
12307       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12308
12309     return env
12310
12311   def BuildHooksNodes(self):
12312     """Build hooks nodes.
12313
12314     """
12315     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12316     return (nl, nl)
12317
12318   def _PrepareNicModification(self, params, private, old_ip, old_params,
12319                               cluster, pnode):
12320     update_params_dict = dict([(key, params[key])
12321                                for key in constants.NICS_PARAMETERS
12322                                if key in params])
12323
12324     if "bridge" in params:
12325       update_params_dict[constants.NIC_LINK] = params["bridge"]
12326
12327     new_params = _GetUpdatedParams(old_params, update_params_dict)
12328     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12329
12330     new_filled_params = cluster.SimpleFillNIC(new_params)
12331     objects.NIC.CheckParameterSyntax(new_filled_params)
12332
12333     new_mode = new_filled_params[constants.NIC_MODE]
12334     if new_mode == constants.NIC_MODE_BRIDGED:
12335       bridge = new_filled_params[constants.NIC_LINK]
12336       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12337       if msg:
12338         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12339         if self.op.force:
12340           self.warn.append(msg)
12341         else:
12342           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12343
12344     elif new_mode == constants.NIC_MODE_ROUTED:
12345       ip = params.get(constants.INIC_IP, old_ip)
12346       if ip is None:
12347         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12348                                    " on a routed NIC", errors.ECODE_INVAL)
12349
12350     if constants.INIC_MAC in params:
12351       mac = params[constants.INIC_MAC]
12352       if mac is None:
12353         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12354                                    errors.ECODE_INVAL)
12355       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12356         # otherwise generate the MAC address
12357         params[constants.INIC_MAC] = \
12358           self.cfg.GenerateMAC(self.proc.GetECId())
12359       else:
12360         # or validate/reserve the current one
12361         try:
12362           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12363         except errors.ReservationError:
12364           raise errors.OpPrereqError("MAC address '%s' already in use"
12365                                      " in cluster" % mac,
12366                                      errors.ECODE_NOTUNIQUE)
12367
12368     private.params = new_params
12369     private.filled = new_filled_params
12370
12371   def CheckPrereq(self):
12372     """Check prerequisites.
12373
12374     This only checks the instance list against the existing names.
12375
12376     """
12377     # checking the new params on the primary/secondary nodes
12378
12379     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12380     cluster = self.cluster = self.cfg.GetClusterInfo()
12381     assert self.instance is not None, \
12382       "Cannot retrieve locked instance %s" % self.op.instance_name
12383     pnode = instance.primary_node
12384     nodelist = list(instance.all_nodes)
12385     pnode_info = self.cfg.GetNodeInfo(pnode)
12386     self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12387
12388     # Prepare disk/NIC modifications
12389     self.diskmod = PrepareContainerMods(self.op.disks, None)
12390     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12391
12392     # OS change
12393     if self.op.os_name and not self.op.force:
12394       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12395                       self.op.force_variant)
12396       instance_os = self.op.os_name
12397     else:
12398       instance_os = instance.os
12399
12400     assert not (self.op.disk_template and self.op.disks), \
12401       "Can't modify disk template and apply disk changes at the same time"
12402
12403     if self.op.disk_template:
12404       if instance.disk_template == self.op.disk_template:
12405         raise errors.OpPrereqError("Instance already has disk template %s" %
12406                                    instance.disk_template, errors.ECODE_INVAL)
12407
12408       if (instance.disk_template,
12409           self.op.disk_template) not in self._DISK_CONVERSIONS:
12410         raise errors.OpPrereqError("Unsupported disk template conversion from"
12411                                    " %s to %s" % (instance.disk_template,
12412                                                   self.op.disk_template),
12413                                    errors.ECODE_INVAL)
12414       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12415                           msg="cannot change disk template")
12416       if self.op.disk_template in constants.DTS_INT_MIRROR:
12417         if self.op.remote_node == pnode:
12418           raise errors.OpPrereqError("Given new secondary node %s is the same"
12419                                      " as the primary node of the instance" %
12420                                      self.op.remote_node, errors.ECODE_STATE)
12421         _CheckNodeOnline(self, self.op.remote_node)
12422         _CheckNodeNotDrained(self, self.op.remote_node)
12423         # FIXME: here we assume that the old instance type is DT_PLAIN
12424         assert instance.disk_template == constants.DT_PLAIN
12425         disks = [{constants.IDISK_SIZE: d.size,
12426                   constants.IDISK_VG: d.logical_id[0]}
12427                  for d in instance.disks]
12428         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12429         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12430
12431         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12432         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12433         ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12434         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12435                                 ignore=self.op.ignore_ipolicy)
12436         if pnode_info.group != snode_info.group:
12437           self.LogWarning("The primary and secondary nodes are in two"
12438                           " different node groups; the disk parameters"
12439                           " from the first disk's node group will be"
12440                           " used")
12441
12442     # hvparams processing
12443     if self.op.hvparams:
12444       hv_type = instance.hypervisor
12445       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12446       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12447       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12448
12449       # local check
12450       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12451       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12452       self.hv_proposed = self.hv_new = hv_new # the new actual values
12453       self.hv_inst = i_hvdict # the new dict (without defaults)
12454     else:
12455       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12456                                               instance.hvparams)
12457       self.hv_new = self.hv_inst = {}
12458
12459     # beparams processing
12460     if self.op.beparams:
12461       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12462                                    use_none=True)
12463       objects.UpgradeBeParams(i_bedict)
12464       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12465       be_new = cluster.SimpleFillBE(i_bedict)
12466       self.be_proposed = self.be_new = be_new # the new actual values
12467       self.be_inst = i_bedict # the new dict (without defaults)
12468     else:
12469       self.be_new = self.be_inst = {}
12470       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12471     be_old = cluster.FillBE(instance)
12472
12473     # CPU param validation -- checking every time a parameter is
12474     # changed to cover all cases where either CPU mask or vcpus have
12475     # changed
12476     if (constants.BE_VCPUS in self.be_proposed and
12477         constants.HV_CPU_MASK in self.hv_proposed):
12478       cpu_list = \
12479         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12480       # Verify mask is consistent with number of vCPUs. Can skip this
12481       # test if only 1 entry in the CPU mask, which means same mask
12482       # is applied to all vCPUs.
12483       if (len(cpu_list) > 1 and
12484           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12485         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12486                                    " CPU mask [%s]" %
12487                                    (self.be_proposed[constants.BE_VCPUS],
12488                                     self.hv_proposed[constants.HV_CPU_MASK]),
12489                                    errors.ECODE_INVAL)
12490
12491       # Only perform this test if a new CPU mask is given
12492       if constants.HV_CPU_MASK in self.hv_new:
12493         # Calculate the largest CPU number requested
12494         max_requested_cpu = max(map(max, cpu_list))
12495         # Check that all of the instance's nodes have enough physical CPUs to
12496         # satisfy the requested CPU mask
12497         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12498                                 max_requested_cpu + 1, instance.hypervisor)
12499
12500     # osparams processing
12501     if self.op.osparams:
12502       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12503       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12504       self.os_inst = i_osdict # the new dict (without defaults)
12505     else:
12506       self.os_inst = {}
12507
12508     self.warn = []
12509
12510     #TODO(dynmem): do the appropriate check involving MINMEM
12511     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12512         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12513       mem_check_list = [pnode]
12514       if be_new[constants.BE_AUTO_BALANCE]:
12515         # either we changed auto_balance to yes or it was from before
12516         mem_check_list.extend(instance.secondary_nodes)
12517       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12518                                                   instance.hypervisor)
12519       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12520                                          [instance.hypervisor])
12521       pninfo = nodeinfo[pnode]
12522       msg = pninfo.fail_msg
12523       if msg:
12524         # Assume the primary node is unreachable and go ahead
12525         self.warn.append("Can't get info from primary node %s: %s" %
12526                          (pnode, msg))
12527       else:
12528         (_, _, (pnhvinfo, )) = pninfo.payload
12529         if not isinstance(pnhvinfo.get("memory_free", None), int):
12530           self.warn.append("Node data from primary node %s doesn't contain"
12531                            " free memory information" % pnode)
12532         elif instance_info.fail_msg:
12533           self.warn.append("Can't get instance runtime information: %s" %
12534                           instance_info.fail_msg)
12535         else:
12536           if instance_info.payload:
12537             current_mem = int(instance_info.payload["memory"])
12538           else:
12539             # Assume instance not running
12540             # (there is a slight race condition here, but it's not very
12541             # probable, and we have no other way to check)
12542             # TODO: Describe race condition
12543             current_mem = 0
12544           #TODO(dynmem): do the appropriate check involving MINMEM
12545           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12546                       pnhvinfo["memory_free"])
12547           if miss_mem > 0:
12548             raise errors.OpPrereqError("This change will prevent the instance"
12549                                        " from starting, due to %d MB of memory"
12550                                        " missing on its primary node" %
12551                                        miss_mem,
12552                                        errors.ECODE_NORES)
12553
12554       if be_new[constants.BE_AUTO_BALANCE]:
12555         for node, nres in nodeinfo.items():
12556           if node not in instance.secondary_nodes:
12557             continue
12558           nres.Raise("Can't get info from secondary node %s" % node,
12559                      prereq=True, ecode=errors.ECODE_STATE)
12560           (_, _, (nhvinfo, )) = nres.payload
12561           if not isinstance(nhvinfo.get("memory_free", None), int):
12562             raise errors.OpPrereqError("Secondary node %s didn't return free"
12563                                        " memory information" % node,
12564                                        errors.ECODE_STATE)
12565           #TODO(dynmem): do the appropriate check involving MINMEM
12566           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12567             raise errors.OpPrereqError("This change will prevent the instance"
12568                                        " from failover to its secondary node"
12569                                        " %s, due to not enough memory" % node,
12570                                        errors.ECODE_STATE)
12571
12572     if self.op.runtime_mem:
12573       remote_info = self.rpc.call_instance_info(instance.primary_node,
12574                                                 instance.name,
12575                                                 instance.hypervisor)
12576       remote_info.Raise("Error checking node %s" % instance.primary_node)
12577       if not remote_info.payload: # not running already
12578         raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12579                                    errors.ECODE_STATE)
12580
12581       current_memory = remote_info.payload["memory"]
12582       if (not self.op.force and
12583            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12584             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12585         raise errors.OpPrereqError("Instance %s must have memory between %d"
12586                                    " and %d MB of memory unless --force is"
12587                                    " given" % (instance.name,
12588                                     self.be_proposed[constants.BE_MINMEM],
12589                                     self.be_proposed[constants.BE_MAXMEM]),
12590                                    errors.ECODE_INVAL)
12591
12592       if self.op.runtime_mem > current_memory:
12593         _CheckNodeFreeMemory(self, instance.primary_node,
12594                              "ballooning memory for instance %s" %
12595                              instance.name,
12596                              self.op.memory - current_memory,
12597                              instance.hypervisor)
12598
12599     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12600       raise errors.OpPrereqError("Disk operations not supported for"
12601                                  " diskless instances",
12602                                  errors.ECODE_INVAL)
12603
12604     def _PrepareNicCreate(_, params, private):
12605       self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12606       return (None, None)
12607
12608     def _PrepareNicMod(_, nic, params, private):
12609       self._PrepareNicModification(params, private, nic.ip,
12610                                    nic.nicparams, cluster, pnode)
12611       return None
12612
12613     # Verify NIC changes (operating on copy)
12614     nics = instance.nics[:]
12615     ApplyContainerMods("NIC", nics, None, self.nicmod,
12616                        _PrepareNicCreate, _PrepareNicMod, None)
12617     if len(nics) > constants.MAX_NICS:
12618       raise errors.OpPrereqError("Instance has too many network interfaces"
12619                                  " (%d), cannot add more" % constants.MAX_NICS,
12620                                  errors.ECODE_STATE)
12621
12622     # Verify disk changes (operating on a copy)
12623     disks = instance.disks[:]
12624     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12625     if len(disks) > constants.MAX_DISKS:
12626       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12627                                  " more" % constants.MAX_DISKS,
12628                                  errors.ECODE_STATE)
12629
12630     if self.op.offline is not None:
12631       if self.op.offline:
12632         msg = "can't change to offline"
12633       else:
12634         msg = "can't change to online"
12635       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12636
12637     # Pre-compute NIC changes (necessary to use result in hooks)
12638     self._nic_chgdesc = []
12639     if self.nicmod:
12640       # Operate on copies as this is still in prereq
12641       nics = [nic.Copy() for nic in instance.nics]
12642       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12643                          self._CreateNewNic, self._ApplyNicMods, None)
12644       self._new_nics = nics
12645     else:
12646       self._new_nics = None
12647
12648   def _ConvertPlainToDrbd(self, feedback_fn):
12649     """Converts an instance from plain to drbd.
12650
12651     """
12652     feedback_fn("Converting template to drbd")
12653     instance = self.instance
12654     pnode = instance.primary_node
12655     snode = self.op.remote_node
12656
12657     assert instance.disk_template == constants.DT_PLAIN
12658
12659     # create a fake disk info for _GenerateDiskTemplate
12660     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12661                   constants.IDISK_VG: d.logical_id[0]}
12662                  for d in instance.disks]
12663     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12664                                       instance.name, pnode, [snode],
12665                                       disk_info, None, None, 0, feedback_fn,
12666                                       self.diskparams)
12667     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12668                                         self.diskparams)
12669     info = _GetInstanceInfoText(instance)
12670     feedback_fn("Creating additional volumes...")
12671     # first, create the missing data and meta devices
12672     for disk in anno_disks:
12673       # unfortunately this is... not too nice
12674       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12675                             info, True)
12676       for child in disk.children:
12677         _CreateSingleBlockDev(self, snode, instance, child, info, True)
12678     # at this stage, all new LVs have been created, we can rename the
12679     # old ones
12680     feedback_fn("Renaming original volumes...")
12681     rename_list = [(o, n.children[0].logical_id)
12682                    for (o, n) in zip(instance.disks, new_disks)]
12683     result = self.rpc.call_blockdev_rename(pnode, rename_list)
12684     result.Raise("Failed to rename original LVs")
12685
12686     feedback_fn("Initializing DRBD devices...")
12687     # all child devices are in place, we can now create the DRBD devices
12688     for disk in anno_disks:
12689       for node in [pnode, snode]:
12690         f_create = node == pnode
12691         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12692
12693     # at this point, the instance has been modified
12694     instance.disk_template = constants.DT_DRBD8
12695     instance.disks = new_disks
12696     self.cfg.Update(instance, feedback_fn)
12697
12698     # Release node locks while waiting for sync
12699     _ReleaseLocks(self, locking.LEVEL_NODE)
12700
12701     # disks are created, waiting for sync
12702     disk_abort = not _WaitForSync(self, instance,
12703                                   oneshot=not self.op.wait_for_sync)
12704     if disk_abort:
12705       raise errors.OpExecError("There are some degraded disks for"
12706                                " this instance, please cleanup manually")
12707
12708     # Node resource locks will be released by caller
12709
12710   def _ConvertDrbdToPlain(self, feedback_fn):
12711     """Converts an instance from drbd to plain.
12712
12713     """
12714     instance = self.instance
12715
12716     assert len(instance.secondary_nodes) == 1
12717     assert instance.disk_template == constants.DT_DRBD8
12718
12719     pnode = instance.primary_node
12720     snode = instance.secondary_nodes[0]
12721     feedback_fn("Converting template to plain")
12722
12723     old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12724     new_disks = [d.children[0] for d in instance.disks]
12725
12726     # copy over size and mode
12727     for parent, child in zip(old_disks, new_disks):
12728       child.size = parent.size
12729       child.mode = parent.mode
12730
12731     # this is a DRBD disk, return its port to the pool
12732     # NOTE: this must be done right before the call to cfg.Update!
12733     for disk in old_disks:
12734       tcp_port = disk.logical_id[2]
12735       self.cfg.AddTcpUdpPort(tcp_port)
12736
12737     # update instance structure
12738     instance.disks = new_disks
12739     instance.disk_template = constants.DT_PLAIN
12740     self.cfg.Update(instance, feedback_fn)
12741
12742     # Release locks in case removing disks takes a while
12743     _ReleaseLocks(self, locking.LEVEL_NODE)
12744
12745     feedback_fn("Removing volumes on the secondary node...")
12746     for disk in old_disks:
12747       self.cfg.SetDiskID(disk, snode)
12748       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12749       if msg:
12750         self.LogWarning("Could not remove block device %s on node %s,"
12751                         " continuing anyway: %s", disk.iv_name, snode, msg)
12752
12753     feedback_fn("Removing unneeded volumes on the primary node...")
12754     for idx, disk in enumerate(old_disks):
12755       meta = disk.children[1]
12756       self.cfg.SetDiskID(meta, pnode)
12757       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12758       if msg:
12759         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12760                         " continuing anyway: %s", idx, pnode, msg)
12761
12762   def _CreateNewDisk(self, idx, params, _):
12763     """Creates a new disk.
12764
12765     """
12766     instance = self.instance
12767
12768     # add a new disk
12769     if instance.disk_template in constants.DTS_FILEBASED:
12770       (file_driver, file_path) = instance.disks[0].logical_id
12771       file_path = os.path.dirname(file_path)
12772     else:
12773       file_driver = file_path = None
12774
12775     disk = \
12776       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12777                             instance.primary_node, instance.secondary_nodes,
12778                             [params], file_path, file_driver, idx,
12779                             self.Log, self.diskparams)[0]
12780
12781     info = _GetInstanceInfoText(instance)
12782
12783     logging.info("Creating volume %s for instance %s",
12784                  disk.iv_name, instance.name)
12785     # Note: this needs to be kept in sync with _CreateDisks
12786     #HARDCODE
12787     for node in instance.all_nodes:
12788       f_create = (node == instance.primary_node)
12789       try:
12790         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12791       except errors.OpExecError, err:
12792         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12793                         disk.iv_name, disk, node, err)
12794
12795     return (disk, [
12796       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12797       ])
12798
12799   @staticmethod
12800   def _ModifyDisk(idx, disk, params, _):
12801     """Modifies a disk.
12802
12803     """
12804     disk.mode = params[constants.IDISK_MODE]
12805
12806     return [
12807       ("disk.mode/%d" % idx, disk.mode),
12808       ]
12809
12810   def _RemoveDisk(self, idx, root, _):
12811     """Removes a disk.
12812
12813     """
12814     (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12815     for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12816       self.cfg.SetDiskID(disk, node)
12817       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12818       if msg:
12819         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12820                         " continuing anyway", idx, node, msg)
12821
12822     # if this is a DRBD disk, return its port to the pool
12823     if root.dev_type in constants.LDS_DRBD:
12824       self.cfg.AddTcpUdpPort(root.logical_id[2])
12825
12826   @staticmethod
12827   def _CreateNewNic(idx, params, private):
12828     """Creates data structure for a new network interface.
12829
12830     """
12831     mac = params[constants.INIC_MAC]
12832     ip = params.get(constants.INIC_IP, None)
12833     nicparams = private.params
12834
12835     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12836       ("nic.%d" % idx,
12837        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12838        (mac, ip, private.filled[constants.NIC_MODE],
12839        private.filled[constants.NIC_LINK])),
12840       ])
12841
12842   @staticmethod
12843   def _ApplyNicMods(idx, nic, params, private):
12844     """Modifies a network interface.
12845
12846     """
12847     changes = []
12848
12849     for key in [constants.INIC_MAC, constants.INIC_IP]:
12850       if key in params:
12851         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12852         setattr(nic, key, params[key])
12853
12854     if private.params:
12855       nic.nicparams = private.params
12856
12857       for (key, val) in params.items():
12858         changes.append(("nic.%s/%d" % (key, idx), val))
12859
12860     return changes
12861
12862   def Exec(self, feedback_fn):
12863     """Modifies an instance.
12864
12865     All parameters take effect only at the next restart of the instance.
12866
12867     """
12868     # Process here the warnings from CheckPrereq, as we don't have a
12869     # feedback_fn there.
12870     # TODO: Replace with self.LogWarning
12871     for warn in self.warn:
12872       feedback_fn("WARNING: %s" % warn)
12873
12874     assert ((self.op.disk_template is None) ^
12875             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12876       "Not owning any node resource locks"
12877
12878     result = []
12879     instance = self.instance
12880
12881     # runtime memory
12882     if self.op.runtime_mem:
12883       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12884                                                      instance,
12885                                                      self.op.runtime_mem)
12886       rpcres.Raise("Cannot modify instance runtime memory")
12887       result.append(("runtime_memory", self.op.runtime_mem))
12888
12889     # Apply disk changes
12890     ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12891                        self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12892     _UpdateIvNames(0, instance.disks)
12893
12894     if self.op.disk_template:
12895       if __debug__:
12896         check_nodes = set(instance.all_nodes)
12897         if self.op.remote_node:
12898           check_nodes.add(self.op.remote_node)
12899         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12900           owned = self.owned_locks(level)
12901           assert not (check_nodes - owned), \
12902             ("Not owning the correct locks, owning %r, expected at least %r" %
12903              (owned, check_nodes))
12904
12905       r_shut = _ShutdownInstanceDisks(self, instance)
12906       if not r_shut:
12907         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12908                                  " proceed with disk template conversion")
12909       mode = (instance.disk_template, self.op.disk_template)
12910       try:
12911         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12912       except:
12913         self.cfg.ReleaseDRBDMinors(instance.name)
12914         raise
12915       result.append(("disk_template", self.op.disk_template))
12916
12917       assert instance.disk_template == self.op.disk_template, \
12918         ("Expected disk template '%s', found '%s'" %
12919          (self.op.disk_template, instance.disk_template))
12920
12921     # Release node and resource locks if there are any (they might already have
12922     # been released during disk conversion)
12923     _ReleaseLocks(self, locking.LEVEL_NODE)
12924     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12925
12926     # Apply NIC changes
12927     if self._new_nics is not None:
12928       instance.nics = self._new_nics
12929       result.extend(self._nic_chgdesc)
12930
12931     # hvparams changes
12932     if self.op.hvparams:
12933       instance.hvparams = self.hv_inst
12934       for key, val in self.op.hvparams.iteritems():
12935         result.append(("hv/%s" % key, val))
12936
12937     # beparams changes
12938     if self.op.beparams:
12939       instance.beparams = self.be_inst
12940       for key, val in self.op.beparams.iteritems():
12941         result.append(("be/%s" % key, val))
12942
12943     # OS change
12944     if self.op.os_name:
12945       instance.os = self.op.os_name
12946
12947     # osparams changes
12948     if self.op.osparams:
12949       instance.osparams = self.os_inst
12950       for key, val in self.op.osparams.iteritems():
12951         result.append(("os/%s" % key, val))
12952
12953     if self.op.offline is None:
12954       # Ignore
12955       pass
12956     elif self.op.offline:
12957       # Mark instance as offline
12958       self.cfg.MarkInstanceOffline(instance.name)
12959       result.append(("admin_state", constants.ADMINST_OFFLINE))
12960     else:
12961       # Mark instance as online, but stopped
12962       self.cfg.MarkInstanceDown(instance.name)
12963       result.append(("admin_state", constants.ADMINST_DOWN))
12964
12965     self.cfg.Update(instance, feedback_fn)
12966
12967     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12968                 self.owned_locks(locking.LEVEL_NODE)), \
12969       "All node locks should have been released by now"
12970
12971     return result
12972
12973   _DISK_CONVERSIONS = {
12974     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12975     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12976     }
12977
12978
12979 class LUInstanceChangeGroup(LogicalUnit):
12980   HPATH = "instance-change-group"
12981   HTYPE = constants.HTYPE_INSTANCE
12982   REQ_BGL = False
12983
12984   def ExpandNames(self):
12985     self.share_locks = _ShareAll()
12986     self.needed_locks = {
12987       locking.LEVEL_NODEGROUP: [],
12988       locking.LEVEL_NODE: [],
12989       }
12990
12991     self._ExpandAndLockInstance()
12992
12993     if self.op.target_groups:
12994       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12995                                   self.op.target_groups)
12996     else:
12997       self.req_target_uuids = None
12998
12999     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13000
13001   def DeclareLocks(self, level):
13002     if level == locking.LEVEL_NODEGROUP:
13003       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13004
13005       if self.req_target_uuids:
13006         lock_groups = set(self.req_target_uuids)
13007
13008         # Lock all groups used by instance optimistically; this requires going
13009         # via the node before it's locked, requiring verification later on
13010         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13011         lock_groups.update(instance_groups)
13012       else:
13013         # No target groups, need to lock all of them
13014         lock_groups = locking.ALL_SET
13015
13016       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13017
13018     elif level == locking.LEVEL_NODE:
13019       if self.req_target_uuids:
13020         # Lock all nodes used by instances
13021         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13022         self._LockInstancesNodes()
13023
13024         # Lock all nodes in all potential target groups
13025         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13026                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13027         member_nodes = [node_name
13028                         for group in lock_groups
13029                         for node_name in self.cfg.GetNodeGroup(group).members]
13030         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13031       else:
13032         # Lock all nodes as all groups are potential targets
13033         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13034
13035   def CheckPrereq(self):
13036     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13037     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13038     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13039
13040     assert (self.req_target_uuids is None or
13041             owned_groups.issuperset(self.req_target_uuids))
13042     assert owned_instances == set([self.op.instance_name])
13043
13044     # Get instance information
13045     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13046
13047     # Check if node groups for locked instance are still correct
13048     assert owned_nodes.issuperset(self.instance.all_nodes), \
13049       ("Instance %s's nodes changed while we kept the lock" %
13050        self.op.instance_name)
13051
13052     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13053                                            owned_groups)
13054
13055     if self.req_target_uuids:
13056       # User requested specific target groups
13057       self.target_uuids = frozenset(self.req_target_uuids)
13058     else:
13059       # All groups except those used by the instance are potential targets
13060       self.target_uuids = owned_groups - inst_groups
13061
13062     conflicting_groups = self.target_uuids & inst_groups
13063     if conflicting_groups:
13064       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13065                                  " used by the instance '%s'" %
13066                                  (utils.CommaJoin(conflicting_groups),
13067                                   self.op.instance_name),
13068                                  errors.ECODE_INVAL)
13069
13070     if not self.target_uuids:
13071       raise errors.OpPrereqError("There are no possible target groups",
13072                                  errors.ECODE_INVAL)
13073
13074   def BuildHooksEnv(self):
13075     """Build hooks env.
13076
13077     """
13078     assert self.target_uuids
13079
13080     env = {
13081       "TARGET_GROUPS": " ".join(self.target_uuids),
13082       }
13083
13084     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13085
13086     return env
13087
13088   def BuildHooksNodes(self):
13089     """Build hooks nodes.
13090
13091     """
13092     mn = self.cfg.GetMasterNode()
13093     return ([mn], [mn])
13094
13095   def Exec(self, feedback_fn):
13096     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13097
13098     assert instances == [self.op.instance_name], "Instance not locked"
13099
13100     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13101                      instances=instances, target_groups=list(self.target_uuids))
13102
13103     ial.Run(self.op.iallocator)
13104
13105     if not ial.success:
13106       raise errors.OpPrereqError("Can't compute solution for changing group of"
13107                                  " instance '%s' using iallocator '%s': %s" %
13108                                  (self.op.instance_name, self.op.iallocator,
13109                                   ial.info),
13110                                  errors.ECODE_NORES)
13111
13112     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13113
13114     self.LogInfo("Iallocator returned %s job(s) for changing group of"
13115                  " instance '%s'", len(jobs), self.op.instance_name)
13116
13117     return ResultWithJobs(jobs)
13118
13119
13120 class LUBackupQuery(NoHooksLU):
13121   """Query the exports list
13122
13123   """
13124   REQ_BGL = False
13125
13126   def CheckArguments(self):
13127     self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13128                              ["node", "export"], self.op.use_locking)
13129
13130   def ExpandNames(self):
13131     self.expq.ExpandNames(self)
13132
13133   def DeclareLocks(self, level):
13134     self.expq.DeclareLocks(self, level)
13135
13136   def Exec(self, feedback_fn):
13137     result = {}
13138
13139     for (node, expname) in self.expq.OldStyleQuery(self):
13140       if expname is None:
13141         result[node] = False
13142       else:
13143         result.setdefault(node, []).append(expname)
13144
13145     return result
13146
13147
13148 class _ExportQuery(_QueryBase):
13149   FIELDS = query.EXPORT_FIELDS
13150
13151   #: The node name is not a unique key for this query
13152   SORT_FIELD = "node"
13153
13154   def ExpandNames(self, lu):
13155     lu.needed_locks = {}
13156
13157     # The following variables interact with _QueryBase._GetNames
13158     if self.names:
13159       self.wanted = _GetWantedNodes(lu, self.names)
13160     else:
13161       self.wanted = locking.ALL_SET
13162
13163     self.do_locking = self.use_locking
13164
13165     if self.do_locking:
13166       lu.share_locks = _ShareAll()
13167       lu.needed_locks = {
13168         locking.LEVEL_NODE: self.wanted,
13169         }
13170
13171   def DeclareLocks(self, lu, level):
13172     pass
13173
13174   def _GetQueryData(self, lu):
13175     """Computes the list of nodes and their attributes.
13176
13177     """
13178     # Locking is not used
13179     # TODO
13180     assert not (compat.any(lu.glm.is_owned(level)
13181                            for level in locking.LEVELS
13182                            if level != locking.LEVEL_CLUSTER) or
13183                 self.do_locking or self.use_locking)
13184
13185     nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13186
13187     result = []
13188
13189     for (node, nres) in lu.rpc.call_export_list(nodes).items():
13190       if nres.fail_msg:
13191         result.append((node, None))
13192       else:
13193         result.extend((node, expname) for expname in nres.payload)
13194
13195     return result
13196
13197
13198 class LUBackupPrepare(NoHooksLU):
13199   """Prepares an instance for an export and returns useful information.
13200
13201   """
13202   REQ_BGL = False
13203
13204   def ExpandNames(self):
13205     self._ExpandAndLockInstance()
13206
13207   def CheckPrereq(self):
13208     """Check prerequisites.
13209
13210     """
13211     instance_name = self.op.instance_name
13212
13213     self.instance = self.cfg.GetInstanceInfo(instance_name)
13214     assert self.instance is not None, \
13215           "Cannot retrieve locked instance %s" % self.op.instance_name
13216     _CheckNodeOnline(self, self.instance.primary_node)
13217
13218     self._cds = _GetClusterDomainSecret()
13219
13220   def Exec(self, feedback_fn):
13221     """Prepares an instance for an export.
13222
13223     """
13224     instance = self.instance
13225
13226     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13227       salt = utils.GenerateSecret(8)
13228
13229       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13230       result = self.rpc.call_x509_cert_create(instance.primary_node,
13231                                               constants.RIE_CERT_VALIDITY)
13232       result.Raise("Can't create X509 key and certificate on %s" % result.node)
13233
13234       (name, cert_pem) = result.payload
13235
13236       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13237                                              cert_pem)
13238
13239       return {
13240         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13241         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13242                           salt),
13243         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13244         }
13245
13246     return None
13247
13248
13249 class LUBackupExport(LogicalUnit):
13250   """Export an instance to an image in the cluster.
13251
13252   """
13253   HPATH = "instance-export"
13254   HTYPE = constants.HTYPE_INSTANCE
13255   REQ_BGL = False
13256
13257   def CheckArguments(self):
13258     """Check the arguments.
13259
13260     """
13261     self.x509_key_name = self.op.x509_key_name
13262     self.dest_x509_ca_pem = self.op.destination_x509_ca
13263
13264     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13265       if not self.x509_key_name:
13266         raise errors.OpPrereqError("Missing X509 key name for encryption",
13267                                    errors.ECODE_INVAL)
13268
13269       if not self.dest_x509_ca_pem:
13270         raise errors.OpPrereqError("Missing destination X509 CA",
13271                                    errors.ECODE_INVAL)
13272
13273   def ExpandNames(self):
13274     self._ExpandAndLockInstance()
13275
13276     # Lock all nodes for local exports
13277     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13278       # FIXME: lock only instance primary and destination node
13279       #
13280       # Sad but true, for now we have do lock all nodes, as we don't know where
13281       # the previous export might be, and in this LU we search for it and
13282       # remove it from its current node. In the future we could fix this by:
13283       #  - making a tasklet to search (share-lock all), then create the
13284       #    new one, then one to remove, after
13285       #  - removing the removal operation altogether
13286       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13287
13288   def DeclareLocks(self, level):
13289     """Last minute lock declaration."""
13290     # All nodes are locked anyway, so nothing to do here.
13291
13292   def BuildHooksEnv(self):
13293     """Build hooks env.
13294
13295     This will run on the master, primary node and target node.
13296
13297     """
13298     env = {
13299       "EXPORT_MODE": self.op.mode,
13300       "EXPORT_NODE": self.op.target_node,
13301       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13302       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13303       # TODO: Generic function for boolean env variables
13304       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13305       }
13306
13307     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13308
13309     return env
13310
13311   def BuildHooksNodes(self):
13312     """Build hooks nodes.
13313
13314     """
13315     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13316
13317     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13318       nl.append(self.op.target_node)
13319
13320     return (nl, nl)
13321
13322   def CheckPrereq(self):
13323     """Check prerequisites.
13324
13325     This checks that the instance and node names are valid.
13326
13327     """
13328     instance_name = self.op.instance_name
13329
13330     self.instance = self.cfg.GetInstanceInfo(instance_name)
13331     assert self.instance is not None, \
13332           "Cannot retrieve locked instance %s" % self.op.instance_name
13333     _CheckNodeOnline(self, self.instance.primary_node)
13334
13335     if (self.op.remove_instance and
13336         self.instance.admin_state == constants.ADMINST_UP and
13337         not self.op.shutdown):
13338       raise errors.OpPrereqError("Can not remove instance without shutting it"
13339                                  " down before")
13340
13341     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13342       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13343       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13344       assert self.dst_node is not None
13345
13346       _CheckNodeOnline(self, self.dst_node.name)
13347       _CheckNodeNotDrained(self, self.dst_node.name)
13348
13349       self._cds = None
13350       self.dest_disk_info = None
13351       self.dest_x509_ca = None
13352
13353     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13354       self.dst_node = None
13355
13356       if len(self.op.target_node) != len(self.instance.disks):
13357         raise errors.OpPrereqError(("Received destination information for %s"
13358                                     " disks, but instance %s has %s disks") %
13359                                    (len(self.op.target_node), instance_name,
13360                                     len(self.instance.disks)),
13361                                    errors.ECODE_INVAL)
13362
13363       cds = _GetClusterDomainSecret()
13364
13365       # Check X509 key name
13366       try:
13367         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13368       except (TypeError, ValueError), err:
13369         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13370
13371       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13372         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13373                                    errors.ECODE_INVAL)
13374
13375       # Load and verify CA
13376       try:
13377         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13378       except OpenSSL.crypto.Error, err:
13379         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13380                                    (err, ), errors.ECODE_INVAL)
13381
13382       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13383       if errcode is not None:
13384         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13385                                    (msg, ), errors.ECODE_INVAL)
13386
13387       self.dest_x509_ca = cert
13388
13389       # Verify target information
13390       disk_info = []
13391       for idx, disk_data in enumerate(self.op.target_node):
13392         try:
13393           (host, port, magic) = \
13394             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13395         except errors.GenericError, err:
13396           raise errors.OpPrereqError("Target info for disk %s: %s" %
13397                                      (idx, err), errors.ECODE_INVAL)
13398
13399         disk_info.append((host, port, magic))
13400
13401       assert len(disk_info) == len(self.op.target_node)
13402       self.dest_disk_info = disk_info
13403
13404     else:
13405       raise errors.ProgrammerError("Unhandled export mode %r" %
13406                                    self.op.mode)
13407
13408     # instance disk type verification
13409     # TODO: Implement export support for file-based disks
13410     for disk in self.instance.disks:
13411       if disk.dev_type == constants.LD_FILE:
13412         raise errors.OpPrereqError("Export not supported for instances with"
13413                                    " file-based disks", errors.ECODE_INVAL)
13414
13415   def _CleanupExports(self, feedback_fn):
13416     """Removes exports of current instance from all other nodes.
13417
13418     If an instance in a cluster with nodes A..D was exported to node C, its
13419     exports will be removed from the nodes A, B and D.
13420
13421     """
13422     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13423
13424     nodelist = self.cfg.GetNodeList()
13425     nodelist.remove(self.dst_node.name)
13426
13427     # on one-node clusters nodelist will be empty after the removal
13428     # if we proceed the backup would be removed because OpBackupQuery
13429     # substitutes an empty list with the full cluster node list.
13430     iname = self.instance.name
13431     if nodelist:
13432       feedback_fn("Removing old exports for instance %s" % iname)
13433       exportlist = self.rpc.call_export_list(nodelist)
13434       for node in exportlist:
13435         if exportlist[node].fail_msg:
13436           continue
13437         if iname in exportlist[node].payload:
13438           msg = self.rpc.call_export_remove(node, iname).fail_msg
13439           if msg:
13440             self.LogWarning("Could not remove older export for instance %s"
13441                             " on node %s: %s", iname, node, msg)
13442
13443   def Exec(self, feedback_fn):
13444     """Export an instance to an image in the cluster.
13445
13446     """
13447     assert self.op.mode in constants.EXPORT_MODES
13448
13449     instance = self.instance
13450     src_node = instance.primary_node
13451
13452     if self.op.shutdown:
13453       # shutdown the instance, but not the disks
13454       feedback_fn("Shutting down instance %s" % instance.name)
13455       result = self.rpc.call_instance_shutdown(src_node, instance,
13456                                                self.op.shutdown_timeout)
13457       # TODO: Maybe ignore failures if ignore_remove_failures is set
13458       result.Raise("Could not shutdown instance %s on"
13459                    " node %s" % (instance.name, src_node))
13460
13461     # set the disks ID correctly since call_instance_start needs the
13462     # correct drbd minor to create the symlinks
13463     for disk in instance.disks:
13464       self.cfg.SetDiskID(disk, src_node)
13465
13466     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13467
13468     if activate_disks:
13469       # Activate the instance disks if we'exporting a stopped instance
13470       feedback_fn("Activating disks for %s" % instance.name)
13471       _StartInstanceDisks(self, instance, None)
13472
13473     try:
13474       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13475                                                      instance)
13476
13477       helper.CreateSnapshots()
13478       try:
13479         if (self.op.shutdown and
13480             instance.admin_state == constants.ADMINST_UP and
13481             not self.op.remove_instance):
13482           assert not activate_disks
13483           feedback_fn("Starting instance %s" % instance.name)
13484           result = self.rpc.call_instance_start(src_node,
13485                                                 (instance, None, None), False)
13486           msg = result.fail_msg
13487           if msg:
13488             feedback_fn("Failed to start instance: %s" % msg)
13489             _ShutdownInstanceDisks(self, instance)
13490             raise errors.OpExecError("Could not start instance: %s" % msg)
13491
13492         if self.op.mode == constants.EXPORT_MODE_LOCAL:
13493           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13494         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13495           connect_timeout = constants.RIE_CONNECT_TIMEOUT
13496           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13497
13498           (key_name, _, _) = self.x509_key_name
13499
13500           dest_ca_pem = \
13501             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13502                                             self.dest_x509_ca)
13503
13504           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13505                                                      key_name, dest_ca_pem,
13506                                                      timeouts)
13507       finally:
13508         helper.Cleanup()
13509
13510       # Check for backwards compatibility
13511       assert len(dresults) == len(instance.disks)
13512       assert compat.all(isinstance(i, bool) for i in dresults), \
13513              "Not all results are boolean: %r" % dresults
13514
13515     finally:
13516       if activate_disks:
13517         feedback_fn("Deactivating disks for %s" % instance.name)
13518         _ShutdownInstanceDisks(self, instance)
13519
13520     if not (compat.all(dresults) and fin_resu):
13521       failures = []
13522       if not fin_resu:
13523         failures.append("export finalization")
13524       if not compat.all(dresults):
13525         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13526                                if not dsk)
13527         failures.append("disk export: disk(s) %s" % fdsk)
13528
13529       raise errors.OpExecError("Export failed, errors in %s" %
13530                                utils.CommaJoin(failures))
13531
13532     # At this point, the export was successful, we can cleanup/finish
13533
13534     # Remove instance if requested
13535     if self.op.remove_instance:
13536       feedback_fn("Removing instance %s" % instance.name)
13537       _RemoveInstance(self, feedback_fn, instance,
13538                       self.op.ignore_remove_failures)
13539
13540     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13541       self._CleanupExports(feedback_fn)
13542
13543     return fin_resu, dresults
13544
13545
13546 class LUBackupRemove(NoHooksLU):
13547   """Remove exports related to the named instance.
13548
13549   """
13550   REQ_BGL = False
13551
13552   def ExpandNames(self):
13553     self.needed_locks = {}
13554     # We need all nodes to be locked in order for RemoveExport to work, but we
13555     # don't need to lock the instance itself, as nothing will happen to it (and
13556     # we can remove exports also for a removed instance)
13557     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13558
13559   def Exec(self, feedback_fn):
13560     """Remove any export.
13561
13562     """
13563     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13564     # If the instance was not found we'll try with the name that was passed in.
13565     # This will only work if it was an FQDN, though.
13566     fqdn_warn = False
13567     if not instance_name:
13568       fqdn_warn = True
13569       instance_name = self.op.instance_name
13570
13571     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13572     exportlist = self.rpc.call_export_list(locked_nodes)
13573     found = False
13574     for node in exportlist:
13575       msg = exportlist[node].fail_msg
13576       if msg:
13577         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13578         continue
13579       if instance_name in exportlist[node].payload:
13580         found = True
13581         result = self.rpc.call_export_remove(node, instance_name)
13582         msg = result.fail_msg
13583         if msg:
13584           logging.error("Could not remove export for instance %s"
13585                         " on node %s: %s", instance_name, node, msg)
13586
13587     if fqdn_warn and not found:
13588       feedback_fn("Export not found. If trying to remove an export belonging"
13589                   " to a deleted instance please use its Fully Qualified"
13590                   " Domain Name.")
13591
13592
13593 class LUGroupAdd(LogicalUnit):
13594   """Logical unit for creating node groups.
13595
13596   """
13597   HPATH = "group-add"
13598   HTYPE = constants.HTYPE_GROUP
13599   REQ_BGL = False
13600
13601   def ExpandNames(self):
13602     # We need the new group's UUID here so that we can create and acquire the
13603     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13604     # that it should not check whether the UUID exists in the configuration.
13605     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13606     self.needed_locks = {}
13607     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13608
13609   def CheckPrereq(self):
13610     """Check prerequisites.
13611
13612     This checks that the given group name is not an existing node group
13613     already.
13614
13615     """
13616     try:
13617       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13618     except errors.OpPrereqError:
13619       pass
13620     else:
13621       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13622                                  " node group (UUID: %s)" %
13623                                  (self.op.group_name, existing_uuid),
13624                                  errors.ECODE_EXISTS)
13625
13626     if self.op.ndparams:
13627       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13628
13629     if self.op.hv_state:
13630       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13631     else:
13632       self.new_hv_state = None
13633
13634     if self.op.disk_state:
13635       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13636     else:
13637       self.new_disk_state = None
13638
13639     if self.op.diskparams:
13640       for templ in constants.DISK_TEMPLATES:
13641         if templ in self.op.diskparams:
13642           utils.ForceDictType(self.op.diskparams[templ],
13643                               constants.DISK_DT_TYPES)
13644       self.new_diskparams = self.op.diskparams
13645       try:
13646         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13647       except errors.OpPrereqError, err:
13648         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13649                                    errors.ECODE_INVAL)
13650     else:
13651       self.new_diskparams = {}
13652
13653     if self.op.ipolicy:
13654       cluster = self.cfg.GetClusterInfo()
13655       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13656       try:
13657         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13658       except errors.ConfigurationError, err:
13659         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13660                                    errors.ECODE_INVAL)
13661
13662   def BuildHooksEnv(self):
13663     """Build hooks env.
13664
13665     """
13666     return {
13667       "GROUP_NAME": self.op.group_name,
13668       }
13669
13670   def BuildHooksNodes(self):
13671     """Build hooks nodes.
13672
13673     """
13674     mn = self.cfg.GetMasterNode()
13675     return ([mn], [mn])
13676
13677   def Exec(self, feedback_fn):
13678     """Add the node group to the cluster.
13679
13680     """
13681     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13682                                   uuid=self.group_uuid,
13683                                   alloc_policy=self.op.alloc_policy,
13684                                   ndparams=self.op.ndparams,
13685                                   diskparams=self.new_diskparams,
13686                                   ipolicy=self.op.ipolicy,
13687                                   hv_state_static=self.new_hv_state,
13688                                   disk_state_static=self.new_disk_state)
13689
13690     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13691     del self.remove_locks[locking.LEVEL_NODEGROUP]
13692
13693
13694 class LUGroupAssignNodes(NoHooksLU):
13695   """Logical unit for assigning nodes to groups.
13696
13697   """
13698   REQ_BGL = False
13699
13700   def ExpandNames(self):
13701     # These raise errors.OpPrereqError on their own:
13702     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13703     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13704
13705     # We want to lock all the affected nodes and groups. We have readily
13706     # available the list of nodes, and the *destination* group. To gather the
13707     # list of "source" groups, we need to fetch node information later on.
13708     self.needed_locks = {
13709       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13710       locking.LEVEL_NODE: self.op.nodes,
13711       }
13712
13713   def DeclareLocks(self, level):
13714     if level == locking.LEVEL_NODEGROUP:
13715       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13716
13717       # Try to get all affected nodes' groups without having the group or node
13718       # lock yet. Needs verification later in the code flow.
13719       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13720
13721       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13722
13723   def CheckPrereq(self):
13724     """Check prerequisites.
13725
13726     """
13727     assert self.needed_locks[locking.LEVEL_NODEGROUP]
13728     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13729             frozenset(self.op.nodes))
13730
13731     expected_locks = (set([self.group_uuid]) |
13732                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13733     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13734     if actual_locks != expected_locks:
13735       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13736                                " current groups are '%s', used to be '%s'" %
13737                                (utils.CommaJoin(expected_locks),
13738                                 utils.CommaJoin(actual_locks)))
13739
13740     self.node_data = self.cfg.GetAllNodesInfo()
13741     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13742     instance_data = self.cfg.GetAllInstancesInfo()
13743
13744     if self.group is None:
13745       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13746                                (self.op.group_name, self.group_uuid))
13747
13748     (new_splits, previous_splits) = \
13749       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13750                                              for node in self.op.nodes],
13751                                             self.node_data, instance_data)
13752
13753     if new_splits:
13754       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13755
13756       if not self.op.force:
13757         raise errors.OpExecError("The following instances get split by this"
13758                                  " change and --force was not given: %s" %
13759                                  fmt_new_splits)
13760       else:
13761         self.LogWarning("This operation will split the following instances: %s",
13762                         fmt_new_splits)
13763
13764         if previous_splits:
13765           self.LogWarning("In addition, these already-split instances continue"
13766                           " to be split across groups: %s",
13767                           utils.CommaJoin(utils.NiceSort(previous_splits)))
13768
13769   def Exec(self, feedback_fn):
13770     """Assign nodes to a new group.
13771
13772     """
13773     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13774
13775     self.cfg.AssignGroupNodes(mods)
13776
13777   @staticmethod
13778   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13779     """Check for split instances after a node assignment.
13780
13781     This method considers a series of node assignments as an atomic operation,
13782     and returns information about split instances after applying the set of
13783     changes.
13784
13785     In particular, it returns information about newly split instances, and
13786     instances that were already split, and remain so after the change.
13787
13788     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13789     considered.
13790
13791     @type changes: list of (node_name, new_group_uuid) pairs.
13792     @param changes: list of node assignments to consider.
13793     @param node_data: a dict with data for all nodes
13794     @param instance_data: a dict with all instances to consider
13795     @rtype: a two-tuple
13796     @return: a list of instances that were previously okay and result split as a
13797       consequence of this change, and a list of instances that were previously
13798       split and this change does not fix.
13799
13800     """
13801     changed_nodes = dict((node, group) for node, group in changes
13802                          if node_data[node].group != group)
13803
13804     all_split_instances = set()
13805     previously_split_instances = set()
13806
13807     def InstanceNodes(instance):
13808       return [instance.primary_node] + list(instance.secondary_nodes)
13809
13810     for inst in instance_data.values():
13811       if inst.disk_template not in constants.DTS_INT_MIRROR:
13812         continue
13813
13814       instance_nodes = InstanceNodes(inst)
13815
13816       if len(set(node_data[node].group for node in instance_nodes)) > 1:
13817         previously_split_instances.add(inst.name)
13818
13819       if len(set(changed_nodes.get(node, node_data[node].group)
13820                  for node in instance_nodes)) > 1:
13821         all_split_instances.add(inst.name)
13822
13823     return (list(all_split_instances - previously_split_instances),
13824             list(previously_split_instances & all_split_instances))
13825
13826
13827 class _GroupQuery(_QueryBase):
13828   FIELDS = query.GROUP_FIELDS
13829
13830   def ExpandNames(self, lu):
13831     lu.needed_locks = {}
13832
13833     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13834     self._cluster = lu.cfg.GetClusterInfo()
13835     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13836
13837     if not self.names:
13838       self.wanted = [name_to_uuid[name]
13839                      for name in utils.NiceSort(name_to_uuid.keys())]
13840     else:
13841       # Accept names to be either names or UUIDs.
13842       missing = []
13843       self.wanted = []
13844       all_uuid = frozenset(self._all_groups.keys())
13845
13846       for name in self.names:
13847         if name in all_uuid:
13848           self.wanted.append(name)
13849         elif name in name_to_uuid:
13850           self.wanted.append(name_to_uuid[name])
13851         else:
13852           missing.append(name)
13853
13854       if missing:
13855         raise errors.OpPrereqError("Some groups do not exist: %s" %
13856                                    utils.CommaJoin(missing),
13857                                    errors.ECODE_NOENT)
13858
13859   def DeclareLocks(self, lu, level):
13860     pass
13861
13862   def _GetQueryData(self, lu):
13863     """Computes the list of node groups and their attributes.
13864
13865     """
13866     do_nodes = query.GQ_NODE in self.requested_data
13867     do_instances = query.GQ_INST in self.requested_data
13868
13869     group_to_nodes = None
13870     group_to_instances = None
13871
13872     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13873     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13874     # latter GetAllInstancesInfo() is not enough, for we have to go through
13875     # instance->node. Hence, we will need to process nodes even if we only need
13876     # instance information.
13877     if do_nodes or do_instances:
13878       all_nodes = lu.cfg.GetAllNodesInfo()
13879       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13880       node_to_group = {}
13881
13882       for node in all_nodes.values():
13883         if node.group in group_to_nodes:
13884           group_to_nodes[node.group].append(node.name)
13885           node_to_group[node.name] = node.group
13886
13887       if do_instances:
13888         all_instances = lu.cfg.GetAllInstancesInfo()
13889         group_to_instances = dict((uuid, []) for uuid in self.wanted)
13890
13891         for instance in all_instances.values():
13892           node = instance.primary_node
13893           if node in node_to_group:
13894             group_to_instances[node_to_group[node]].append(instance.name)
13895
13896         if not do_nodes:
13897           # Do not pass on node information if it was not requested.
13898           group_to_nodes = None
13899
13900     return query.GroupQueryData(self._cluster,
13901                                 [self._all_groups[uuid]
13902                                  for uuid in self.wanted],
13903                                 group_to_nodes, group_to_instances,
13904                                 query.GQ_DISKPARAMS in self.requested_data)
13905
13906
13907 class LUGroupQuery(NoHooksLU):
13908   """Logical unit for querying node groups.
13909
13910   """
13911   REQ_BGL = False
13912
13913   def CheckArguments(self):
13914     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13915                           self.op.output_fields, False)
13916
13917   def ExpandNames(self):
13918     self.gq.ExpandNames(self)
13919
13920   def DeclareLocks(self, level):
13921     self.gq.DeclareLocks(self, level)
13922
13923   def Exec(self, feedback_fn):
13924     return self.gq.OldStyleQuery(self)
13925
13926
13927 class LUGroupSetParams(LogicalUnit):
13928   """Modifies the parameters of a node group.
13929
13930   """
13931   HPATH = "group-modify"
13932   HTYPE = constants.HTYPE_GROUP
13933   REQ_BGL = False
13934
13935   def CheckArguments(self):
13936     all_changes = [
13937       self.op.ndparams,
13938       self.op.diskparams,
13939       self.op.alloc_policy,
13940       self.op.hv_state,
13941       self.op.disk_state,
13942       self.op.ipolicy,
13943       ]
13944
13945     if all_changes.count(None) == len(all_changes):
13946       raise errors.OpPrereqError("Please pass at least one modification",
13947                                  errors.ECODE_INVAL)
13948
13949   def ExpandNames(self):
13950     # This raises errors.OpPrereqError on its own:
13951     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13952
13953     self.needed_locks = {
13954       locking.LEVEL_INSTANCE: [],
13955       locking.LEVEL_NODEGROUP: [self.group_uuid],
13956       }
13957
13958     self.share_locks[locking.LEVEL_INSTANCE] = 1
13959
13960   def DeclareLocks(self, level):
13961     if level == locking.LEVEL_INSTANCE:
13962       assert not self.needed_locks[locking.LEVEL_INSTANCE]
13963
13964       # Lock instances optimistically, needs verification once group lock has
13965       # been acquired
13966       self.needed_locks[locking.LEVEL_INSTANCE] = \
13967           self.cfg.GetNodeGroupInstances(self.group_uuid)
13968
13969   @staticmethod
13970   def _UpdateAndVerifyDiskParams(old, new):
13971     """Updates and verifies disk parameters.
13972
13973     """
13974     new_params = _GetUpdatedParams(old, new)
13975     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13976     return new_params
13977
13978   def CheckPrereq(self):
13979     """Check prerequisites.
13980
13981     """
13982     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13983
13984     # Check if locked instances are still correct
13985     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13986
13987     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13988     cluster = self.cfg.GetClusterInfo()
13989
13990     if self.group is None:
13991       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13992                                (self.op.group_name, self.group_uuid))
13993
13994     if self.op.ndparams:
13995       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13996       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13997       self.new_ndparams = new_ndparams
13998
13999     if self.op.diskparams:
14000       diskparams = self.group.diskparams
14001       uavdp = self._UpdateAndVerifyDiskParams
14002       # For each disktemplate subdict update and verify the values
14003       new_diskparams = dict((dt,
14004                              uavdp(diskparams.get(dt, {}),
14005                                    self.op.diskparams[dt]))
14006                             for dt in constants.DISK_TEMPLATES
14007                             if dt in self.op.diskparams)
14008       # As we've all subdicts of diskparams ready, lets merge the actual
14009       # dict with all updated subdicts
14010       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14011       try:
14012         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14013       except errors.OpPrereqError, err:
14014         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14015                                    errors.ECODE_INVAL)
14016
14017     if self.op.hv_state:
14018       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14019                                                  self.group.hv_state_static)
14020
14021     if self.op.disk_state:
14022       self.new_disk_state = \
14023         _MergeAndVerifyDiskState(self.op.disk_state,
14024                                  self.group.disk_state_static)
14025
14026     if self.op.ipolicy:
14027       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14028                                             self.op.ipolicy,
14029                                             group_policy=True)
14030
14031       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14032       inst_filter = lambda inst: inst.name in owned_instances
14033       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14034       violations = \
14035           _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14036                                                                self.group),
14037                                         new_ipolicy, instances)
14038
14039       if violations:
14040         self.LogWarning("After the ipolicy change the following instances"
14041                         " violate them: %s",
14042                         utils.CommaJoin(violations))
14043
14044   def BuildHooksEnv(self):
14045     """Build hooks env.
14046
14047     """
14048     return {
14049       "GROUP_NAME": self.op.group_name,
14050       "NEW_ALLOC_POLICY": self.op.alloc_policy,
14051       }
14052
14053   def BuildHooksNodes(self):
14054     """Build hooks nodes.
14055
14056     """
14057     mn = self.cfg.GetMasterNode()
14058     return ([mn], [mn])
14059
14060   def Exec(self, feedback_fn):
14061     """Modifies the node group.
14062
14063     """
14064     result = []
14065
14066     if self.op.ndparams:
14067       self.group.ndparams = self.new_ndparams
14068       result.append(("ndparams", str(self.group.ndparams)))
14069
14070     if self.op.diskparams:
14071       self.group.diskparams = self.new_diskparams
14072       result.append(("diskparams", str(self.group.diskparams)))
14073
14074     if self.op.alloc_policy:
14075       self.group.alloc_policy = self.op.alloc_policy
14076
14077     if self.op.hv_state:
14078       self.group.hv_state_static = self.new_hv_state
14079
14080     if self.op.disk_state:
14081       self.group.disk_state_static = self.new_disk_state
14082
14083     if self.op.ipolicy:
14084       self.group.ipolicy = self.new_ipolicy
14085
14086     self.cfg.Update(self.group, feedback_fn)
14087     return result
14088
14089
14090 class LUGroupRemove(LogicalUnit):
14091   HPATH = "group-remove"
14092   HTYPE = constants.HTYPE_GROUP
14093   REQ_BGL = False
14094
14095   def ExpandNames(self):
14096     # This will raises errors.OpPrereqError on its own:
14097     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14098     self.needed_locks = {
14099       locking.LEVEL_NODEGROUP: [self.group_uuid],
14100       }
14101
14102   def CheckPrereq(self):
14103     """Check prerequisites.
14104
14105     This checks that the given group name exists as a node group, that is
14106     empty (i.e., contains no nodes), and that is not the last group of the
14107     cluster.
14108
14109     """
14110     # Verify that the group is empty.
14111     group_nodes = [node.name
14112                    for node in self.cfg.GetAllNodesInfo().values()
14113                    if node.group == self.group_uuid]
14114
14115     if group_nodes:
14116       raise errors.OpPrereqError("Group '%s' not empty, has the following"
14117                                  " nodes: %s" %
14118                                  (self.op.group_name,
14119                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
14120                                  errors.ECODE_STATE)
14121
14122     # Verify the cluster would not be left group-less.
14123     if len(self.cfg.GetNodeGroupList()) == 1:
14124       raise errors.OpPrereqError("Group '%s' is the only group,"
14125                                  " cannot be removed" %
14126                                  self.op.group_name,
14127                                  errors.ECODE_STATE)
14128
14129   def BuildHooksEnv(self):
14130     """Build hooks env.
14131
14132     """
14133     return {
14134       "GROUP_NAME": self.op.group_name,
14135       }
14136
14137   def BuildHooksNodes(self):
14138     """Build hooks nodes.
14139
14140     """
14141     mn = self.cfg.GetMasterNode()
14142     return ([mn], [mn])
14143
14144   def Exec(self, feedback_fn):
14145     """Remove the node group.
14146
14147     """
14148     try:
14149       self.cfg.RemoveNodeGroup(self.group_uuid)
14150     except errors.ConfigurationError:
14151       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14152                                (self.op.group_name, self.group_uuid))
14153
14154     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14155
14156
14157 class LUGroupRename(LogicalUnit):
14158   HPATH = "group-rename"
14159   HTYPE = constants.HTYPE_GROUP
14160   REQ_BGL = False
14161
14162   def ExpandNames(self):
14163     # This raises errors.OpPrereqError on its own:
14164     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14165
14166     self.needed_locks = {
14167       locking.LEVEL_NODEGROUP: [self.group_uuid],
14168       }
14169
14170   def CheckPrereq(self):
14171     """Check prerequisites.
14172
14173     Ensures requested new name is not yet used.
14174
14175     """
14176     try:
14177       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14178     except errors.OpPrereqError:
14179       pass
14180     else:
14181       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14182                                  " node group (UUID: %s)" %
14183                                  (self.op.new_name, new_name_uuid),
14184                                  errors.ECODE_EXISTS)
14185
14186   def BuildHooksEnv(self):
14187     """Build hooks env.
14188
14189     """
14190     return {
14191       "OLD_NAME": self.op.group_name,
14192       "NEW_NAME": self.op.new_name,
14193       }
14194
14195   def BuildHooksNodes(self):
14196     """Build hooks nodes.
14197
14198     """
14199     mn = self.cfg.GetMasterNode()
14200
14201     all_nodes = self.cfg.GetAllNodesInfo()
14202     all_nodes.pop(mn, None)
14203
14204     run_nodes = [mn]
14205     run_nodes.extend(node.name for node in all_nodes.values()
14206                      if node.group == self.group_uuid)
14207
14208     return (run_nodes, run_nodes)
14209
14210   def Exec(self, feedback_fn):
14211     """Rename the node group.
14212
14213     """
14214     group = self.cfg.GetNodeGroup(self.group_uuid)
14215
14216     if group is None:
14217       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14218                                (self.op.group_name, self.group_uuid))
14219
14220     group.name = self.op.new_name
14221     self.cfg.Update(group, feedback_fn)
14222
14223     return self.op.new_name
14224
14225
14226 class LUGroupEvacuate(LogicalUnit):
14227   HPATH = "group-evacuate"
14228   HTYPE = constants.HTYPE_GROUP
14229   REQ_BGL = False
14230
14231   def ExpandNames(self):
14232     # This raises errors.OpPrereqError on its own:
14233     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14234
14235     if self.op.target_groups:
14236       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14237                                   self.op.target_groups)
14238     else:
14239       self.req_target_uuids = []
14240
14241     if self.group_uuid in self.req_target_uuids:
14242       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14243                                  " as a target group (targets are %s)" %
14244                                  (self.group_uuid,
14245                                   utils.CommaJoin(self.req_target_uuids)),
14246                                  errors.ECODE_INVAL)
14247
14248     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14249
14250     self.share_locks = _ShareAll()
14251     self.needed_locks = {
14252       locking.LEVEL_INSTANCE: [],
14253       locking.LEVEL_NODEGROUP: [],
14254       locking.LEVEL_NODE: [],
14255       }
14256
14257   def DeclareLocks(self, level):
14258     if level == locking.LEVEL_INSTANCE:
14259       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14260
14261       # Lock instances optimistically, needs verification once node and group
14262       # locks have been acquired
14263       self.needed_locks[locking.LEVEL_INSTANCE] = \
14264         self.cfg.GetNodeGroupInstances(self.group_uuid)
14265
14266     elif level == locking.LEVEL_NODEGROUP:
14267       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14268
14269       if self.req_target_uuids:
14270         lock_groups = set([self.group_uuid] + self.req_target_uuids)
14271
14272         # Lock all groups used by instances optimistically; this requires going
14273         # via the node before it's locked, requiring verification later on
14274         lock_groups.update(group_uuid
14275                            for instance_name in
14276                              self.owned_locks(locking.LEVEL_INSTANCE)
14277                            for group_uuid in
14278                              self.cfg.GetInstanceNodeGroups(instance_name))
14279       else:
14280         # No target groups, need to lock all of them
14281         lock_groups = locking.ALL_SET
14282
14283       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14284
14285     elif level == locking.LEVEL_NODE:
14286       # This will only lock the nodes in the group to be evacuated which
14287       # contain actual instances
14288       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14289       self._LockInstancesNodes()
14290
14291       # Lock all nodes in group to be evacuated and target groups
14292       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14293       assert self.group_uuid in owned_groups
14294       member_nodes = [node_name
14295                       for group in owned_groups
14296                       for node_name in self.cfg.GetNodeGroup(group).members]
14297       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14298
14299   def CheckPrereq(self):
14300     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14301     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14302     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14303
14304     assert owned_groups.issuperset(self.req_target_uuids)
14305     assert self.group_uuid in owned_groups
14306
14307     # Check if locked instances are still correct
14308     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14309
14310     # Get instance information
14311     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14312
14313     # Check if node groups for locked instances are still correct
14314     _CheckInstancesNodeGroups(self.cfg, self.instances,
14315                               owned_groups, owned_nodes, self.group_uuid)
14316
14317     if self.req_target_uuids:
14318       # User requested specific target groups
14319       self.target_uuids = self.req_target_uuids
14320     else:
14321       # All groups except the one to be evacuated are potential targets
14322       self.target_uuids = [group_uuid for group_uuid in owned_groups
14323                            if group_uuid != self.group_uuid]
14324
14325       if not self.target_uuids:
14326         raise errors.OpPrereqError("There are no possible target groups",
14327                                    errors.ECODE_INVAL)
14328
14329   def BuildHooksEnv(self):
14330     """Build hooks env.
14331
14332     """
14333     return {
14334       "GROUP_NAME": self.op.group_name,
14335       "TARGET_GROUPS": " ".join(self.target_uuids),
14336       }
14337
14338   def BuildHooksNodes(self):
14339     """Build hooks nodes.
14340
14341     """
14342     mn = self.cfg.GetMasterNode()
14343
14344     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14345
14346     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14347
14348     return (run_nodes, run_nodes)
14349
14350   def Exec(self, feedback_fn):
14351     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14352
14353     assert self.group_uuid not in self.target_uuids
14354
14355     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14356                      instances=instances, target_groups=self.target_uuids)
14357
14358     ial.Run(self.op.iallocator)
14359
14360     if not ial.success:
14361       raise errors.OpPrereqError("Can't compute group evacuation using"
14362                                  " iallocator '%s': %s" %
14363                                  (self.op.iallocator, ial.info),
14364                                  errors.ECODE_NORES)
14365
14366     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14367
14368     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14369                  len(jobs), self.op.group_name)
14370
14371     return ResultWithJobs(jobs)
14372
14373
14374 class TagsLU(NoHooksLU): # pylint: disable=W0223
14375   """Generic tags LU.
14376
14377   This is an abstract class which is the parent of all the other tags LUs.
14378
14379   """
14380   def ExpandNames(self):
14381     self.group_uuid = None
14382     self.needed_locks = {}
14383
14384     if self.op.kind == constants.TAG_NODE:
14385       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14386       lock_level = locking.LEVEL_NODE
14387       lock_name = self.op.name
14388     elif self.op.kind == constants.TAG_INSTANCE:
14389       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14390       lock_level = locking.LEVEL_INSTANCE
14391       lock_name = self.op.name
14392     elif self.op.kind == constants.TAG_NODEGROUP:
14393       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14394       lock_level = locking.LEVEL_NODEGROUP
14395       lock_name = self.group_uuid
14396     else:
14397       lock_level = None
14398       lock_name = None
14399
14400     if lock_level and getattr(self.op, "use_locking", True):
14401       self.needed_locks[lock_level] = lock_name
14402
14403     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14404     # not possible to acquire the BGL based on opcode parameters)
14405
14406   def CheckPrereq(self):
14407     """Check prerequisites.
14408
14409     """
14410     if self.op.kind == constants.TAG_CLUSTER:
14411       self.target = self.cfg.GetClusterInfo()
14412     elif self.op.kind == constants.TAG_NODE:
14413       self.target = self.cfg.GetNodeInfo(self.op.name)
14414     elif self.op.kind == constants.TAG_INSTANCE:
14415       self.target = self.cfg.GetInstanceInfo(self.op.name)
14416     elif self.op.kind == constants.TAG_NODEGROUP:
14417       self.target = self.cfg.GetNodeGroup(self.group_uuid)
14418     else:
14419       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14420                                  str(self.op.kind), errors.ECODE_INVAL)
14421
14422
14423 class LUTagsGet(TagsLU):
14424   """Returns the tags of a given object.
14425
14426   """
14427   REQ_BGL = False
14428
14429   def ExpandNames(self):
14430     TagsLU.ExpandNames(self)
14431
14432     # Share locks as this is only a read operation
14433     self.share_locks = _ShareAll()
14434
14435   def Exec(self, feedback_fn):
14436     """Returns the tag list.
14437
14438     """
14439     return list(self.target.GetTags())
14440
14441
14442 class LUTagsSearch(NoHooksLU):
14443   """Searches the tags for a given pattern.
14444
14445   """
14446   REQ_BGL = False
14447
14448   def ExpandNames(self):
14449     self.needed_locks = {}
14450
14451   def CheckPrereq(self):
14452     """Check prerequisites.
14453
14454     This checks the pattern passed for validity by compiling it.
14455
14456     """
14457     try:
14458       self.re = re.compile(self.op.pattern)
14459     except re.error, err:
14460       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14461                                  (self.op.pattern, err), errors.ECODE_INVAL)
14462
14463   def Exec(self, feedback_fn):
14464     """Returns the tag list.
14465
14466     """
14467     cfg = self.cfg
14468     tgts = [("/cluster", cfg.GetClusterInfo())]
14469     ilist = cfg.GetAllInstancesInfo().values()
14470     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14471     nlist = cfg.GetAllNodesInfo().values()
14472     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14473     tgts.extend(("/nodegroup/%s" % n.name, n)
14474                 for n in cfg.GetAllNodeGroupsInfo().values())
14475     results = []
14476     for path, target in tgts:
14477       for tag in target.GetTags():
14478         if self.re.search(tag):
14479           results.append((path, tag))
14480     return results
14481
14482
14483 class LUTagsSet(TagsLU):
14484   """Sets a tag on a given object.
14485
14486   """
14487   REQ_BGL = False
14488
14489   def CheckPrereq(self):
14490     """Check prerequisites.
14491
14492     This checks the type and length of the tag name and value.
14493
14494     """
14495     TagsLU.CheckPrereq(self)
14496     for tag in self.op.tags:
14497       objects.TaggableObject.ValidateTag(tag)
14498
14499   def Exec(self, feedback_fn):
14500     """Sets the tag.
14501
14502     """
14503     try:
14504       for tag in self.op.tags:
14505         self.target.AddTag(tag)
14506     except errors.TagError, err:
14507       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14508     self.cfg.Update(self.target, feedback_fn)
14509
14510
14511 class LUTagsDel(TagsLU):
14512   """Delete a list of tags from a given object.
14513
14514   """
14515   REQ_BGL = False
14516
14517   def CheckPrereq(self):
14518     """Check prerequisites.
14519
14520     This checks that we have the given tag.
14521
14522     """
14523     TagsLU.CheckPrereq(self)
14524     for tag in self.op.tags:
14525       objects.TaggableObject.ValidateTag(tag)
14526     del_tags = frozenset(self.op.tags)
14527     cur_tags = self.target.GetTags()
14528
14529     diff_tags = del_tags - cur_tags
14530     if diff_tags:
14531       diff_names = ("'%s'" % i for i in sorted(diff_tags))
14532       raise errors.OpPrereqError("Tag(s) %s not found" %
14533                                  (utils.CommaJoin(diff_names), ),
14534                                  errors.ECODE_NOENT)
14535
14536   def Exec(self, feedback_fn):
14537     """Remove the tag from the object.
14538
14539     """
14540     for tag in self.op.tags:
14541       self.target.RemoveTag(tag)
14542     self.cfg.Update(self.target, feedback_fn)
14543
14544
14545 class LUTestDelay(NoHooksLU):
14546   """Sleep for a specified amount of time.
14547
14548   This LU sleeps on the master and/or nodes for a specified amount of
14549   time.
14550
14551   """
14552   REQ_BGL = False
14553
14554   def ExpandNames(self):
14555     """Expand names and set required locks.
14556
14557     This expands the node list, if any.
14558
14559     """
14560     self.needed_locks = {}
14561     if self.op.on_nodes:
14562       # _GetWantedNodes can be used here, but is not always appropriate to use
14563       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14564       # more information.
14565       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14566       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14567
14568   def _TestDelay(self):
14569     """Do the actual sleep.
14570
14571     """
14572     if self.op.on_master:
14573       if not utils.TestDelay(self.op.duration):
14574         raise errors.OpExecError("Error during master delay test")
14575     if self.op.on_nodes:
14576       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14577       for node, node_result in result.items():
14578         node_result.Raise("Failure during rpc call to node %s" % node)
14579
14580   def Exec(self, feedback_fn):
14581     """Execute the test delay opcode, with the wanted repetitions.
14582
14583     """
14584     if self.op.repeat == 0:
14585       self._TestDelay()
14586     else:
14587       top_value = self.op.repeat - 1
14588       for i in range(self.op.repeat):
14589         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14590         self._TestDelay()
14591
14592
14593 class LUTestJqueue(NoHooksLU):
14594   """Utility LU to test some aspects of the job queue.
14595
14596   """
14597   REQ_BGL = False
14598
14599   # Must be lower than default timeout for WaitForJobChange to see whether it
14600   # notices changed jobs
14601   _CLIENT_CONNECT_TIMEOUT = 20.0
14602   _CLIENT_CONFIRM_TIMEOUT = 60.0
14603
14604   @classmethod
14605   def _NotifyUsingSocket(cls, cb, errcls):
14606     """Opens a Unix socket and waits for another program to connect.
14607
14608     @type cb: callable
14609     @param cb: Callback to send socket name to client
14610     @type errcls: class
14611     @param errcls: Exception class to use for errors
14612
14613     """
14614     # Using a temporary directory as there's no easy way to create temporary
14615     # sockets without writing a custom loop around tempfile.mktemp and
14616     # socket.bind
14617     tmpdir = tempfile.mkdtemp()
14618     try:
14619       tmpsock = utils.PathJoin(tmpdir, "sock")
14620
14621       logging.debug("Creating temporary socket at %s", tmpsock)
14622       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14623       try:
14624         sock.bind(tmpsock)
14625         sock.listen(1)
14626
14627         # Send details to client
14628         cb(tmpsock)
14629
14630         # Wait for client to connect before continuing
14631         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14632         try:
14633           (conn, _) = sock.accept()
14634         except socket.error, err:
14635           raise errcls("Client didn't connect in time (%s)" % err)
14636       finally:
14637         sock.close()
14638     finally:
14639       # Remove as soon as client is connected
14640       shutil.rmtree(tmpdir)
14641
14642     # Wait for client to close
14643     try:
14644       try:
14645         # pylint: disable=E1101
14646         # Instance of '_socketobject' has no ... member
14647         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14648         conn.recv(1)
14649       except socket.error, err:
14650         raise errcls("Client failed to confirm notification (%s)" % err)
14651     finally:
14652       conn.close()
14653
14654   def _SendNotification(self, test, arg, sockname):
14655     """Sends a notification to the client.
14656
14657     @type test: string
14658     @param test: Test name
14659     @param arg: Test argument (depends on test)
14660     @type sockname: string
14661     @param sockname: Socket path
14662
14663     """
14664     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14665
14666   def _Notify(self, prereq, test, arg):
14667     """Notifies the client of a test.
14668
14669     @type prereq: bool
14670     @param prereq: Whether this is a prereq-phase test
14671     @type test: string
14672     @param test: Test name
14673     @param arg: Test argument (depends on test)
14674
14675     """
14676     if prereq:
14677       errcls = errors.OpPrereqError
14678     else:
14679       errcls = errors.OpExecError
14680
14681     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14682                                                   test, arg),
14683                                    errcls)
14684
14685   def CheckArguments(self):
14686     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14687     self.expandnames_calls = 0
14688
14689   def ExpandNames(self):
14690     checkargs_calls = getattr(self, "checkargs_calls", 0)
14691     if checkargs_calls < 1:
14692       raise errors.ProgrammerError("CheckArguments was not called")
14693
14694     self.expandnames_calls += 1
14695
14696     if self.op.notify_waitlock:
14697       self._Notify(True, constants.JQT_EXPANDNAMES, None)
14698
14699     self.LogInfo("Expanding names")
14700
14701     # Get lock on master node (just to get a lock, not for a particular reason)
14702     self.needed_locks = {
14703       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14704       }
14705
14706   def Exec(self, feedback_fn):
14707     if self.expandnames_calls < 1:
14708       raise errors.ProgrammerError("ExpandNames was not called")
14709
14710     if self.op.notify_exec:
14711       self._Notify(False, constants.JQT_EXEC, None)
14712
14713     self.LogInfo("Executing")
14714
14715     if self.op.log_messages:
14716       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14717       for idx, msg in enumerate(self.op.log_messages):
14718         self.LogInfo("Sending log message %s", idx + 1)
14719         feedback_fn(constants.JQT_MSGPREFIX + msg)
14720         # Report how many test messages have been sent
14721         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14722
14723     if self.op.fail:
14724       raise errors.OpExecError("Opcode failure was requested")
14725
14726     return True
14727
14728
14729 class IAllocator(object):
14730   """IAllocator framework.
14731
14732   An IAllocator instance has three sets of attributes:
14733     - cfg that is needed to query the cluster
14734     - input data (all members of the _KEYS class attribute are required)
14735     - four buffer attributes (in|out_data|text), that represent the
14736       input (to the external script) in text and data structure format,
14737       and the output from it, again in two formats
14738     - the result variables from the script (success, info, nodes) for
14739       easy usage
14740
14741   """
14742   # pylint: disable=R0902
14743   # lots of instance attributes
14744
14745   def __init__(self, cfg, rpc_runner, mode, **kwargs):
14746     self.cfg = cfg
14747     self.rpc = rpc_runner
14748     # init buffer variables
14749     self.in_text = self.out_text = self.in_data = self.out_data = None
14750     # init all input fields so that pylint is happy
14751     self.mode = mode
14752     self.memory = self.disks = self.disk_template = self.spindle_use = None
14753     self.os = self.tags = self.nics = self.vcpus = None
14754     self.hypervisor = None
14755     self.relocate_from = None
14756     self.name = None
14757     self.instances = None
14758     self.evac_mode = None
14759     self.target_groups = []
14760     # computed fields
14761     self.required_nodes = None
14762     # init result fields
14763     self.success = self.info = self.result = None
14764
14765     try:
14766       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14767     except KeyError:
14768       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14769                                    " IAllocator" % self.mode)
14770
14771     keyset = [n for (n, _) in keydata]
14772
14773     for key in kwargs:
14774       if key not in keyset:
14775         raise errors.ProgrammerError("Invalid input parameter '%s' to"
14776                                      " IAllocator" % key)
14777       setattr(self, key, kwargs[key])
14778
14779     for key in keyset:
14780       if key not in kwargs:
14781         raise errors.ProgrammerError("Missing input parameter '%s' to"
14782                                      " IAllocator" % key)
14783     self._BuildInputData(compat.partial(fn, self), keydata)
14784
14785   def _ComputeClusterData(self):
14786     """Compute the generic allocator input data.
14787
14788     This is the data that is independent of the actual operation.
14789
14790     """
14791     cfg = self.cfg
14792     cluster_info = cfg.GetClusterInfo()
14793     # cluster data
14794     data = {
14795       "version": constants.IALLOCATOR_VERSION,
14796       "cluster_name": cfg.GetClusterName(),
14797       "cluster_tags": list(cluster_info.GetTags()),
14798       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14799       "ipolicy": cluster_info.ipolicy,
14800       }
14801     ninfo = cfg.GetAllNodesInfo()
14802     iinfo = cfg.GetAllInstancesInfo().values()
14803     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14804
14805     # node data
14806     node_list = [n.name for n in ninfo.values() if n.vm_capable]
14807
14808     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14809       hypervisor_name = self.hypervisor
14810     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14811       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14812     else:
14813       hypervisor_name = cluster_info.primary_hypervisor
14814
14815     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14816                                         [hypervisor_name])
14817     node_iinfo = \
14818       self.rpc.call_all_instances_info(node_list,
14819                                        cluster_info.enabled_hypervisors)
14820
14821     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14822
14823     config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14824     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14825                                                  i_list, config_ndata)
14826     assert len(data["nodes"]) == len(ninfo), \
14827         "Incomplete node data computed"
14828
14829     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14830
14831     self.in_data = data
14832
14833   @staticmethod
14834   def _ComputeNodeGroupData(cfg):
14835     """Compute node groups data.
14836
14837     """
14838     cluster = cfg.GetClusterInfo()
14839     ng = dict((guuid, {
14840       "name": gdata.name,
14841       "alloc_policy": gdata.alloc_policy,
14842       "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14843       })
14844       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14845
14846     return ng
14847
14848   @staticmethod
14849   def _ComputeBasicNodeData(cfg, node_cfg):
14850     """Compute global node data.
14851
14852     @rtype: dict
14853     @returns: a dict of name: (node dict, node config)
14854
14855     """
14856     # fill in static (config-based) values
14857     node_results = dict((ninfo.name, {
14858       "tags": list(ninfo.GetTags()),
14859       "primary_ip": ninfo.primary_ip,
14860       "secondary_ip": ninfo.secondary_ip,
14861       "offline": ninfo.offline,
14862       "drained": ninfo.drained,
14863       "master_candidate": ninfo.master_candidate,
14864       "group": ninfo.group,
14865       "master_capable": ninfo.master_capable,
14866       "vm_capable": ninfo.vm_capable,
14867       "ndparams": cfg.GetNdParams(ninfo),
14868       })
14869       for ninfo in node_cfg.values())
14870
14871     return node_results
14872
14873   @staticmethod
14874   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14875                               node_results):
14876     """Compute global node data.
14877
14878     @param node_results: the basic node structures as filled from the config
14879
14880     """
14881     #TODO(dynmem): compute the right data on MAX and MIN memory
14882     # make a copy of the current dict
14883     node_results = dict(node_results)
14884     for nname, nresult in node_data.items():
14885       assert nname in node_results, "Missing basic data for node %s" % nname
14886       ninfo = node_cfg[nname]
14887
14888       if not (ninfo.offline or ninfo.drained):
14889         nresult.Raise("Can't get data for node %s" % nname)
14890         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14891                                 nname)
14892         remote_info = _MakeLegacyNodeInfo(nresult.payload)
14893
14894         for attr in ["memory_total", "memory_free", "memory_dom0",
14895                      "vg_size", "vg_free", "cpu_total"]:
14896           if attr not in remote_info:
14897             raise errors.OpExecError("Node '%s' didn't return attribute"
14898                                      " '%s'" % (nname, attr))
14899           if not isinstance(remote_info[attr], int):
14900             raise errors.OpExecError("Node '%s' returned invalid value"
14901                                      " for '%s': %s" %
14902                                      (nname, attr, remote_info[attr]))
14903         # compute memory used by primary instances
14904         i_p_mem = i_p_up_mem = 0
14905         for iinfo, beinfo in i_list:
14906           if iinfo.primary_node == nname:
14907             i_p_mem += beinfo[constants.BE_MAXMEM]
14908             if iinfo.name not in node_iinfo[nname].payload:
14909               i_used_mem = 0
14910             else:
14911               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14912             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14913             remote_info["memory_free"] -= max(0, i_mem_diff)
14914
14915             if iinfo.admin_state == constants.ADMINST_UP:
14916               i_p_up_mem += beinfo[constants.BE_MAXMEM]
14917
14918         # compute memory used by instances
14919         pnr_dyn = {
14920           "total_memory": remote_info["memory_total"],
14921           "reserved_memory": remote_info["memory_dom0"],
14922           "free_memory": remote_info["memory_free"],
14923           "total_disk": remote_info["vg_size"],
14924           "free_disk": remote_info["vg_free"],
14925           "total_cpus": remote_info["cpu_total"],
14926           "i_pri_memory": i_p_mem,
14927           "i_pri_up_memory": i_p_up_mem,
14928           }
14929         pnr_dyn.update(node_results[nname])
14930         node_results[nname] = pnr_dyn
14931
14932     return node_results
14933
14934   @staticmethod
14935   def _ComputeInstanceData(cluster_info, i_list):
14936     """Compute global instance data.
14937
14938     """
14939     instance_data = {}
14940     for iinfo, beinfo in i_list:
14941       nic_data = []
14942       for nic in iinfo.nics:
14943         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14944         nic_dict = {
14945           "mac": nic.mac,
14946           "ip": nic.ip,
14947           "mode": filled_params[constants.NIC_MODE],
14948           "link": filled_params[constants.NIC_LINK],
14949           }
14950         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14951           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14952         nic_data.append(nic_dict)
14953       pir = {
14954         "tags": list(iinfo.GetTags()),
14955         "admin_state": iinfo.admin_state,
14956         "vcpus": beinfo[constants.BE_VCPUS],
14957         "memory": beinfo[constants.BE_MAXMEM],
14958         "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14959         "os": iinfo.os,
14960         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14961         "nics": nic_data,
14962         "disks": [{constants.IDISK_SIZE: dsk.size,
14963                    constants.IDISK_MODE: dsk.mode}
14964                   for dsk in iinfo.disks],
14965         "disk_template": iinfo.disk_template,
14966         "hypervisor": iinfo.hypervisor,
14967         }
14968       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14969                                                  pir["disks"])
14970       instance_data[iinfo.name] = pir
14971
14972     return instance_data
14973
14974   def _AddNewInstance(self):
14975     """Add new instance data to allocator structure.
14976
14977     This in combination with _AllocatorGetClusterData will create the
14978     correct structure needed as input for the allocator.
14979
14980     The checks for the completeness of the opcode must have already been
14981     done.
14982
14983     """
14984     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14985
14986     if self.disk_template in constants.DTS_INT_MIRROR:
14987       self.required_nodes = 2
14988     else:
14989       self.required_nodes = 1
14990
14991     request = {
14992       "name": self.name,
14993       "disk_template": self.disk_template,
14994       "tags": self.tags,
14995       "os": self.os,
14996       "vcpus": self.vcpus,
14997       "memory": self.memory,
14998       "spindle_use": self.spindle_use,
14999       "disks": self.disks,
15000       "disk_space_total": disk_space,
15001       "nics": self.nics,
15002       "required_nodes": self.required_nodes,
15003       "hypervisor": self.hypervisor,
15004       }
15005
15006     return request
15007
15008   def _AddRelocateInstance(self):
15009     """Add relocate instance data to allocator structure.
15010
15011     This in combination with _IAllocatorGetClusterData will create the
15012     correct structure needed as input for the allocator.
15013
15014     The checks for the completeness of the opcode must have already been
15015     done.
15016
15017     """
15018     instance = self.cfg.GetInstanceInfo(self.name)
15019     if instance is None:
15020       raise errors.ProgrammerError("Unknown instance '%s' passed to"
15021                                    " IAllocator" % self.name)
15022
15023     if instance.disk_template not in constants.DTS_MIRRORED:
15024       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15025                                  errors.ECODE_INVAL)
15026
15027     if instance.disk_template in constants.DTS_INT_MIRROR and \
15028         len(instance.secondary_nodes) != 1:
15029       raise errors.OpPrereqError("Instance has not exactly one secondary node",
15030                                  errors.ECODE_STATE)
15031
15032     self.required_nodes = 1
15033     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15034     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15035
15036     request = {
15037       "name": self.name,
15038       "disk_space_total": disk_space,
15039       "required_nodes": self.required_nodes,
15040       "relocate_from": self.relocate_from,
15041       }
15042     return request
15043
15044   def _AddNodeEvacuate(self):
15045     """Get data for node-evacuate requests.
15046
15047     """
15048     return {
15049       "instances": self.instances,
15050       "evac_mode": self.evac_mode,
15051       }
15052
15053   def _AddChangeGroup(self):
15054     """Get data for node-evacuate requests.
15055
15056     """
15057     return {
15058       "instances": self.instances,
15059       "target_groups": self.target_groups,
15060       }
15061
15062   def _BuildInputData(self, fn, keydata):
15063     """Build input data structures.
15064
15065     """
15066     self._ComputeClusterData()
15067
15068     request = fn()
15069     request["type"] = self.mode
15070     for keyname, keytype in keydata:
15071       if keyname not in request:
15072         raise errors.ProgrammerError("Request parameter %s is missing" %
15073                                      keyname)
15074       val = request[keyname]
15075       if not keytype(val):
15076         raise errors.ProgrammerError("Request parameter %s doesn't pass"
15077                                      " validation, value %s, expected"
15078                                      " type %s" % (keyname, val, keytype))
15079     self.in_data["request"] = request
15080
15081     self.in_text = serializer.Dump(self.in_data)
15082
15083   _STRING_LIST = ht.TListOf(ht.TString)
15084   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15085      # pylint: disable=E1101
15086      # Class '...' has no 'OP_ID' member
15087      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15088                           opcodes.OpInstanceMigrate.OP_ID,
15089                           opcodes.OpInstanceReplaceDisks.OP_ID])
15090      })))
15091
15092   _NEVAC_MOVED = \
15093     ht.TListOf(ht.TAnd(ht.TIsLength(3),
15094                        ht.TItems([ht.TNonEmptyString,
15095                                   ht.TNonEmptyString,
15096                                   ht.TListOf(ht.TNonEmptyString),
15097                                  ])))
15098   _NEVAC_FAILED = \
15099     ht.TListOf(ht.TAnd(ht.TIsLength(2),
15100                        ht.TItems([ht.TNonEmptyString,
15101                                   ht.TMaybeString,
15102                                  ])))
15103   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15104                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15105
15106   _MODE_DATA = {
15107     constants.IALLOCATOR_MODE_ALLOC:
15108       (_AddNewInstance,
15109        [
15110         ("name", ht.TString),
15111         ("memory", ht.TInt),
15112         ("spindle_use", ht.TInt),
15113         ("disks", ht.TListOf(ht.TDict)),
15114         ("disk_template", ht.TString),
15115         ("os", ht.TString),
15116         ("tags", _STRING_LIST),
15117         ("nics", ht.TListOf(ht.TDict)),
15118         ("vcpus", ht.TInt),
15119         ("hypervisor", ht.TString),
15120         ], ht.TList),
15121     constants.IALLOCATOR_MODE_RELOC:
15122       (_AddRelocateInstance,
15123        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15124        ht.TList),
15125      constants.IALLOCATOR_MODE_NODE_EVAC:
15126       (_AddNodeEvacuate, [
15127         ("instances", _STRING_LIST),
15128         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15129         ], _NEVAC_RESULT),
15130      constants.IALLOCATOR_MODE_CHG_GROUP:
15131       (_AddChangeGroup, [
15132         ("instances", _STRING_LIST),
15133         ("target_groups", _STRING_LIST),
15134         ], _NEVAC_RESULT),
15135     }
15136
15137   def Run(self, name, validate=True, call_fn=None):
15138     """Run an instance allocator and return the results.
15139
15140     """
15141     if call_fn is None:
15142       call_fn = self.rpc.call_iallocator_runner
15143
15144     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15145     result.Raise("Failure while running the iallocator script")
15146
15147     self.out_text = result.payload
15148     if validate:
15149       self._ValidateResult()
15150
15151   def _ValidateResult(self):
15152     """Process the allocator results.
15153
15154     This will process and if successful save the result in
15155     self.out_data and the other parameters.
15156
15157     """
15158     try:
15159       rdict = serializer.Load(self.out_text)
15160     except Exception, err:
15161       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15162
15163     if not isinstance(rdict, dict):
15164       raise errors.OpExecError("Can't parse iallocator results: not a dict")
15165
15166     # TODO: remove backwards compatiblity in later versions
15167     if "nodes" in rdict and "result" not in rdict:
15168       rdict["result"] = rdict["nodes"]
15169       del rdict["nodes"]
15170
15171     for key in "success", "info", "result":
15172       if key not in rdict:
15173         raise errors.OpExecError("Can't parse iallocator results:"
15174                                  " missing key '%s'" % key)
15175       setattr(self, key, rdict[key])
15176
15177     if not self._result_check(self.result):
15178       raise errors.OpExecError("Iallocator returned invalid result,"
15179                                " expected %s, got %s" %
15180                                (self._result_check, self.result),
15181                                errors.ECODE_INVAL)
15182
15183     if self.mode == constants.IALLOCATOR_MODE_RELOC:
15184       assert self.relocate_from is not None
15185       assert self.required_nodes == 1
15186
15187       node2group = dict((name, ndata["group"])
15188                         for (name, ndata) in self.in_data["nodes"].items())
15189
15190       fn = compat.partial(self._NodesToGroups, node2group,
15191                           self.in_data["nodegroups"])
15192
15193       instance = self.cfg.GetInstanceInfo(self.name)
15194       request_groups = fn(self.relocate_from + [instance.primary_node])
15195       result_groups = fn(rdict["result"] + [instance.primary_node])
15196
15197       if self.success and not set(result_groups).issubset(request_groups):
15198         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15199                                  " differ from original groups (%s)" %
15200                                  (utils.CommaJoin(result_groups),
15201                                   utils.CommaJoin(request_groups)))
15202
15203     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15204       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15205
15206     self.out_data = rdict
15207
15208   @staticmethod
15209   def _NodesToGroups(node2group, groups, nodes):
15210     """Returns a list of unique group names for a list of nodes.
15211
15212     @type node2group: dict
15213     @param node2group: Map from node name to group UUID
15214     @type groups: dict
15215     @param groups: Group information
15216     @type nodes: list
15217     @param nodes: Node names
15218
15219     """
15220     result = set()
15221
15222     for node in nodes:
15223       try:
15224         group_uuid = node2group[node]
15225       except KeyError:
15226         # Ignore unknown node
15227         pass
15228       else:
15229         try:
15230           group = groups[group_uuid]
15231         except KeyError:
15232           # Can't find group, let's use UUID
15233           group_name = group_uuid
15234         else:
15235           group_name = group["name"]
15236
15237         result.add(group_name)
15238
15239     return sorted(result)
15240
15241
15242 class LUTestAllocator(NoHooksLU):
15243   """Run allocator tests.
15244
15245   This LU runs the allocator tests
15246
15247   """
15248   def CheckPrereq(self):
15249     """Check prerequisites.
15250
15251     This checks the opcode parameters depending on the director and mode test.
15252
15253     """
15254     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15255       for attr in ["memory", "disks", "disk_template",
15256                    "os", "tags", "nics", "vcpus"]:
15257         if not hasattr(self.op, attr):
15258           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15259                                      attr, errors.ECODE_INVAL)
15260       iname = self.cfg.ExpandInstanceName(self.op.name)
15261       if iname is not None:
15262         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15263                                    iname, errors.ECODE_EXISTS)
15264       if not isinstance(self.op.nics, list):
15265         raise errors.OpPrereqError("Invalid parameter 'nics'",
15266                                    errors.ECODE_INVAL)
15267       if not isinstance(self.op.disks, list):
15268         raise errors.OpPrereqError("Invalid parameter 'disks'",
15269                                    errors.ECODE_INVAL)
15270       for row in self.op.disks:
15271         if (not isinstance(row, dict) or
15272             constants.IDISK_SIZE not in row or
15273             not isinstance(row[constants.IDISK_SIZE], int) or
15274             constants.IDISK_MODE not in row or
15275             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15276           raise errors.OpPrereqError("Invalid contents of the 'disks'"
15277                                      " parameter", errors.ECODE_INVAL)
15278       if self.op.hypervisor is None:
15279         self.op.hypervisor = self.cfg.GetHypervisorType()
15280     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15281       fname = _ExpandInstanceName(self.cfg, self.op.name)
15282       self.op.name = fname
15283       self.relocate_from = \
15284           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15285     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15286                           constants.IALLOCATOR_MODE_NODE_EVAC):
15287       if not self.op.instances:
15288         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15289       self.op.instances = _GetWantedInstances(self, self.op.instances)
15290     else:
15291       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15292                                  self.op.mode, errors.ECODE_INVAL)
15293
15294     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15295       if self.op.allocator is None:
15296         raise errors.OpPrereqError("Missing allocator name",
15297                                    errors.ECODE_INVAL)
15298     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15299       raise errors.OpPrereqError("Wrong allocator test '%s'" %
15300                                  self.op.direction, errors.ECODE_INVAL)
15301
15302   def Exec(self, feedback_fn):
15303     """Run the allocator test.
15304
15305     """
15306     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15307       ial = IAllocator(self.cfg, self.rpc,
15308                        mode=self.op.mode,
15309                        name=self.op.name,
15310                        memory=self.op.memory,
15311                        disks=self.op.disks,
15312                        disk_template=self.op.disk_template,
15313                        os=self.op.os,
15314                        tags=self.op.tags,
15315                        nics=self.op.nics,
15316                        vcpus=self.op.vcpus,
15317                        hypervisor=self.op.hypervisor,
15318                        )
15319     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15320       ial = IAllocator(self.cfg, self.rpc,
15321                        mode=self.op.mode,
15322                        name=self.op.name,
15323                        relocate_from=list(self.relocate_from),
15324                        )
15325     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15326       ial = IAllocator(self.cfg, self.rpc,
15327                        mode=self.op.mode,
15328                        instances=self.op.instances,
15329                        target_groups=self.op.target_groups)
15330     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15331       ial = IAllocator(self.cfg, self.rpc,
15332                        mode=self.op.mode,
15333                        instances=self.op.instances,
15334                        evac_mode=self.op.evac_mode)
15335     else:
15336       raise errors.ProgrammerError("Uncatched mode %s in"
15337                                    " LUTestAllocator.Exec", self.op.mode)
15338
15339     if self.op.direction == constants.IALLOCATOR_DIR_IN:
15340       result = ial.in_text
15341     else:
15342       ial.Run(self.op.allocator, validate=False)
15343       result = ial.out_text
15344     return result
15345
15346
15347 #: Query type implementations
15348 _QUERY_IMPL = {
15349   constants.QR_CLUSTER: _ClusterQuery,
15350   constants.QR_INSTANCE: _InstanceQuery,
15351   constants.QR_NODE: _NodeQuery,
15352   constants.QR_GROUP: _GroupQuery,
15353   constants.QR_OS: _OsQuery,
15354   constants.QR_EXPORT: _ExportQuery,
15355   }
15356
15357 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15358
15359
15360 def _GetQueryImplementation(name):
15361   """Returns the implemtnation for a query type.
15362
15363   @param name: Query type, must be one of L{constants.QR_VIA_OP}
15364
15365   """
15366   try:
15367     return _QUERY_IMPL[name]
15368   except KeyError:
15369     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15370                                errors.ECODE_INVAL)