code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56 from ganeti import query
  57 from ganeti import qlang
  58 from ganeti import opcodes
  59 from ganeti import ht
  60 from ganeti import rpc
  61 from ganeti import runtime
  62 from ganeti import pathutils
  63 from ganeti import vcluster
  64 from ganeti import network
  65 from ganeti.masterd import iallocator
  66
  67 import ganeti.masterd.instance # pylint: disable=W0611
  68
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141     # Dicts used to declare locking needs to mcpu
 142     self.needed_locks = None
 143     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 144     self.add_locks = {}
 145     self.remove_locks = {}
 146     # Used to force good behavior when calling helper functions
 147     self.recalculate_locks = {}
 148     # logging
 149     self.Log = processor.Log # pylint: disable=C0103
 150     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 151     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 152     self.LogStep = processor.LogStep # pylint: disable=C0103
 153     # support for dry-run
 154     self.dry_run_result = None
 155     # support for generic debug attribute
 156     if (not hasattr(self.op, "debug_level") or
 157         not isinstance(self.op.debug_level, int)):
 158       self.op.debug_level = 0
 159
 160     # Tasklets
 161     self.tasklets = None
 162
 163     # Validate opcode parameters and set defaults
 164     self.op.Validate(True)
 165
 166     self.CheckArguments()
 167
 168   def CheckArguments(self):
 169     """Check syntactic validity for the opcode arguments.
 170
 171     This method is for doing a simple syntactic check and ensure
 172     validity of opcode parameters, without any cluster-related
 173     checks. While the same can be accomplished in ExpandNames and/or
 174     CheckPrereq, doing these separate is better because:
 175
 176       - ExpandNames is left as as purely a lock-related function
 177       - CheckPrereq is run after we have acquired locks (and possible
 178         waited for them)
 179
 180     The function is allowed to change the self.op attribute so that
 181     later methods can no longer worry about missing parameters.
 182
 183     """
 184     pass
 185
 186   def ExpandNames(self):
 187     """Expand names for this LU.
 188
 189     This method is called before starting to execute the opcode, and it should
 190     update all the parameters of the opcode to their canonical form (e.g. a
 191     short node name must be fully expanded after this method has successfully
 192     completed). This way locking, hooks, logging, etc. can work correctly.
 193
 194     LUs which implement this method must also populate the self.needed_locks
 195     member, as a dict with lock levels as keys, and a list of needed lock names
 196     as values. Rules:
 197
 198       - use an empty dict if you don't need any lock
 199       - if you don't need any lock at a particular level omit that
 200         level (note that in this case C{DeclareLocks} won't be called
 201         at all for that level)
 202       - if you need locks at a level, but you can't calculate it in
 203         this function, initialise that level with an empty list and do
 204         further processing in L{LogicalUnit.DeclareLocks} (see that
 205         function's docstring)
 206       - don't put anything for the BGL level
 207       - if you want all locks at a level use L{locking.ALL_SET} as a value
 208
 209     If you need to share locks (rather than acquire them exclusively) at one
 210     level you can modify self.share_locks, setting a true value (usually 1) for
 211     that level. By default locks are not shared.
 212
 213     This function can also define a list of tasklets, which then will be
 214     executed in order instead of the usual LU-level CheckPrereq and Exec
 215     functions, if those are not defined by the LU.
 216
 217     Examples::
 218
 219       # Acquire all nodes and one instance
 220       self.needed_locks = {
 221         locking.LEVEL_NODE: locking.ALL_SET,
 222         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 223       }
 224       # Acquire just two nodes
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 227       }
 228       # Acquire no locks
 229       self.needed_locks = {} # No, you can't leave it to the default value None
 230
 231     """
 232     # The implementation of this method is mandatory only if the new LU is
 233     # concurrent, so that old LUs don't need to be changed all at the same
 234     # time.
 235     if self.REQ_BGL:
 236       self.needed_locks = {} # Exclusive LUs don't need locks.
 237     else:
 238       raise NotImplementedError
 239
 240   def DeclareLocks(self, level):
 241     """Declare LU locking needs for a level
 242
 243     While most LUs can just declare their locking needs at ExpandNames time,
 244     sometimes there's the need to calculate some locks after having acquired
 245     the ones before. This function is called just before acquiring locks at a
 246     particular level, but after acquiring the ones at lower levels, and permits
 247     such calculations. It can be used to modify self.needed_locks, and by
 248     default it does nothing.
 249
 250     This function is only called if you have something already set in
 251     self.needed_locks for the level.
 252
 253     @param level: Locking level which is going to be locked
 254     @type level: member of L{ganeti.locking.LEVELS}
 255
 256     """
 257
 258   def CheckPrereq(self):
 259     """Check prerequisites for this LU.
 260
 261     This method should check that the prerequisites for the execution
 262     of this LU are fulfilled. It can do internode communication, but
 263     it should be idempotent - no cluster or system changes are
 264     allowed.
 265
 266     The method should raise errors.OpPrereqError in case something is
 267     not fulfilled. Its return value is ignored.
 268
 269     This method should also update all the parameters of the opcode to
 270     their canonical form if it hasn't been done by ExpandNames before.
 271
 272     """
 273     if self.tasklets is not None:
 274       for (idx, tl) in enumerate(self.tasklets):
 275         logging.debug("Checking prerequisites for tasklet %s/%s",
 276                       idx + 1, len(self.tasklets))
 277         tl.CheckPrereq()
 278     else:
 279       pass
 280
 281   def Exec(self, feedback_fn):
 282     """Execute the LU.
 283
 284     This method should implement the actual work. It should raise
 285     errors.OpExecError for failures that are somewhat dealt with in
 286     code, or expected.
 287
 288     """
 289     if self.tasklets is not None:
 290       for (idx, tl) in enumerate(self.tasklets):
 291         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 292         tl.Exec(feedback_fn)
 293     else:
 294       raise NotImplementedError
 295
 296   def BuildHooksEnv(self):
 297     """Build hooks environment for this LU.
 298
 299     @rtype: dict
 300     @return: Dictionary containing the environment that will be used for
 301       running the hooks for this LU. The keys of the dict must not be prefixed
 302       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 303       will extend the environment with additional variables. If no environment
 304       should be defined, an empty dictionary should be returned (not C{None}).
 305     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 306       will not be called.
 307
 308     """
 309     raise NotImplementedError
 310
 311   def BuildHooksNodes(self):
 312     """Build list of nodes to run LU's hooks.
 313
 314     @rtype: tuple; (list, list)
 315     @return: Tuple containing a list of node names on which the hook
 316       should run before the execution and a list of node names on which the
 317       hook should run after the execution. No nodes should be returned as an
 318       empty list (and not None).
 319     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 320       will not be called.
 321
 322     """
 323     raise NotImplementedError
 324
 325   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 326     """Notify the LU about the results of its hooks.
 327
 328     This method is called every time a hooks phase is executed, and notifies
 329     the Logical Unit about the hooks' result. The LU can then use it to alter
 330     its result based on the hooks.  By default the method does nothing and the
 331     previous result is passed back unchanged but any LU can define it if it
 332     wants to use the local cluster hook-scripts somehow.
 333
 334     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 335         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 336     @param hook_results: the results of the multi-node hooks rpc call
 337     @param feedback_fn: function used send feedback back to the caller
 338     @param lu_result: the previous Exec result this LU had, or None
 339         in the PRE phase
 340     @return: the new Exec result, based on the previous result
 341         and hook results
 342
 343     """
 344     # API must be kept, thus we ignore the unused argument and could
 345     # be a function warnings
 346     # pylint: disable=W0613,R0201
 347     return lu_result
 348
 349   def _ExpandAndLockInstance(self):
 350     """Helper function to expand and lock an instance.
 351
 352     Many LUs that work on an instance take its name in self.op.instance_name
 353     and need to expand it and then declare the expanded name for locking. This
 354     function does it, and then updates self.op.instance_name to the expanded
 355     name. It also initializes needed_locks as a dict, if this hasn't been done
 356     before.
 357
 358     """
 359     if self.needed_locks is None:
 360       self.needed_locks = {}
 361     else:
 362       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 363         "_ExpandAndLockInstance called with instance-level locks set"
 364     self.op.instance_name = _ExpandInstanceName(self.cfg,
 365                                                 self.op.instance_name)
 366     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 367
 368   def _LockInstancesNodes(self, primary_only=False,
 369                           level=locking.LEVEL_NODE):
 370     """Helper function to declare instances' nodes for locking.
 371
 372     This function should be called after locking one or more instances to lock
 373     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 374     with all primary or secondary nodes for instances already locked and
 375     present in self.needed_locks[locking.LEVEL_INSTANCE].
 376
 377     It should be called from DeclareLocks, and for safety only works if
 378     self.recalculate_locks[locking.LEVEL_NODE] is set.
 379
 380     In the future it may grow parameters to just lock some instance's nodes, or
 381     to just lock primaries or secondary nodes, if needed.
 382
 383     If should be called in DeclareLocks in a way similar to::
 384
 385       if level == locking.LEVEL_NODE:
 386         self._LockInstancesNodes()
 387
 388     @type primary_only: boolean
 389     @param primary_only: only lock primary nodes of locked instances
 390     @param level: Which lock level to use for locking nodes
 391
 392     """
 393     assert level in self.recalculate_locks, \
 394       "_LockInstancesNodes helper function called with no nodes to recalculate"
 395
 396     # TODO: check if we're really been called with the instance locks held
 397
 398     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 399     # future we might want to have different behaviors depending on the value
 400     # of self.recalculate_locks[locking.LEVEL_NODE]
 401     wanted_nodes = []
 402     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 403     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 404       wanted_nodes.append(instance.primary_node)
 405       if not primary_only:
 406         wanted_nodes.extend(instance.secondary_nodes)
 407
 408     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 409       self.needed_locks[level] = wanted_nodes
 410     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 411       self.needed_locks[level].extend(wanted_nodes)
 412     else:
 413       raise errors.ProgrammerError("Unknown recalculation mode")
 414
 415     del self.recalculate_locks[level]
 416
 417
 418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 419   """Simple LU which runs no hooks.
 420
 421   This LU is intended as a parent for other LogicalUnits which will
 422   run no hooks, in order to reduce duplicate code.
 423
 424   """
 425   HPATH = None
 426   HTYPE = None
 427
 428   def BuildHooksEnv(self):
 429     """Empty BuildHooksEnv for NoHooksLu.
 430
 431     This just raises an error.
 432
 433     """
 434     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 435
 436   def BuildHooksNodes(self):
 437     """Empty BuildHooksNodes for NoHooksLU.
 438
 439     """
 440     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 class _QueryBase:
 490   """Base for query utility classes.
 491
 492   """
 493   #: Attribute holding field definitions
 494   FIELDS = None
 495
 496   #: Field to sort by
 497   SORT_FIELD = "name"
 498
 499   def __init__(self, qfilter, fields, use_locking):
 500     """Initializes this class.
 501
 502     """
 503     self.use_locking = use_locking
 504
 505     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 506                              namefield=self.SORT_FIELD)
 507     self.requested_data = self.query.RequestedData()
 508     self.names = self.query.RequestedNames()
 509
 510     # Sort only if no names were requested
 511     self.sort_by_name = not self.names
 512
 513     self.do_locking = None
 514     self.wanted = None
 515
 516   def _GetNames(self, lu, all_names, lock_level):
 517     """Helper function to determine names asked for in the query.
 518
 519     """
 520     if self.do_locking:
 521       names = lu.owned_locks(lock_level)
 522     else:
 523       names = all_names
 524
 525     if self.wanted == locking.ALL_SET:
 526       assert not self.names
 527       # caller didn't specify names, so ordering is not important
 528       return utils.NiceSort(names)
 529
 530     # caller specified names and we must keep the same order
 531     assert self.names
 532     assert not self.do_locking or lu.glm.is_owned(lock_level)
 533
 534     missing = set(self.wanted).difference(names)
 535     if missing:
 536       raise errors.OpExecError("Some items were removed before retrieving"
 537                                " their data: %s" % missing)
 538
 539     # Return expanded names
 540     return self.wanted
 541
 542   def ExpandNames(self, lu):
 543     """Expand names for this query.
 544
 545     See L{LogicalUnit.ExpandNames}.
 546
 547     """
 548     raise NotImplementedError()
 549
 550   def DeclareLocks(self, lu, level):
 551     """Declare locks for this query.
 552
 553     See L{LogicalUnit.DeclareLocks}.
 554
 555     """
 556     raise NotImplementedError()
 557
 558   def _GetQueryData(self, lu):
 559     """Collects all data for this query.
 560
 561     @return: Query data object
 562
 563     """
 564     raise NotImplementedError()
 565
 566   def NewStyleQuery(self, lu):
 567     """Collect data and execute query.
 568
 569     """
 570     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 571                                   sort_by_name=self.sort_by_name)
 572
 573   def OldStyleQuery(self, lu):
 574     """Collect data and execute query.
 575
 576     """
 577     return self.query.OldStyleQuery(self._GetQueryData(lu),
 578                                     sort_by_name=self.sort_by_name)
 579
 580
 581 def _ShareAll():
 582   """Returns a dict declaring all lock levels shared.
 583
 584   """
 585   return dict.fromkeys(locking.LEVELS, 1)
 586
 587
 588 def _AnnotateDiskParams(instance, devs, cfg):
 589   """Little helper wrapper to the rpc annotation method.
 590
 591   @param instance: The instance object
 592   @type devs: List of L{objects.Disk}
 593   @param devs: The root devices (not any of its children!)
 594   @param cfg: The config object
 595   @returns The annotated disk copies
 596   @see L{rpc.AnnotateDiskParams}
 597
 598   """
 599   return rpc.AnnotateDiskParams(instance.disk_template, devs,
 600                                 cfg.GetInstanceDiskParams(instance))
 601
 602
 603 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 604                               cur_group_uuid):
 605   """Checks if node groups for locked instances are still correct.
 606
 607   @type cfg: L{config.ConfigWriter}
 608   @param cfg: Cluster configuration
 609   @type instances: dict; string as key, L{objects.Instance} as value
 610   @param instances: Dictionary, instance name as key, instance object as value
 611   @type owned_groups: iterable of string
 612   @param owned_groups: List of owned groups
 613   @type owned_nodes: iterable of string
 614   @param owned_nodes: List of owned nodes
 615   @type cur_group_uuid: string or None
 616   @param cur_group_uuid: Optional group UUID to check against instance's groups
 617
 618   """
 619   for (name, inst) in instances.items():
 620     assert owned_nodes.issuperset(inst.all_nodes), \
 621       "Instance %s's nodes changed while we kept the lock" % name
 622
 623     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 624
 625     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 626       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 627
 628
 629 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
 630                              primary_only=False):
 631   """Checks if the owned node groups are still correct for an instance.
 632
 633   @type cfg: L{config.ConfigWriter}
 634   @param cfg: The cluster configuration
 635   @type instance_name: string
 636   @param instance_name: Instance name
 637   @type owned_groups: set or frozenset
 638   @param owned_groups: List of currently owned node groups
 639   @type primary_only: boolean
 640   @param primary_only: Whether to check node groups for only the primary node
 641
 642   """
 643   inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
 644
 645   if not owned_groups.issuperset(inst_groups):
 646     raise errors.OpPrereqError("Instance %s's node groups changed since"
 647                                " locks were acquired, current groups are"
 648                                " are '%s', owning groups '%s'; retry the"
 649                                " operation" %
 650                                (instance_name,
 651                                 utils.CommaJoin(inst_groups),
 652                                 utils.CommaJoin(owned_groups)),
 653                                errors.ECODE_STATE)
 654
 655   return inst_groups
 656
 657
 658 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 659   """Checks if the instances in a node group are still correct.
 660
 661   @type cfg: L{config.ConfigWriter}
 662   @param cfg: The cluster configuration
 663   @type group_uuid: string
 664   @param group_uuid: Node group UUID
 665   @type owned_instances: set or frozenset
 666   @param owned_instances: List of currently owned instances
 667
 668   """
 669   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 670   if owned_instances != wanted_instances:
 671     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 672                                " locks were acquired, wanted '%s', have '%s';"
 673                                " retry the operation" %
 674                                (group_uuid,
 675                                 utils.CommaJoin(wanted_instances),
 676                                 utils.CommaJoin(owned_instances)),
 677                                errors.ECODE_STATE)
 678
 679   return wanted_instances
 680
 681
 682 def _SupportsOob(cfg, node):
 683   """Tells if node supports OOB.
 684
 685   @type cfg: L{config.ConfigWriter}
 686   @param cfg: The cluster configuration
 687   @type node: L{objects.Node}
 688   @param node: The node
 689   @return: The OOB script if supported or an empty string otherwise
 690
 691   """
 692   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 693
 694
 695 def _CopyLockList(names):
 696   """Makes a copy of a list of lock names.
 697
 698   Handles L{locking.ALL_SET} correctly.
 699
 700   """
 701   if names == locking.ALL_SET:
 702     return locking.ALL_SET
 703   else:
 704     return names[:]
 705
 706
 707 def _GetWantedNodes(lu, nodes):
 708   """Returns list of checked and expanded node names.
 709
 710   @type lu: L{LogicalUnit}
 711   @param lu: the logical unit on whose behalf we execute
 712   @type nodes: list
 713   @param nodes: list of node names or None for all nodes
 714   @rtype: list
 715   @return: the list of nodes, sorted
 716   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 717
 718   """
 719   if nodes:
 720     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 721
 722   return utils.NiceSort(lu.cfg.GetNodeList())
 723
 724
 725 def _GetWantedInstances(lu, instances):
 726   """Returns list of checked and expanded instance names.
 727
 728   @type lu: L{LogicalUnit}
 729   @param lu: the logical unit on whose behalf we execute
 730   @type instances: list
 731   @param instances: list of instance names or None for all instances
 732   @rtype: list
 733   @return: the list of instances, sorted
 734   @raise errors.OpPrereqError: if the instances parameter is wrong type
 735   @raise errors.OpPrereqError: if any of the passed instances is not found
 736
 737   """
 738   if instances:
 739     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 740   else:
 741     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 742   return wanted
 743
 744
 745 def _GetUpdatedParams(old_params, update_dict,
 746                       use_default=True, use_none=False):
 747   """Return the new version of a parameter dictionary.
 748
 749   @type old_params: dict
 750   @param old_params: old parameters
 751   @type update_dict: dict
 752   @param update_dict: dict containing new parameter values, or
 753       constants.VALUE_DEFAULT to reset the parameter to its default
 754       value
 755   @param use_default: boolean
 756   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 757       values as 'to be deleted' values
 758   @param use_none: boolean
 759   @type use_none: whether to recognise C{None} values as 'to be
 760       deleted' values
 761   @rtype: dict
 762   @return: the new parameter dictionary
 763
 764   """
 765   params_copy = copy.deepcopy(old_params)
 766   for key, val in update_dict.iteritems():
 767     if ((use_default and val == constants.VALUE_DEFAULT) or
 768         (use_none and val is None)):
 769       try:
 770         del params_copy[key]
 771       except KeyError:
 772         pass
 773     else:
 774       params_copy[key] = val
 775   return params_copy
 776
 777
 778 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 779   """Return the new version of a instance policy.
 780
 781   @param group_policy: whether this policy applies to a group and thus
 782     we should support removal of policy entries
 783
 784   """
 785   use_none = use_default = group_policy
 786   ipolicy = copy.deepcopy(old_ipolicy)
 787   for key, value in new_ipolicy.items():
 788     if key not in constants.IPOLICY_ALL_KEYS:
 789       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 790                                  errors.ECODE_INVAL)
 791     if key in constants.IPOLICY_ISPECS:
 792       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 793       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 794                                        use_none=use_none,
 795                                        use_default=use_default)
 796     else:
 797       if (not value or value == [constants.VALUE_DEFAULT] or
 798           value == constants.VALUE_DEFAULT):
 799         if group_policy:
 800           del ipolicy[key]
 801         else:
 802           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 803                                      " on the cluster'" % key,
 804                                      errors.ECODE_INVAL)
 805       else:
 806         if key in constants.IPOLICY_PARAMETERS:
 807           # FIXME: we assume all such values are float
 808           try:
 809             ipolicy[key] = float(value)
 810           except (TypeError, ValueError), err:
 811             raise errors.OpPrereqError("Invalid value for attribute"
 812                                        " '%s': '%s', error: %s" %
 813                                        (key, value, err), errors.ECODE_INVAL)
 814         else:
 815           # FIXME: we assume all others are lists; this should be redone
 816           # in a nicer way
 817           ipolicy[key] = list(value)
 818   try:
 819     objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
 820   except errors.ConfigurationError, err:
 821     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 822                                errors.ECODE_INVAL)
 823   return ipolicy
 824
 825
 826 def _UpdateAndVerifySubDict(base, updates, type_check):
 827   """Updates and verifies a dict with sub dicts of the same type.
 828
 829   @param base: The dict with the old data
 830   @param updates: The dict with the new data
 831   @param type_check: Dict suitable to ForceDictType to verify correct types
 832   @returns: A new dict with updated and verified values
 833
 834   """
 835   def fn(old, value):
 836     new = _GetUpdatedParams(old, value)
 837     utils.ForceDictType(new, type_check)
 838     return new
 839
 840   ret = copy.deepcopy(base)
 841   ret.update(dict((key, fn(base.get(key, {}), value))
 842                   for key, value in updates.items()))
 843   return ret
 844
 845
 846 def _MergeAndVerifyHvState(op_input, obj_input):
 847   """Combines the hv state from an opcode with the one of the object
 848
 849   @param op_input: The input dict from the opcode
 850   @param obj_input: The input dict from the objects
 851   @return: The verified and updated dict
 852
 853   """
 854   if op_input:
 855     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 856     if invalid_hvs:
 857       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 858                                  " %s" % utils.CommaJoin(invalid_hvs),
 859                                  errors.ECODE_INVAL)
 860     if obj_input is None:
 861       obj_input = {}
 862     type_check = constants.HVSTS_PARAMETER_TYPES
 863     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 864
 865   return None
 866
 867
 868 def _MergeAndVerifyDiskState(op_input, obj_input):
 869   """Combines the disk state from an opcode with the one of the object
 870
 871   @param op_input: The input dict from the opcode
 872   @param obj_input: The input dict from the objects
 873   @return: The verified and updated dict
 874   """
 875   if op_input:
 876     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 877     if invalid_dst:
 878       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 879                                  utils.CommaJoin(invalid_dst),
 880                                  errors.ECODE_INVAL)
 881     type_check = constants.DSS_PARAMETER_TYPES
 882     if obj_input is None:
 883       obj_input = {}
 884     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 885                                               type_check))
 886                 for key, value in op_input.items())
 887
 888   return None
 889
 890
 891 def _ReleaseLocks(lu, level, names=None, keep=None):
 892   """Releases locks owned by an LU.
 893
 894   @type lu: L{LogicalUnit}
 895   @param level: Lock level
 896   @type names: list or None
 897   @param names: Names of locks to release
 898   @type keep: list or None
 899   @param keep: Names of locks to retain
 900
 901   """
 902   assert not (keep is not None and names is not None), \
 903          "Only one of the 'names' and the 'keep' parameters can be given"
 904
 905   if names is not None:
 906     should_release = names.__contains__
 907   elif keep:
 908     should_release = lambda name: name not in keep
 909   else:
 910     should_release = None
 911
 912   owned = lu.owned_locks(level)
 913   if not owned:
 914     # Not owning any lock at this level, do nothing
 915     pass
 916
 917   elif should_release:
 918     retain = []
 919     release = []
 920
 921     # Determine which locks to release
 922     for name in owned:
 923       if should_release(name):
 924         release.append(name)
 925       else:
 926         retain.append(name)
 927
 928     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 929
 930     # Release just some locks
 931     lu.glm.release(level, names=release)
 932
 933     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 934   else:
 935     # Release everything
 936     lu.glm.release(level)
 937
 938     assert not lu.glm.is_owned(level), "No locks should be owned"
 939
 940
 941 def _MapInstanceDisksToNodes(instances):
 942   """Creates a map from (node, volume) to instance name.
 943
 944   @type instances: list of L{objects.Instance}
 945   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 946
 947   """
 948   return dict(((node, vol), inst.name)
 949               for inst in instances
 950               for (node, vols) in inst.MapLVsByNode().items()
 951               for vol in vols)
 952
 953
 954 def _RunPostHook(lu, node_name):
 955   """Runs the post-hook for an opcode on a single node.
 956
 957   """
 958   hm = lu.proc.BuildHooksManager(lu)
 959   try:
 960     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 961   except Exception, err: # pylint: disable=W0703
 962     lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
 963
 964
 965 def _CheckOutputFields(static, dynamic, selected):
 966   """Checks whether all selected fields are valid.
 967
 968   @type static: L{utils.FieldSet}
 969   @param static: static fields set
 970   @type dynamic: L{utils.FieldSet}
 971   @param dynamic: dynamic fields set
 972
 973   """
 974   f = utils.FieldSet()
 975   f.Extend(static)
 976   f.Extend(dynamic)
 977
 978   delta = f.NonMatching(selected)
 979   if delta:
 980     raise errors.OpPrereqError("Unknown output fields selected: %s"
 981                                % ",".join(delta), errors.ECODE_INVAL)
 982
 983
 984 def _CheckGlobalHvParams(params):
 985   """Validates that given hypervisor params are not global ones.
 986
 987   This will ensure that instances don't get customised versions of
 988   global params.
 989
 990   """
 991   used_globals = constants.HVC_GLOBALS.intersection(params)
 992   if used_globals:
 993     msg = ("The following hypervisor parameters are global and cannot"
 994            " be customized at instance level, please modify them at"
 995            " cluster level: %s" % utils.CommaJoin(used_globals))
 996     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 997
 998
 999 def _CheckNodeOnline(lu, node, msg=None):
1000   """Ensure that a given node is online.
1001
1002   @param lu: the LU on behalf of which we make the check
1003   @param node: the node to check
1004   @param msg: if passed, should be a message to replace the default one
1005   @raise errors.OpPrereqError: if the node is offline
1006
1007   """
1008   if msg is None:
1009     msg = "Can't use offline node"
1010   if lu.cfg.GetNodeInfo(node).offline:
1011     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1012
1013
1014 def _CheckNodeNotDrained(lu, node):
1015   """Ensure that a given node is not drained.
1016
1017   @param lu: the LU on behalf of which we make the check
1018   @param node: the node to check
1019   @raise errors.OpPrereqError: if the node is drained
1020
1021   """
1022   if lu.cfg.GetNodeInfo(node).drained:
1023     raise errors.OpPrereqError("Can't use drained node %s" % node,
1024                                errors.ECODE_STATE)
1025
1026
1027 def _CheckNodeVmCapable(lu, node):
1028   """Ensure that a given node is vm capable.
1029
1030   @param lu: the LU on behalf of which we make the check
1031   @param node: the node to check
1032   @raise errors.OpPrereqError: if the node is not vm capable
1033
1034   """
1035   if not lu.cfg.GetNodeInfo(node).vm_capable:
1036     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1037                                errors.ECODE_STATE)
1038
1039
1040 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1041   """Ensure that a node supports a given OS.
1042
1043   @param lu: the LU on behalf of which we make the check
1044   @param node: the node to check
1045   @param os_name: the OS to query about
1046   @param force_variant: whether to ignore variant errors
1047   @raise errors.OpPrereqError: if the node is not supporting the OS
1048
1049   """
1050   result = lu.rpc.call_os_get(node, os_name)
1051   result.Raise("OS '%s' not in supported OS list for node %s" %
1052                (os_name, node),
1053                prereq=True, ecode=errors.ECODE_INVAL)
1054   if not force_variant:
1055     _CheckOSVariant(result.payload, os_name)
1056
1057
1058 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1059   """Ensure that a node has the given secondary ip.
1060
1061   @type lu: L{LogicalUnit}
1062   @param lu: the LU on behalf of which we make the check
1063   @type node: string
1064   @param node: the node to check
1065   @type secondary_ip: string
1066   @param secondary_ip: the ip to check
1067   @type prereq: boolean
1068   @param prereq: whether to throw a prerequisite or an execute error
1069   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1070   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1071
1072   """
1073   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1074   result.Raise("Failure checking secondary ip on node %s" % node,
1075                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1076   if not result.payload:
1077     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1078            " please fix and re-run this command" % secondary_ip)
1079     if prereq:
1080       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1081     else:
1082       raise errors.OpExecError(msg)
1083
1084
1085 def _GetClusterDomainSecret():
1086   """Reads the cluster domain secret.
1087
1088   """
1089   return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1090                                strict=True)
1091
1092
1093 def _CheckInstanceState(lu, instance, req_states, msg=None):
1094   """Ensure that an instance is in one of the required states.
1095
1096   @param lu: the LU on behalf of which we make the check
1097   @param instance: the instance to check
1098   @param msg: if passed, should be a message to replace the default one
1099   @raise errors.OpPrereqError: if the instance is not in the required state
1100
1101   """
1102   if msg is None:
1103     msg = "can't use instance from outside %s states" % ", ".join(req_states)
1104   if instance.admin_state not in req_states:
1105     raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1106                                (instance.name, instance.admin_state, msg),
1107                                errors.ECODE_STATE)
1108
1109   if constants.ADMINST_UP not in req_states:
1110     pnode = instance.primary_node
1111     if not lu.cfg.GetNodeInfo(pnode).offline:
1112       ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1113       ins_l.Raise("Can't contact node %s for instance information" % pnode,
1114                   prereq=True, ecode=errors.ECODE_ENVIRON)
1115       if instance.name in ins_l.payload:
1116         raise errors.OpPrereqError("Instance %s is running, %s" %
1117                                    (instance.name, msg), errors.ECODE_STATE)
1118     else:
1119       lu.LogWarning("Primary node offline, ignoring check that instance"
1120                      " is down")
1121
1122
1123 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1124   """Computes if value is in the desired range.
1125
1126   @param name: name of the parameter for which we perform the check
1127   @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1128       not just 'disk')
1129   @param ipolicy: dictionary containing min, max and std values
1130   @param value: actual value that we want to use
1131   @return: None or element not meeting the criteria
1132
1133
1134   """
1135   if value in [None, constants.VALUE_AUTO]:
1136     return None
1137   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1138   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1139   if value > max_v or min_v > value:
1140     if qualifier:
1141       fqn = "%s/%s" % (name, qualifier)
1142     else:
1143       fqn = name
1144     return ("%s value %s is not in range [%s, %s]" %
1145             (fqn, value, min_v, max_v))
1146   return None
1147
1148
1149 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1150                                  nic_count, disk_sizes, spindle_use,
1151                                  _compute_fn=_ComputeMinMaxSpec):
1152   """Verifies ipolicy against provided specs.
1153
1154   @type ipolicy: dict
1155   @param ipolicy: The ipolicy
1156   @type mem_size: int
1157   @param mem_size: The memory size
1158   @type cpu_count: int
1159   @param cpu_count: Used cpu cores
1160   @type disk_count: int
1161   @param disk_count: Number of disks used
1162   @type nic_count: int
1163   @param nic_count: Number of nics used
1164   @type disk_sizes: list of ints
1165   @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1166   @type spindle_use: int
1167   @param spindle_use: The number of spindles this instance uses
1168   @param _compute_fn: The compute function (unittest only)
1169   @return: A list of violations, or an empty list of no violations are found
1170
1171   """
1172   assert disk_count == len(disk_sizes)
1173
1174   test_settings = [
1175     (constants.ISPEC_MEM_SIZE, "", mem_size),
1176     (constants.ISPEC_CPU_COUNT, "", cpu_count),
1177     (constants.ISPEC_DISK_COUNT, "", disk_count),
1178     (constants.ISPEC_NIC_COUNT, "", nic_count),
1179     (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1180     ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1181          for idx, d in enumerate(disk_sizes)]
1182
1183   return filter(None,
1184                 (_compute_fn(name, qualifier, ipolicy, value)
1185                  for (name, qualifier, value) in test_settings))
1186
1187
1188 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1189                                      _compute_fn=_ComputeIPolicySpecViolation):
1190   """Compute if instance meets the specs of ipolicy.
1191
1192   @type ipolicy: dict
1193   @param ipolicy: The ipolicy to verify against
1194   @type instance: L{objects.Instance}
1195   @param instance: The instance to verify
1196   @param _compute_fn: The function to verify ipolicy (unittest only)
1197   @see: L{_ComputeIPolicySpecViolation}
1198
1199   """
1200   mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1201   cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1202   spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1203   disk_count = len(instance.disks)
1204   disk_sizes = [disk.size for disk in instance.disks]
1205   nic_count = len(instance.nics)
1206
1207   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1208                      disk_sizes, spindle_use)
1209
1210
1211 def _ComputeIPolicyInstanceSpecViolation(
1212   ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1213   """Compute if instance specs meets the specs of ipolicy.
1214
1215   @type ipolicy: dict
1216   @param ipolicy: The ipolicy to verify against
1217   @param instance_spec: dict
1218   @param instance_spec: The instance spec to verify
1219   @param _compute_fn: The function to verify ipolicy (unittest only)
1220   @see: L{_ComputeIPolicySpecViolation}
1221
1222   """
1223   mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1224   cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1225   disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1226   disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1227   nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1228   spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1229
1230   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1231                      disk_sizes, spindle_use)
1232
1233
1234 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1235                                  target_group,
1236                                  _compute_fn=_ComputeIPolicyInstanceViolation):
1237   """Compute if instance meets the specs of the new target group.
1238
1239   @param ipolicy: The ipolicy to verify
1240   @param instance: The instance object to verify
1241   @param current_group: The current group of the instance
1242   @param target_group: The new group of the instance
1243   @param _compute_fn: The function to verify ipolicy (unittest only)
1244   @see: L{_ComputeIPolicySpecViolation}
1245
1246   """
1247   if current_group == target_group:
1248     return []
1249   else:
1250     return _compute_fn(ipolicy, instance)
1251
1252
1253 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1254                             _compute_fn=_ComputeIPolicyNodeViolation):
1255   """Checks that the target node is correct in terms of instance policy.
1256
1257   @param ipolicy: The ipolicy to verify
1258   @param instance: The instance object to verify
1259   @param node: The new node to relocate
1260   @param ignore: Ignore violations of the ipolicy
1261   @param _compute_fn: The function to verify ipolicy (unittest only)
1262   @see: L{_ComputeIPolicySpecViolation}
1263
1264   """
1265   primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1266   res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1267
1268   if res:
1269     msg = ("Instance does not meet target node group's (%s) instance"
1270            " policy: %s") % (node.group, utils.CommaJoin(res))
1271     if ignore:
1272       lu.LogWarning(msg)
1273     else:
1274       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1275
1276
1277 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1278   """Computes a set of any instances that would violate the new ipolicy.
1279
1280   @param old_ipolicy: The current (still in-place) ipolicy
1281   @param new_ipolicy: The new (to become) ipolicy
1282   @param instances: List of instances to verify
1283   @return: A list of instances which violates the new ipolicy but
1284       did not before
1285
1286   """
1287   return (_ComputeViolatingInstances(new_ipolicy, instances) -
1288           _ComputeViolatingInstances(old_ipolicy, instances))
1289
1290
1291 def _ExpandItemName(fn, name, kind):
1292   """Expand an item name.
1293
1294   @param fn: the function to use for expansion
1295   @param name: requested item name
1296   @param kind: text description ('Node' or 'Instance')
1297   @return: the resolved (full) name
1298   @raise errors.OpPrereqError: if the item is not found
1299
1300   """
1301   full_name = fn(name)
1302   if full_name is None:
1303     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1304                                errors.ECODE_NOENT)
1305   return full_name
1306
1307
1308 def _ExpandNodeName(cfg, name):
1309   """Wrapper over L{_ExpandItemName} for nodes."""
1310   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1311
1312
1313 def _ExpandInstanceName(cfg, name):
1314   """Wrapper over L{_ExpandItemName} for instance."""
1315   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1316
1317
1318 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1319                          network_type, mac_prefix, tags):
1320   """Builds network related env variables for hooks
1321
1322   This builds the hook environment from individual variables.
1323
1324   @type name: string
1325   @param name: the name of the network
1326   @type subnet: string
1327   @param subnet: the ipv4 subnet
1328   @type gateway: string
1329   @param gateway: the ipv4 gateway
1330   @type network6: string
1331   @param network6: the ipv6 subnet
1332   @type gateway6: string
1333   @param gateway6: the ipv6 gateway
1334   @type network_type: string
1335   @param network_type: the type of the network
1336   @type mac_prefix: string
1337   @param mac_prefix: the mac_prefix
1338   @type tags: list
1339   @param tags: the tags of the network
1340
1341   """
1342   env = dict()
1343   if name:
1344     env["NETWORK_NAME"] = name
1345   if subnet:
1346     env["NETWORK_SUBNET"] = subnet
1347   if gateway:
1348     env["NETWORK_GATEWAY"] = gateway
1349   if network6:
1350     env["NETWORK_SUBNET6"] = network6
1351   if gateway6:
1352     env["NETWORK_GATEWAY6"] = gateway6
1353   if mac_prefix:
1354     env["NETWORK_MAC_PREFIX"] = mac_prefix
1355   if network_type:
1356     env["NETWORK_TYPE"] = network_type
1357   if tags:
1358     env["NETWORK_TAGS"] = " ".join(tags)
1359
1360   return env
1361
1362
1363 def _BuildNetworkHookEnvByObject(net):
1364   """Builds network related env varliables for hooks
1365
1366   @type net: L{objects.Network}
1367   @param net: the network object
1368
1369   """
1370   args = {
1371     "name": net.name,
1372     "subnet": net.network,
1373     "gateway": net.gateway,
1374     "network6": net.network6,
1375     "gateway6": net.gateway6,
1376     "network_type": net.network_type,
1377     "mac_prefix": net.mac_prefix,
1378     "tags": net.tags,
1379   }
1380
1381   return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
1382
1383
1384 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1385                           minmem, maxmem, vcpus, nics, disk_template, disks,
1386                           bep, hvp, hypervisor_name, tags):
1387   """Builds instance related env variables for hooks
1388
1389   This builds the hook environment from individual variables.
1390
1391   @type name: string
1392   @param name: the name of the instance
1393   @type primary_node: string
1394   @param primary_node: the name of the instance's primary node
1395   @type secondary_nodes: list
1396   @param secondary_nodes: list of secondary nodes as strings
1397   @type os_type: string
1398   @param os_type: the name of the instance's OS
1399   @type status: string
1400   @param status: the desired status of the instance
1401   @type minmem: string
1402   @param minmem: the minimum memory size of the instance
1403   @type maxmem: string
1404   @param maxmem: the maximum memory size of the instance
1405   @type vcpus: string
1406   @param vcpus: the count of VCPUs the instance has
1407   @type nics: list
1408   @param nics: list of tuples (ip, mac, mode, link, network) representing
1409       the NICs the instance has
1410   @type disk_template: string
1411   @param disk_template: the disk template of the instance
1412   @type disks: list
1413   @param disks: the list of (size, mode) pairs
1414   @type bep: dict
1415   @param bep: the backend parameters for the instance
1416   @type hvp: dict
1417   @param hvp: the hypervisor parameters for the instance
1418   @type hypervisor_name: string
1419   @param hypervisor_name: the hypervisor for the instance
1420   @type tags: list
1421   @param tags: list of instance tags as strings
1422   @rtype: dict
1423   @return: the hook environment for this instance
1424
1425   """
1426   env = {
1427     "OP_TARGET": name,
1428     "INSTANCE_NAME": name,
1429     "INSTANCE_PRIMARY": primary_node,
1430     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1431     "INSTANCE_OS_TYPE": os_type,
1432     "INSTANCE_STATUS": status,
1433     "INSTANCE_MINMEM": minmem,
1434     "INSTANCE_MAXMEM": maxmem,
1435     # TODO(2.7) remove deprecated "memory" value
1436     "INSTANCE_MEMORY": maxmem,
1437     "INSTANCE_VCPUS": vcpus,
1438     "INSTANCE_DISK_TEMPLATE": disk_template,
1439     "INSTANCE_HYPERVISOR": hypervisor_name,
1440   }
1441   if nics:
1442     nic_count = len(nics)
1443     for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1444       if ip is None:
1445         ip = ""
1446       env["INSTANCE_NIC%d_IP" % idx] = ip
1447       env["INSTANCE_NIC%d_MAC" % idx] = mac
1448       env["INSTANCE_NIC%d_MODE" % idx] = mode
1449       env["INSTANCE_NIC%d_LINK" % idx] = link
1450       if network:
1451         env["INSTANCE_NIC%d_NETWORK" % idx] = net
1452         if netinfo:
1453           nobj = objects.Network.FromDict(netinfo)
1454           if nobj.network:
1455             env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1456           if nobj.gateway:
1457             env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1458           if nobj.network6:
1459             env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1460           if nobj.gateway6:
1461             env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1462           if nobj.mac_prefix:
1463             env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1464           if nobj.network_type:
1465             env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1466           if nobj.tags:
1467             env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1468       if mode == constants.NIC_MODE_BRIDGED:
1469         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1470   else:
1471     nic_count = 0
1472
1473   env["INSTANCE_NIC_COUNT"] = nic_count
1474
1475   if disks:
1476     disk_count = len(disks)
1477     for idx, (size, mode) in enumerate(disks):
1478       env["INSTANCE_DISK%d_SIZE" % idx] = size
1479       env["INSTANCE_DISK%d_MODE" % idx] = mode
1480   else:
1481     disk_count = 0
1482
1483   env["INSTANCE_DISK_COUNT"] = disk_count
1484
1485   if not tags:
1486     tags = []
1487
1488   env["INSTANCE_TAGS"] = " ".join(tags)
1489
1490   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1491     for key, value in source.items():
1492       env["INSTANCE_%s_%s" % (kind, key)] = value
1493
1494   return env
1495
1496
1497 def _NICToTuple(lu, nic):
1498   """Build a tupple of nic information.
1499
1500   @type lu:  L{LogicalUnit}
1501   @param lu: the logical unit on whose behalf we execute
1502   @type nic: L{objects.NIC}
1503   @param nic: nic to convert to hooks tuple
1504
1505   """
1506   ip = nic.ip
1507   mac = nic.mac
1508   cluster = lu.cfg.GetClusterInfo()
1509   filled_params = cluster.SimpleFillNIC(nic.nicparams)
1510   mode = filled_params[constants.NIC_MODE]
1511   link = filled_params[constants.NIC_LINK]
1512   net = nic.network
1513   netinfo = None
1514   if net:
1515     net_uuid = lu.cfg.LookupNetwork(net)
1516     if net_uuid:
1517       nobj = lu.cfg.GetNetwork(net_uuid)
1518       netinfo = objects.Network.ToDict(nobj)
1519   return (ip, mac, mode, link, net, netinfo)
1520
1521
1522 def _NICListToTuple(lu, nics):
1523   """Build a list of nic information tuples.
1524
1525   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1526   value in LUInstanceQueryData.
1527
1528   @type lu:  L{LogicalUnit}
1529   @param lu: the logical unit on whose behalf we execute
1530   @type nics: list of L{objects.NIC}
1531   @param nics: list of nics to convert to hooks tuples
1532
1533   """
1534   hooks_nics = []
1535   for nic in nics:
1536     hooks_nics.append(_NICToTuple(lu, nic))
1537   return hooks_nics
1538
1539
1540 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1541   """Builds instance related env variables for hooks from an object.
1542
1543   @type lu: L{LogicalUnit}
1544   @param lu: the logical unit on whose behalf we execute
1545   @type instance: L{objects.Instance}
1546   @param instance: the instance for which we should build the
1547       environment
1548   @type override: dict
1549   @param override: dictionary with key/values that will override
1550       our values
1551   @rtype: dict
1552   @return: the hook environment dictionary
1553
1554   """
1555   cluster = lu.cfg.GetClusterInfo()
1556   bep = cluster.FillBE(instance)
1557   hvp = cluster.FillHV(instance)
1558   args = {
1559     "name": instance.name,
1560     "primary_node": instance.primary_node,
1561     "secondary_nodes": instance.secondary_nodes,
1562     "os_type": instance.os,
1563     "status": instance.admin_state,
1564     "maxmem": bep[constants.BE_MAXMEM],
1565     "minmem": bep[constants.BE_MINMEM],
1566     "vcpus": bep[constants.BE_VCPUS],
1567     "nics": _NICListToTuple(lu, instance.nics),
1568     "disk_template": instance.disk_template,
1569     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1570     "bep": bep,
1571     "hvp": hvp,
1572     "hypervisor_name": instance.hypervisor,
1573     "tags": instance.tags,
1574   }
1575   if override:
1576     args.update(override)
1577   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1578
1579
1580 def _AdjustCandidatePool(lu, exceptions):
1581   """Adjust the candidate pool after node operations.
1582
1583   """
1584   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1585   if mod_list:
1586     lu.LogInfo("Promoted nodes to master candidate role: %s",
1587                utils.CommaJoin(node.name for node in mod_list))
1588     for name in mod_list:
1589       lu.context.ReaddNode(name)
1590   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1591   if mc_now > mc_max:
1592     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1593                (mc_now, mc_max))
1594
1595
1596 def _DecideSelfPromotion(lu, exceptions=None):
1597   """Decide whether I should promote myself as a master candidate.
1598
1599   """
1600   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1601   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1602   # the new node will increase mc_max with one, so:
1603   mc_should = min(mc_should + 1, cp_size)
1604   return mc_now < mc_should
1605
1606
1607 def _ComputeViolatingInstances(ipolicy, instances):
1608   """Computes a set of instances who violates given ipolicy.
1609
1610   @param ipolicy: The ipolicy to verify
1611   @type instances: object.Instance
1612   @param instances: List of instances to verify
1613   @return: A frozenset of instance names violating the ipolicy
1614
1615   """
1616   return frozenset([inst.name for inst in instances
1617                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1618
1619
1620 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1621   """Check that the brigdes needed by a list of nics exist.
1622
1623   """
1624   cluster = lu.cfg.GetClusterInfo()
1625   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1626   brlist = [params[constants.NIC_LINK] for params in paramslist
1627             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1628   if brlist:
1629     result = lu.rpc.call_bridges_exist(target_node, brlist)
1630     result.Raise("Error checking bridges on destination node '%s'" %
1631                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1632
1633
1634 def _CheckInstanceBridgesExist(lu, instance, node=None):
1635   """Check that the brigdes needed by an instance exist.
1636
1637   """
1638   if node is None:
1639     node = instance.primary_node
1640   _CheckNicsBridgesExist(lu, instance.nics, node)
1641
1642
1643 def _CheckOSVariant(os_obj, name):
1644   """Check whether an OS name conforms to the os variants specification.
1645
1646   @type os_obj: L{objects.OS}
1647   @param os_obj: OS object to check
1648   @type name: string
1649   @param name: OS name passed by the user, to check for validity
1650
1651   """
1652   variant = objects.OS.GetVariant(name)
1653   if not os_obj.supported_variants:
1654     if variant:
1655       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1656                                  " passed)" % (os_obj.name, variant),
1657                                  errors.ECODE_INVAL)
1658     return
1659   if not variant:
1660     raise errors.OpPrereqError("OS name must include a variant",
1661                                errors.ECODE_INVAL)
1662
1663   if variant not in os_obj.supported_variants:
1664     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1665
1666
1667 def _GetNodeInstancesInner(cfg, fn):
1668   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1669
1670
1671 def _GetNodeInstances(cfg, node_name):
1672   """Returns a list of all primary and secondary instances on a node.
1673
1674   """
1675
1676   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1677
1678
1679 def _GetNodePrimaryInstances(cfg, node_name):
1680   """Returns primary instances on a node.
1681
1682   """
1683   return _GetNodeInstancesInner(cfg,
1684                                 lambda inst: node_name == inst.primary_node)
1685
1686
1687 def _GetNodeSecondaryInstances(cfg, node_name):
1688   """Returns secondary instances on a node.
1689
1690   """
1691   return _GetNodeInstancesInner(cfg,
1692                                 lambda inst: node_name in inst.secondary_nodes)
1693
1694
1695 def _GetStorageTypeArgs(cfg, storage_type):
1696   """Returns the arguments for a storage type.
1697
1698   """
1699   # Special case for file storage
1700   if storage_type == constants.ST_FILE:
1701     # storage.FileStorage wants a list of storage directories
1702     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1703
1704   return []
1705
1706
1707 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1708   faulty = []
1709
1710   for dev in instance.disks:
1711     cfg.SetDiskID(dev, node_name)
1712
1713   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1714                                                                 instance))
1715   result.Raise("Failed to get disk status from node %s" % node_name,
1716                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1717
1718   for idx, bdev_status in enumerate(result.payload):
1719     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1720       faulty.append(idx)
1721
1722   return faulty
1723
1724
1725 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1726   """Check the sanity of iallocator and node arguments and use the
1727   cluster-wide iallocator if appropriate.
1728
1729   Check that at most one of (iallocator, node) is specified. If none is
1730   specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1731   then the LU's opcode's iallocator slot is filled with the cluster-wide
1732   default iallocator.
1733
1734   @type iallocator_slot: string
1735   @param iallocator_slot: the name of the opcode iallocator slot
1736   @type node_slot: string
1737   @param node_slot: the name of the opcode target node slot
1738
1739   """
1740   node = getattr(lu.op, node_slot, None)
1741   ialloc = getattr(lu.op, iallocator_slot, None)
1742   if node == []:
1743     node = None
1744
1745   if node is not None and ialloc is not None:
1746     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1747                                errors.ECODE_INVAL)
1748   elif ((node is None and ialloc is None) or
1749         ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1750     default_iallocator = lu.cfg.GetDefaultIAllocator()
1751     if default_iallocator:
1752       setattr(lu.op, iallocator_slot, default_iallocator)
1753     else:
1754       raise errors.OpPrereqError("No iallocator or node given and no"
1755                                  " cluster-wide default iallocator found;"
1756                                  " please specify either an iallocator or a"
1757                                  " node, or set a cluster-wide default"
1758                                  " iallocator", errors.ECODE_INVAL)
1759
1760
1761 def _GetDefaultIAllocator(cfg, ialloc):
1762   """Decides on which iallocator to use.
1763
1764   @type cfg: L{config.ConfigWriter}
1765   @param cfg: Cluster configuration object
1766   @type ialloc: string or None
1767   @param ialloc: Iallocator specified in opcode
1768   @rtype: string
1769   @return: Iallocator name
1770
1771   """
1772   if not ialloc:
1773     # Use default iallocator
1774     ialloc = cfg.GetDefaultIAllocator()
1775
1776   if not ialloc:
1777     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1778                                " opcode nor as a cluster-wide default",
1779                                errors.ECODE_INVAL)
1780
1781   return ialloc
1782
1783
1784 def _CheckHostnameSane(lu, name):
1785   """Ensures that a given hostname resolves to a 'sane' name.
1786
1787   The given name is required to be a prefix of the resolved hostname,
1788   to prevent accidental mismatches.
1789
1790   @param lu: the logical unit on behalf of which we're checking
1791   @param name: the name we should resolve and check
1792   @return: the resolved hostname object
1793
1794   """
1795   hostname = netutils.GetHostname(name=name)
1796   if hostname.name != name:
1797     lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1798   if not utils.MatchNameComponent(name, [hostname.name]):
1799     raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1800                                 " same as given hostname '%s'") %
1801                                 (hostname.name, name), errors.ECODE_INVAL)
1802   return hostname
1803
1804
1805 class LUClusterPostInit(LogicalUnit):
1806   """Logical unit for running hooks after cluster initialization.
1807
1808   """
1809   HPATH = "cluster-init"
1810   HTYPE = constants.HTYPE_CLUSTER
1811
1812   def BuildHooksEnv(self):
1813     """Build hooks env.
1814
1815     """
1816     return {
1817       "OP_TARGET": self.cfg.GetClusterName(),
1818       }
1819
1820   def BuildHooksNodes(self):
1821     """Build hooks nodes.
1822
1823     """
1824     return ([], [self.cfg.GetMasterNode()])
1825
1826   def Exec(self, feedback_fn):
1827     """Nothing to do.
1828
1829     """
1830     return True
1831
1832
1833 class LUClusterDestroy(LogicalUnit):
1834   """Logical unit for destroying the cluster.
1835
1836   """
1837   HPATH = "cluster-destroy"
1838   HTYPE = constants.HTYPE_CLUSTER
1839
1840   def BuildHooksEnv(self):
1841     """Build hooks env.
1842
1843     """
1844     return {
1845       "OP_TARGET": self.cfg.GetClusterName(),
1846       }
1847
1848   def BuildHooksNodes(self):
1849     """Build hooks nodes.
1850
1851     """
1852     return ([], [])
1853
1854   def CheckPrereq(self):
1855     """Check prerequisites.
1856
1857     This checks whether the cluster is empty.
1858
1859     Any errors are signaled by raising errors.OpPrereqError.
1860
1861     """
1862     master = self.cfg.GetMasterNode()
1863
1864     nodelist = self.cfg.GetNodeList()
1865     if len(nodelist) != 1 or nodelist[0] != master:
1866       raise errors.OpPrereqError("There are still %d node(s) in"
1867                                  " this cluster." % (len(nodelist) - 1),
1868                                  errors.ECODE_INVAL)
1869     instancelist = self.cfg.GetInstanceList()
1870     if instancelist:
1871       raise errors.OpPrereqError("There are still %d instance(s) in"
1872                                  " this cluster." % len(instancelist),
1873                                  errors.ECODE_INVAL)
1874
1875   def Exec(self, feedback_fn):
1876     """Destroys the cluster.
1877
1878     """
1879     master_params = self.cfg.GetMasterNetworkParameters()
1880
1881     # Run post hooks on master node before it's removed
1882     _RunPostHook(self, master_params.name)
1883
1884     ems = self.cfg.GetUseExternalMipScript()
1885     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1886                                                      master_params, ems)
1887     if result.fail_msg:
1888       self.LogWarning("Error disabling the master IP address: %s",
1889                       result.fail_msg)
1890
1891     return master_params.name
1892
1893
1894 def _VerifyCertificate(filename):
1895   """Verifies a certificate for L{LUClusterVerifyConfig}.
1896
1897   @type filename: string
1898   @param filename: Path to PEM file
1899
1900   """
1901   try:
1902     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1903                                            utils.ReadFile(filename))
1904   except Exception, err: # pylint: disable=W0703
1905     return (LUClusterVerifyConfig.ETYPE_ERROR,
1906             "Failed to load X509 certificate %s: %s" % (filename, err))
1907
1908   (errcode, msg) = \
1909     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1910                                 constants.SSL_CERT_EXPIRATION_ERROR)
1911
1912   if msg:
1913     fnamemsg = "While verifying %s: %s" % (filename, msg)
1914   else:
1915     fnamemsg = None
1916
1917   if errcode is None:
1918     return (None, fnamemsg)
1919   elif errcode == utils.CERT_WARNING:
1920     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1921   elif errcode == utils.CERT_ERROR:
1922     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1923
1924   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1925
1926
1927 def _GetAllHypervisorParameters(cluster, instances):
1928   """Compute the set of all hypervisor parameters.
1929
1930   @type cluster: L{objects.Cluster}
1931   @param cluster: the cluster object
1932   @param instances: list of L{objects.Instance}
1933   @param instances: additional instances from which to obtain parameters
1934   @rtype: list of (origin, hypervisor, parameters)
1935   @return: a list with all parameters found, indicating the hypervisor they
1936        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1937
1938   """
1939   hvp_data = []
1940
1941   for hv_name in cluster.enabled_hypervisors:
1942     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1943
1944   for os_name, os_hvp in cluster.os_hvp.items():
1945     for hv_name, hv_params in os_hvp.items():
1946       if hv_params:
1947         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1948         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1949
1950   # TODO: collapse identical parameter values in a single one
1951   for instance in instances:
1952     if instance.hvparams:
1953       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1954                        cluster.FillHV(instance)))
1955
1956   return hvp_data
1957
1958
1959 class _VerifyErrors(object):
1960   """Mix-in for cluster/group verify LUs.
1961
1962   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1963   self.op and self._feedback_fn to be available.)
1964
1965   """
1966
1967   ETYPE_FIELD = "code"
1968   ETYPE_ERROR = "ERROR"
1969   ETYPE_WARNING = "WARNING"
1970
1971   def _Error(self, ecode, item, msg, *args, **kwargs):
1972     """Format an error message.
1973
1974     Based on the opcode's error_codes parameter, either format a
1975     parseable error code, or a simpler error string.
1976
1977     This must be called only from Exec and functions called from Exec.
1978
1979     """
1980     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1981     itype, etxt, _ = ecode
1982     # first complete the msg
1983     if args:
1984       msg = msg % args
1985     # then format the whole message
1986     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1987       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1988     else:
1989       if item:
1990         item = " " + item
1991       else:
1992         item = ""
1993       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1994     # and finally report it via the feedback_fn
1995     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1996
1997   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1998     """Log an error message if the passed condition is True.
1999
2000     """
2001     cond = (bool(cond)
2002             or self.op.debug_simulate_errors) # pylint: disable=E1101
2003
2004     # If the error code is in the list of ignored errors, demote the error to a
2005     # warning
2006     (_, etxt, _) = ecode
2007     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
2008       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2009
2010     if cond:
2011       self._Error(ecode, *args, **kwargs)
2012
2013     # do not mark the operation as failed for WARN cases only
2014     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2015       self.bad = self.bad or cond
2016
2017
2018 class LUClusterVerify(NoHooksLU):
2019   """Submits all jobs necessary to verify the cluster.
2020
2021   """
2022   REQ_BGL = False
2023
2024   def ExpandNames(self):
2025     self.needed_locks = {}
2026
2027   def Exec(self, feedback_fn):
2028     jobs = []
2029
2030     if self.op.group_name:
2031       groups = [self.op.group_name]
2032       depends_fn = lambda: None
2033     else:
2034       groups = self.cfg.GetNodeGroupList()
2035
2036       # Verify global configuration
2037       jobs.append([
2038         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2039         ])
2040
2041       # Always depend on global verification
2042       depends_fn = lambda: [(-len(jobs), [])]
2043
2044     jobs.extend(
2045       [opcodes.OpClusterVerifyGroup(group_name=group,
2046                                     ignore_errors=self.op.ignore_errors,
2047                                     depends=depends_fn())]
2048       for group in groups)
2049
2050     # Fix up all parameters
2051     for op in itertools.chain(*jobs): # pylint: disable=W0142
2052       op.debug_simulate_errors = self.op.debug_simulate_errors
2053       op.verbose = self.op.verbose
2054       op.error_codes = self.op.error_codes
2055       try:
2056         op.skip_checks = self.op.skip_checks
2057       except AttributeError:
2058         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2059
2060     return ResultWithJobs(jobs)
2061
2062
2063 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2064   """Verifies the cluster config.
2065
2066   """
2067   REQ_BGL = False
2068
2069   def _VerifyHVP(self, hvp_data):
2070     """Verifies locally the syntax of the hypervisor parameters.
2071
2072     """
2073     for item, hv_name, hv_params in hvp_data:
2074       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2075              (item, hv_name))
2076       try:
2077         hv_class = hypervisor.GetHypervisor(hv_name)
2078         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2079         hv_class.CheckParameterSyntax(hv_params)
2080       except errors.GenericError, err:
2081         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2082
2083   def ExpandNames(self):
2084     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2085     self.share_locks = _ShareAll()
2086
2087   def CheckPrereq(self):
2088     """Check prerequisites.
2089
2090     """
2091     # Retrieve all information
2092     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2093     self.all_node_info = self.cfg.GetAllNodesInfo()
2094     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2095
2096   def Exec(self, feedback_fn):
2097     """Verify integrity of cluster, performing various test on nodes.
2098
2099     """
2100     self.bad = False
2101     self._feedback_fn = feedback_fn
2102
2103     feedback_fn("* Verifying cluster config")
2104
2105     for msg in self.cfg.VerifyConfig():
2106       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2107
2108     feedback_fn("* Verifying cluster certificate files")
2109
2110     for cert_filename in pathutils.ALL_CERT_FILES:
2111       (errcode, msg) = _VerifyCertificate(cert_filename)
2112       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2113
2114     feedback_fn("* Verifying hypervisor parameters")
2115
2116     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2117                                                 self.all_inst_info.values()))
2118
2119     feedback_fn("* Verifying all nodes belong to an existing group")
2120
2121     # We do this verification here because, should this bogus circumstance
2122     # occur, it would never be caught by VerifyGroup, which only acts on
2123     # nodes/instances reachable from existing node groups.
2124
2125     dangling_nodes = set(node.name for node in self.all_node_info.values()
2126                          if node.group not in self.all_group_info)
2127
2128     dangling_instances = {}
2129     no_node_instances = []
2130
2131     for inst in self.all_inst_info.values():
2132       if inst.primary_node in dangling_nodes:
2133         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2134       elif inst.primary_node not in self.all_node_info:
2135         no_node_instances.append(inst.name)
2136
2137     pretty_dangling = [
2138         "%s (%s)" %
2139         (node.name,
2140          utils.CommaJoin(dangling_instances.get(node.name,
2141                                                 ["no instances"])))
2142         for node in dangling_nodes]
2143
2144     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2145                   None,
2146                   "the following nodes (and their instances) belong to a non"
2147                   " existing group: %s", utils.CommaJoin(pretty_dangling))
2148
2149     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2150                   None,
2151                   "the following instances have a non-existing primary-node:"
2152                   " %s", utils.CommaJoin(no_node_instances))
2153
2154     return not self.bad
2155
2156
2157 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2158   """Verifies the status of a node group.
2159
2160   """
2161   HPATH = "cluster-verify"
2162   HTYPE = constants.HTYPE_CLUSTER
2163   REQ_BGL = False
2164
2165   _HOOKS_INDENT_RE = re.compile("^", re.M)
2166
2167   class NodeImage(object):
2168     """A class representing the logical and physical status of a node.
2169
2170     @type name: string
2171     @ivar name: the node name to which this object refers
2172     @ivar volumes: a structure as returned from
2173         L{ganeti.backend.GetVolumeList} (runtime)
2174     @ivar instances: a list of running instances (runtime)
2175     @ivar pinst: list of configured primary instances (config)
2176     @ivar sinst: list of configured secondary instances (config)
2177     @ivar sbp: dictionary of {primary-node: list of instances} for all
2178         instances for which this node is secondary (config)
2179     @ivar mfree: free memory, as reported by hypervisor (runtime)
2180     @ivar dfree: free disk, as reported by the node (runtime)
2181     @ivar offline: the offline status (config)
2182     @type rpc_fail: boolean
2183     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2184         not whether the individual keys were correct) (runtime)
2185     @type lvm_fail: boolean
2186     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2187     @type hyp_fail: boolean
2188     @ivar hyp_fail: whether the RPC call didn't return the instance list
2189     @type ghost: boolean
2190     @ivar ghost: whether this is a known node or not (config)
2191     @type os_fail: boolean
2192     @ivar os_fail: whether the RPC call didn't return valid OS data
2193     @type oslist: list
2194     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2195     @type vm_capable: boolean
2196     @ivar vm_capable: whether the node can host instances
2197
2198     """
2199     def __init__(self, offline=False, name=None, vm_capable=True):
2200       self.name = name
2201       self.volumes = {}
2202       self.instances = []
2203       self.pinst = []
2204       self.sinst = []
2205       self.sbp = {}
2206       self.mfree = 0
2207       self.dfree = 0
2208       self.offline = offline
2209       self.vm_capable = vm_capable
2210       self.rpc_fail = False
2211       self.lvm_fail = False
2212       self.hyp_fail = False
2213       self.ghost = False
2214       self.os_fail = False
2215       self.oslist = {}
2216
2217   def ExpandNames(self):
2218     # This raises errors.OpPrereqError on its own:
2219     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2220
2221     # Get instances in node group; this is unsafe and needs verification later
2222     inst_names = \
2223       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2224
2225     self.needed_locks = {
2226       locking.LEVEL_INSTANCE: inst_names,
2227       locking.LEVEL_NODEGROUP: [self.group_uuid],
2228       locking.LEVEL_NODE: [],
2229       }
2230
2231     self.share_locks = _ShareAll()
2232
2233   def DeclareLocks(self, level):
2234     if level == locking.LEVEL_NODE:
2235       # Get members of node group; this is unsafe and needs verification later
2236       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2237
2238       all_inst_info = self.cfg.GetAllInstancesInfo()
2239
2240       # In Exec(), we warn about mirrored instances that have primary and
2241       # secondary living in separate node groups. To fully verify that
2242       # volumes for these instances are healthy, we will need to do an
2243       # extra call to their secondaries. We ensure here those nodes will
2244       # be locked.
2245       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2246         # Important: access only the instances whose lock is owned
2247         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2248           nodes.update(all_inst_info[inst].secondary_nodes)
2249
2250       self.needed_locks[locking.LEVEL_NODE] = nodes
2251
2252   def CheckPrereq(self):
2253     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2254     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2255
2256     group_nodes = set(self.group_info.members)
2257     group_instances = \
2258       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2259
2260     unlocked_nodes = \
2261         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2262
2263     unlocked_instances = \
2264         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2265
2266     if unlocked_nodes:
2267       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2268                                  utils.CommaJoin(unlocked_nodes),
2269                                  errors.ECODE_STATE)
2270
2271     if unlocked_instances:
2272       raise errors.OpPrereqError("Missing lock for instances: %s" %
2273                                  utils.CommaJoin(unlocked_instances),
2274                                  errors.ECODE_STATE)
2275
2276     self.all_node_info = self.cfg.GetAllNodesInfo()
2277     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2278
2279     self.my_node_names = utils.NiceSort(group_nodes)
2280     self.my_inst_names = utils.NiceSort(group_instances)
2281
2282     self.my_node_info = dict((name, self.all_node_info[name])
2283                              for name in self.my_node_names)
2284
2285     self.my_inst_info = dict((name, self.all_inst_info[name])
2286                              for name in self.my_inst_names)
2287
2288     # We detect here the nodes that will need the extra RPC calls for verifying
2289     # split LV volumes; they should be locked.
2290     extra_lv_nodes = set()
2291
2292     for inst in self.my_inst_info.values():
2293       if inst.disk_template in constants.DTS_INT_MIRROR:
2294         for nname in inst.all_nodes:
2295           if self.all_node_info[nname].group != self.group_uuid:
2296             extra_lv_nodes.add(nname)
2297
2298     unlocked_lv_nodes = \
2299         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2300
2301     if unlocked_lv_nodes:
2302       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2303                                  utils.CommaJoin(unlocked_lv_nodes),
2304                                  errors.ECODE_STATE)
2305     self.extra_lv_nodes = list(extra_lv_nodes)
2306
2307   def _VerifyNode(self, ninfo, nresult):
2308     """Perform some basic validation on data returned from a node.
2309
2310       - check the result data structure is well formed and has all the
2311         mandatory fields
2312       - check ganeti version
2313
2314     @type ninfo: L{objects.Node}
2315     @param ninfo: the node to check
2316     @param nresult: the results from the node
2317     @rtype: boolean
2318     @return: whether overall this call was successful (and we can expect
2319          reasonable values in the respose)
2320
2321     """
2322     node = ninfo.name
2323     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2324
2325     # main result, nresult should be a non-empty dict
2326     test = not nresult or not isinstance(nresult, dict)
2327     _ErrorIf(test, constants.CV_ENODERPC, node,
2328                   "unable to verify node: no data returned")
2329     if test:
2330       return False
2331
2332     # compares ganeti version
2333     local_version = constants.PROTOCOL_VERSION
2334     remote_version = nresult.get("version", None)
2335     test = not (remote_version and
2336                 isinstance(remote_version, (list, tuple)) and
2337                 len(remote_version) == 2)
2338     _ErrorIf(test, constants.CV_ENODERPC, node,
2339              "connection to node returned invalid data")
2340     if test:
2341       return False
2342
2343     test = local_version != remote_version[0]
2344     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2345              "incompatible protocol versions: master %s,"
2346              " node %s", local_version, remote_version[0])
2347     if test:
2348       return False
2349
2350     # node seems compatible, we can actually try to look into its results
2351
2352     # full package version
2353     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2354                   constants.CV_ENODEVERSION, node,
2355                   "software version mismatch: master %s, node %s",
2356                   constants.RELEASE_VERSION, remote_version[1],
2357                   code=self.ETYPE_WARNING)
2358
2359     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2360     if ninfo.vm_capable and isinstance(hyp_result, dict):
2361       for hv_name, hv_result in hyp_result.iteritems():
2362         test = hv_result is not None
2363         _ErrorIf(test, constants.CV_ENODEHV, node,
2364                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2365
2366     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2367     if ninfo.vm_capable and isinstance(hvp_result, list):
2368       for item, hv_name, hv_result in hvp_result:
2369         _ErrorIf(True, constants.CV_ENODEHV, node,
2370                  "hypervisor %s parameter verify failure (source %s): %s",
2371                  hv_name, item, hv_result)
2372
2373     test = nresult.get(constants.NV_NODESETUP,
2374                        ["Missing NODESETUP results"])
2375     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2376              "; ".join(test))
2377
2378     return True
2379
2380   def _VerifyNodeTime(self, ninfo, nresult,
2381                       nvinfo_starttime, nvinfo_endtime):
2382     """Check the node time.
2383
2384     @type ninfo: L{objects.Node}
2385     @param ninfo: the node to check
2386     @param nresult: the remote results for the node
2387     @param nvinfo_starttime: the start time of the RPC call
2388     @param nvinfo_endtime: the end time of the RPC call
2389
2390     """
2391     node = ninfo.name
2392     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2393
2394     ntime = nresult.get(constants.NV_TIME, None)
2395     try:
2396       ntime_merged = utils.MergeTime(ntime)
2397     except (ValueError, TypeError):
2398       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2399       return
2400
2401     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2402       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2403     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2404       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2405     else:
2406       ntime_diff = None
2407
2408     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2409              "Node time diverges by at least %s from master node time",
2410              ntime_diff)
2411
2412   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2413     """Check the node LVM results.
2414
2415     @type ninfo: L{objects.Node}
2416     @param ninfo: the node to check
2417     @param nresult: the remote results for the node
2418     @param vg_name: the configured VG name
2419
2420     """
2421     if vg_name is None:
2422       return
2423
2424     node = ninfo.name
2425     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2426
2427     # checks vg existence and size > 20G
2428     vglist = nresult.get(constants.NV_VGLIST, None)
2429     test = not vglist
2430     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2431     if not test:
2432       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2433                                             constants.MIN_VG_SIZE)
2434       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2435
2436     # check pv names
2437     pvlist = nresult.get(constants.NV_PVLIST, None)
2438     test = pvlist is None
2439     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2440     if not test:
2441       # check that ':' is not present in PV names, since it's a
2442       # special character for lvcreate (denotes the range of PEs to
2443       # use on the PV)
2444       for _, pvname, owner_vg in pvlist:
2445         test = ":" in pvname
2446         _ErrorIf(test, constants.CV_ENODELVM, node,
2447                  "Invalid character ':' in PV '%s' of VG '%s'",
2448                  pvname, owner_vg)
2449
2450   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2451     """Check the node bridges.
2452
2453     @type ninfo: L{objects.Node}
2454     @param ninfo: the node to check
2455     @param nresult: the remote results for the node
2456     @param bridges: the expected list of bridges
2457
2458     """
2459     if not bridges:
2460       return
2461
2462     node = ninfo.name
2463     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2464
2465     missing = nresult.get(constants.NV_BRIDGES, None)
2466     test = not isinstance(missing, list)
2467     _ErrorIf(test, constants.CV_ENODENET, node,
2468              "did not return valid bridge information")
2469     if not test:
2470       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2471                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2472
2473   def _VerifyNodeUserScripts(self, ninfo, nresult):
2474     """Check the results of user scripts presence and executability on the node
2475
2476     @type ninfo: L{objects.Node}
2477     @param ninfo: the node to check
2478     @param nresult: the remote results for the node
2479
2480     """
2481     node = ninfo.name
2482
2483     test = not constants.NV_USERSCRIPTS in nresult
2484     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2485                   "did not return user scripts information")
2486
2487     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2488     if not test:
2489       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2490                     "user scripts not present or not executable: %s" %
2491                     utils.CommaJoin(sorted(broken_scripts)))
2492
2493   def _VerifyNodeNetwork(self, ninfo, nresult):
2494     """Check the node network connectivity results.
2495
2496     @type ninfo: L{objects.Node}
2497     @param ninfo: the node to check
2498     @param nresult: the remote results for the node
2499
2500     """
2501     node = ninfo.name
2502     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2503
2504     test = constants.NV_NODELIST not in nresult
2505     _ErrorIf(test, constants.CV_ENODESSH, node,
2506              "node hasn't returned node ssh connectivity data")
2507     if not test:
2508       if nresult[constants.NV_NODELIST]:
2509         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2510           _ErrorIf(True, constants.CV_ENODESSH, node,
2511                    "ssh communication with node '%s': %s", a_node, a_msg)
2512
2513     test = constants.NV_NODENETTEST not in nresult
2514     _ErrorIf(test, constants.CV_ENODENET, node,
2515              "node hasn't returned node tcp connectivity data")
2516     if not test:
2517       if nresult[constants.NV_NODENETTEST]:
2518         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2519         for anode in nlist:
2520           _ErrorIf(True, constants.CV_ENODENET, node,
2521                    "tcp communication with node '%s': %s",
2522                    anode, nresult[constants.NV_NODENETTEST][anode])
2523
2524     test = constants.NV_MASTERIP not in nresult
2525     _ErrorIf(test, constants.CV_ENODENET, node,
2526              "node hasn't returned node master IP reachability data")
2527     if not test:
2528       if not nresult[constants.NV_MASTERIP]:
2529         if node == self.master_node:
2530           msg = "the master node cannot reach the master IP (not configured?)"
2531         else:
2532           msg = "cannot reach the master IP"
2533         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2534
2535   def _VerifyInstance(self, instance, instanceconfig, node_image,
2536                       diskstatus):
2537     """Verify an instance.
2538
2539     This function checks to see if the required block devices are
2540     available on the instance's node.
2541
2542     """
2543     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2544     node_current = instanceconfig.primary_node
2545
2546     node_vol_should = {}
2547     instanceconfig.MapLVsByNode(node_vol_should)
2548
2549     cluster = self.cfg.GetClusterInfo()
2550     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2551                                                             self.group_info)
2552     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2553     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2554
2555     for node in node_vol_should:
2556       n_img = node_image[node]
2557       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2558         # ignore missing volumes on offline or broken nodes
2559         continue
2560       for volume in node_vol_should[node]:
2561         test = volume not in n_img.volumes
2562         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2563                  "volume %s missing on node %s", volume, node)
2564
2565     if instanceconfig.admin_state == constants.ADMINST_UP:
2566       pri_img = node_image[node_current]
2567       test = instance not in pri_img.instances and not pri_img.offline
2568       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2569                "instance not running on its primary node %s",
2570                node_current)
2571
2572     diskdata = [(nname, success, status, idx)
2573                 for (nname, disks) in diskstatus.items()
2574                 for idx, (success, status) in enumerate(disks)]
2575
2576     for nname, success, bdev_status, idx in diskdata:
2577       # the 'ghost node' construction in Exec() ensures that we have a
2578       # node here
2579       snode = node_image[nname]
2580       bad_snode = snode.ghost or snode.offline
2581       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2582                not success and not bad_snode,
2583                constants.CV_EINSTANCEFAULTYDISK, instance,
2584                "couldn't retrieve status for disk/%s on %s: %s",
2585                idx, nname, bdev_status)
2586       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2587                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2588                constants.CV_EINSTANCEFAULTYDISK, instance,
2589                "disk/%s on %s is faulty", idx, nname)
2590
2591   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2592     """Verify if there are any unknown volumes in the cluster.
2593
2594     The .os, .swap and backup volumes are ignored. All other volumes are
2595     reported as unknown.
2596
2597     @type reserved: L{ganeti.utils.FieldSet}
2598     @param reserved: a FieldSet of reserved volume names
2599
2600     """
2601     for node, n_img in node_image.items():
2602       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2603           self.all_node_info[node].group != self.group_uuid):
2604         # skip non-healthy nodes
2605         continue
2606       for volume in n_img.volumes:
2607         test = ((node not in node_vol_should or
2608                 volume not in node_vol_should[node]) and
2609                 not reserved.Matches(volume))
2610         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2611                       "volume %s is unknown", volume)
2612
2613   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2614     """Verify N+1 Memory Resilience.
2615
2616     Check that if one single node dies we can still start all the
2617     instances it was primary for.
2618
2619     """
2620     cluster_info = self.cfg.GetClusterInfo()
2621     for node, n_img in node_image.items():
2622       # This code checks that every node which is now listed as
2623       # secondary has enough memory to host all instances it is
2624       # supposed to should a single other node in the cluster fail.
2625       # FIXME: not ready for failover to an arbitrary node
2626       # FIXME: does not support file-backed instances
2627       # WARNING: we currently take into account down instances as well
2628       # as up ones, considering that even if they're down someone
2629       # might want to start them even in the event of a node failure.
2630       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2631         # we're skipping nodes marked offline and nodes in other groups from
2632         # the N+1 warning, since most likely we don't have good memory
2633         # infromation from them; we already list instances living on such
2634         # nodes, and that's enough warning
2635         continue
2636       #TODO(dynmem): also consider ballooning out other instances
2637       for prinode, instances in n_img.sbp.items():
2638         needed_mem = 0
2639         for instance in instances:
2640           bep = cluster_info.FillBE(instance_cfg[instance])
2641           if bep[constants.BE_AUTO_BALANCE]:
2642             needed_mem += bep[constants.BE_MINMEM]
2643         test = n_img.mfree < needed_mem
2644         self._ErrorIf(test, constants.CV_ENODEN1, node,
2645                       "not enough memory to accomodate instance failovers"
2646                       " should node %s fail (%dMiB needed, %dMiB available)",
2647                       prinode, needed_mem, n_img.mfree)
2648
2649   @classmethod
2650   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2651                    (files_all, files_opt, files_mc, files_vm)):
2652     """Verifies file checksums collected from all nodes.
2653
2654     @param errorif: Callback for reporting errors
2655     @param nodeinfo: List of L{objects.Node} objects
2656     @param master_node: Name of master node
2657     @param all_nvinfo: RPC results
2658
2659     """
2660     # Define functions determining which nodes to consider for a file
2661     files2nodefn = [
2662       (files_all, None),
2663       (files_mc, lambda node: (node.master_candidate or
2664                                node.name == master_node)),
2665       (files_vm, lambda node: node.vm_capable),
2666       ]
2667
2668     # Build mapping from filename to list of nodes which should have the file
2669     nodefiles = {}
2670     for (files, fn) in files2nodefn:
2671       if fn is None:
2672         filenodes = nodeinfo
2673       else:
2674         filenodes = filter(fn, nodeinfo)
2675       nodefiles.update((filename,
2676                         frozenset(map(operator.attrgetter("name"), filenodes)))
2677                        for filename in files)
2678
2679     assert set(nodefiles) == (files_all | files_mc | files_vm)
2680
2681     fileinfo = dict((filename, {}) for filename in nodefiles)
2682     ignore_nodes = set()
2683
2684     for node in nodeinfo:
2685       if node.offline:
2686         ignore_nodes.add(node.name)
2687         continue
2688
2689       nresult = all_nvinfo[node.name]
2690
2691       if nresult.fail_msg or not nresult.payload:
2692         node_files = None
2693       else:
2694         fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2695         node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2696                           for (key, value) in fingerprints.items())
2697         del fingerprints
2698
2699       test = not (node_files and isinstance(node_files, dict))
2700       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2701               "Node did not return file checksum data")
2702       if test:
2703         ignore_nodes.add(node.name)
2704         continue
2705
2706       # Build per-checksum mapping from filename to nodes having it
2707       for (filename, checksum) in node_files.items():
2708         assert filename in nodefiles
2709         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2710
2711     for (filename, checksums) in fileinfo.items():
2712       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2713
2714       # Nodes having the file
2715       with_file = frozenset(node_name
2716                             for nodes in fileinfo[filename].values()
2717                             for node_name in nodes) - ignore_nodes
2718
2719       expected_nodes = nodefiles[filename] - ignore_nodes
2720
2721       # Nodes missing file
2722       missing_file = expected_nodes - with_file
2723
2724       if filename in files_opt:
2725         # All or no nodes
2726         errorif(missing_file and missing_file != expected_nodes,
2727                 constants.CV_ECLUSTERFILECHECK, None,
2728                 "File %s is optional, but it must exist on all or no"
2729                 " nodes (not found on %s)",
2730                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2731       else:
2732         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2733                 "File %s is missing from node(s) %s", filename,
2734                 utils.CommaJoin(utils.NiceSort(missing_file)))
2735
2736         # Warn if a node has a file it shouldn't
2737         unexpected = with_file - expected_nodes
2738         errorif(unexpected,
2739                 constants.CV_ECLUSTERFILECHECK, None,
2740                 "File %s should not exist on node(s) %s",
2741                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2742
2743       # See if there are multiple versions of the file
2744       test = len(checksums) > 1
2745       if test:
2746         variants = ["variant %s on %s" %
2747                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2748                     for (idx, (checksum, nodes)) in
2749                       enumerate(sorted(checksums.items()))]
2750       else:
2751         variants = []
2752
2753       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2754               "File %s found with %s different checksums (%s)",
2755               filename, len(checksums), "; ".join(variants))
2756
2757   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2758                       drbd_map):
2759     """Verifies and the node DRBD status.
2760
2761     @type ninfo: L{objects.Node}
2762     @param ninfo: the node to check
2763     @param nresult: the remote results for the node
2764     @param instanceinfo: the dict of instances
2765     @param drbd_helper: the configured DRBD usermode helper
2766     @param drbd_map: the DRBD map as returned by
2767         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2768
2769     """
2770     node = ninfo.name
2771     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2772
2773     if drbd_helper:
2774       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2775       test = (helper_result is None)
2776       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2777                "no drbd usermode helper returned")
2778       if helper_result:
2779         status, payload = helper_result
2780         test = not status
2781         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2782                  "drbd usermode helper check unsuccessful: %s", payload)
2783         test = status and (payload != drbd_helper)
2784         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2785                  "wrong drbd usermode helper: %s", payload)
2786
2787     # compute the DRBD minors
2788     node_drbd = {}
2789     for minor, instance in drbd_map[node].items():
2790       test = instance not in instanceinfo
2791       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2792                "ghost instance '%s' in temporary DRBD map", instance)
2793         # ghost instance should not be running, but otherwise we
2794         # don't give double warnings (both ghost instance and
2795         # unallocated minor in use)
2796       if test:
2797         node_drbd[minor] = (instance, False)
2798       else:
2799         instance = instanceinfo[instance]
2800         node_drbd[minor] = (instance.name,
2801                             instance.admin_state == constants.ADMINST_UP)
2802
2803     # and now check them
2804     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2805     test = not isinstance(used_minors, (tuple, list))
2806     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2807              "cannot parse drbd status file: %s", str(used_minors))
2808     if test:
2809       # we cannot check drbd status
2810       return
2811
2812     for minor, (iname, must_exist) in node_drbd.items():
2813       test = minor not in used_minors and must_exist
2814       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2815                "drbd minor %d of instance %s is not active", minor, iname)
2816     for minor in used_minors:
2817       test = minor not in node_drbd
2818       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2819                "unallocated drbd minor %d is in use", minor)
2820
2821   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2822     """Builds the node OS structures.
2823
2824     @type ninfo: L{objects.Node}
2825     @param ninfo: the node to check
2826     @param nresult: the remote results for the node
2827     @param nimg: the node image object
2828
2829     """
2830     node = ninfo.name
2831     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2832
2833     remote_os = nresult.get(constants.NV_OSLIST, None)
2834     test = (not isinstance(remote_os, list) or
2835             not compat.all(isinstance(v, list) and len(v) == 7
2836                            for v in remote_os))
2837
2838     _ErrorIf(test, constants.CV_ENODEOS, node,
2839              "node hasn't returned valid OS data")
2840
2841     nimg.os_fail = test
2842
2843     if test:
2844       return
2845
2846     os_dict = {}
2847
2848     for (name, os_path, status, diagnose,
2849          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2850
2851       if name not in os_dict:
2852         os_dict[name] = []
2853
2854       # parameters is a list of lists instead of list of tuples due to
2855       # JSON lacking a real tuple type, fix it:
2856       parameters = [tuple(v) for v in parameters]
2857       os_dict[name].append((os_path, status, diagnose,
2858                             set(variants), set(parameters), set(api_ver)))
2859
2860     nimg.oslist = os_dict
2861
2862   def _VerifyNodeOS(self, ninfo, nimg, base):
2863     """Verifies the node OS list.
2864
2865     @type ninfo: L{objects.Node}
2866     @param ninfo: the node to check
2867     @param nimg: the node image object
2868     @param base: the 'template' node we match against (e.g. from the master)
2869
2870     """
2871     node = ninfo.name
2872     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2873
2874     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2875
2876     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2877     for os_name, os_data in nimg.oslist.items():
2878       assert os_data, "Empty OS status for OS %s?!" % os_name
2879       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2880       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2881                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2882       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2883                "OS '%s' has multiple entries (first one shadows the rest): %s",
2884                os_name, utils.CommaJoin([v[0] for v in os_data]))
2885       # comparisons with the 'base' image
2886       test = os_name not in base.oslist
2887       _ErrorIf(test, constants.CV_ENODEOS, node,
2888                "Extra OS %s not present on reference node (%s)",
2889                os_name, base.name)
2890       if test:
2891         continue
2892       assert base.oslist[os_name], "Base node has empty OS status?"
2893       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2894       if not b_status:
2895         # base OS is invalid, skipping
2896         continue
2897       for kind, a, b in [("API version", f_api, b_api),
2898                          ("variants list", f_var, b_var),
2899                          ("parameters", beautify_params(f_param),
2900                           beautify_params(b_param))]:
2901         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2902                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2903                  kind, os_name, base.name,
2904                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2905
2906     # check any missing OSes
2907     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2908     _ErrorIf(missing, constants.CV_ENODEOS, node,
2909              "OSes present on reference node %s but missing on this node: %s",
2910              base.name, utils.CommaJoin(missing))
2911
2912   def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2913     """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2914
2915     @type ninfo: L{objects.Node}
2916     @param ninfo: the node to check
2917     @param nresult: the remote results for the node
2918     @type is_master: bool
2919     @param is_master: Whether node is the master node
2920
2921     """
2922     node = ninfo.name
2923
2924     if (is_master and
2925         (constants.ENABLE_FILE_STORAGE or
2926          constants.ENABLE_SHARED_FILE_STORAGE)):
2927       try:
2928         fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2929       except KeyError:
2930         # This should never happen
2931         self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2932                       "Node did not return forbidden file storage paths")
2933       else:
2934         self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2935                       "Found forbidden file storage paths: %s",
2936                       utils.CommaJoin(fspaths))
2937     else:
2938       self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2939                     constants.CV_ENODEFILESTORAGEPATHS, node,
2940                     "Node should not have returned forbidden file storage"
2941                     " paths")
2942
2943   def _VerifyOob(self, ninfo, nresult):
2944     """Verifies out of band functionality of a node.
2945
2946     @type ninfo: L{objects.Node}
2947     @param ninfo: the node to check
2948     @param nresult: the remote results for the node
2949
2950     """
2951     node = ninfo.name
2952     # We just have to verify the paths on master and/or master candidates
2953     # as the oob helper is invoked on the master
2954     if ((ninfo.master_candidate or ninfo.master_capable) and
2955         constants.NV_OOB_PATHS in nresult):
2956       for path_result in nresult[constants.NV_OOB_PATHS]:
2957         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2958
2959   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2960     """Verifies and updates the node volume data.
2961
2962     This function will update a L{NodeImage}'s internal structures
2963     with data from the remote call.
2964
2965     @type ninfo: L{objects.Node}
2966     @param ninfo: the node to check
2967     @param nresult: the remote results for the node
2968     @param nimg: the node image object
2969     @param vg_name: the configured VG name
2970
2971     """
2972     node = ninfo.name
2973     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2974
2975     nimg.lvm_fail = True
2976     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2977     if vg_name is None:
2978       pass
2979     elif isinstance(lvdata, basestring):
2980       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2981                utils.SafeEncode(lvdata))
2982     elif not isinstance(lvdata, dict):
2983       _ErrorIf(True, constants.CV_ENODELVM, node,
2984                "rpc call to node failed (lvlist)")
2985     else:
2986       nimg.volumes = lvdata
2987       nimg.lvm_fail = False
2988
2989   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2990     """Verifies and updates the node instance list.
2991
2992     If the listing was successful, then updates this node's instance
2993     list. Otherwise, it marks the RPC call as failed for the instance
2994     list key.
2995
2996     @type ninfo: L{objects.Node}
2997     @param ninfo: the node to check
2998     @param nresult: the remote results for the node
2999     @param nimg: the node image object
3000
3001     """
3002     idata = nresult.get(constants.NV_INSTANCELIST, None)
3003     test = not isinstance(idata, list)
3004     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3005                   "rpc call to node failed (instancelist): %s",
3006                   utils.SafeEncode(str(idata)))
3007     if test:
3008       nimg.hyp_fail = True
3009     else:
3010       nimg.instances = idata
3011
3012   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3013     """Verifies and computes a node information map
3014
3015     @type ninfo: L{objects.Node}
3016     @param ninfo: the node to check
3017     @param nresult: the remote results for the node
3018     @param nimg: the node image object
3019     @param vg_name: the configured VG name
3020
3021     """
3022     node = ninfo.name
3023     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3024
3025     # try to read free memory (from the hypervisor)
3026     hv_info = nresult.get(constants.NV_HVINFO, None)
3027     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3028     _ErrorIf(test, constants.CV_ENODEHV, node,
3029              "rpc call to node failed (hvinfo)")
3030     if not test:
3031       try:
3032         nimg.mfree = int(hv_info["memory_free"])
3033       except (ValueError, TypeError):
3034         _ErrorIf(True, constants.CV_ENODERPC, node,
3035                  "node returned invalid nodeinfo, check hypervisor")
3036
3037     # FIXME: devise a free space model for file based instances as well
3038     if vg_name is not None:
3039       test = (constants.NV_VGLIST not in nresult or
3040               vg_name not in nresult[constants.NV_VGLIST])
3041       _ErrorIf(test, constants.CV_ENODELVM, node,
3042                "node didn't return data for the volume group '%s'"
3043                " - it is either missing or broken", vg_name)
3044       if not test:
3045         try:
3046           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3047         except (ValueError, TypeError):
3048           _ErrorIf(True, constants.CV_ENODERPC, node,
3049                    "node returned invalid LVM info, check LVM status")
3050
3051   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3052     """Gets per-disk status information for all instances.
3053
3054     @type nodelist: list of strings
3055     @param nodelist: Node names
3056     @type node_image: dict of (name, L{objects.Node})
3057     @param node_image: Node objects
3058     @type instanceinfo: dict of (name, L{objects.Instance})
3059     @param instanceinfo: Instance objects
3060     @rtype: {instance: {node: [(succes, payload)]}}
3061     @return: a dictionary of per-instance dictionaries with nodes as
3062         keys and disk information as values; the disk information is a
3063         list of tuples (success, payload)
3064
3065     """
3066     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3067
3068     node_disks = {}
3069     node_disks_devonly = {}
3070     diskless_instances = set()
3071     diskless = constants.DT_DISKLESS
3072
3073     for nname in nodelist:
3074       node_instances = list(itertools.chain(node_image[nname].pinst,
3075                                             node_image[nname].sinst))
3076       diskless_instances.update(inst for inst in node_instances
3077                                 if instanceinfo[inst].disk_template == diskless)
3078       disks = [(inst, disk)
3079                for inst in node_instances
3080                for disk in instanceinfo[inst].disks]
3081
3082       if not disks:
3083         # No need to collect data
3084         continue
3085
3086       node_disks[nname] = disks
3087
3088       # _AnnotateDiskParams makes already copies of the disks
3089       devonly = []
3090       for (inst, dev) in disks:
3091         (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3092         self.cfg.SetDiskID(anno_disk, nname)
3093         devonly.append(anno_disk)
3094
3095       node_disks_devonly[nname] = devonly
3096
3097     assert len(node_disks) == len(node_disks_devonly)
3098
3099     # Collect data from all nodes with disks
3100     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3101                                                           node_disks_devonly)
3102
3103     assert len(result) == len(node_disks)
3104
3105     instdisk = {}
3106
3107     for (nname, nres) in result.items():
3108       disks = node_disks[nname]
3109
3110       if nres.offline:
3111         # No data from this node
3112         data = len(disks) * [(False, "node offline")]
3113       else:
3114         msg = nres.fail_msg
3115         _ErrorIf(msg, constants.CV_ENODERPC, nname,
3116                  "while getting disk information: %s", msg)
3117         if msg:
3118           # No data from this node
3119           data = len(disks) * [(False, msg)]
3120         else:
3121           data = []
3122           for idx, i in enumerate(nres.payload):
3123             if isinstance(i, (tuple, list)) and len(i) == 2:
3124               data.append(i)
3125             else:
3126               logging.warning("Invalid result from node %s, entry %d: %s",
3127                               nname, idx, i)
3128               data.append((False, "Invalid result from the remote node"))
3129
3130       for ((inst, _), status) in zip(disks, data):
3131         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3132
3133     # Add empty entries for diskless instances.
3134     for inst in diskless_instances:
3135       assert inst not in instdisk
3136       instdisk[inst] = {}
3137
3138     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3139                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
3140                       compat.all(isinstance(s, (tuple, list)) and
3141                                  len(s) == 2 for s in statuses)
3142                       for inst, nnames in instdisk.items()
3143                       for nname, statuses in nnames.items())
3144     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3145
3146     return instdisk
3147
3148   @staticmethod
3149   def _SshNodeSelector(group_uuid, all_nodes):
3150     """Create endless iterators for all potential SSH check hosts.
3151
3152     """
3153     nodes = [node for node in all_nodes
3154              if (node.group != group_uuid and
3155                  not node.offline)]
3156     keyfunc = operator.attrgetter("group")
3157
3158     return map(itertools.cycle,
3159                [sorted(map(operator.attrgetter("name"), names))
3160                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3161                                                   keyfunc)])
3162
3163   @classmethod
3164   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3165     """Choose which nodes should talk to which other nodes.
3166
3167     We will make nodes contact all nodes in their group, and one node from
3168     every other group.
3169
3170     @warning: This algorithm has a known issue if one node group is much
3171       smaller than others (e.g. just one node). In such a case all other
3172       nodes will talk to the single node.
3173
3174     """
3175     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3176     sel = cls._SshNodeSelector(group_uuid, all_nodes)
3177
3178     return (online_nodes,
3179             dict((name, sorted([i.next() for i in sel]))
3180                  for name in online_nodes))
3181
3182   def BuildHooksEnv(self):
3183     """Build hooks env.
3184
3185     Cluster-Verify hooks just ran in the post phase and their failure makes
3186     the output be logged in the verify output and the verification to fail.
3187
3188     """
3189     env = {
3190       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3191       }
3192
3193     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3194                for node in self.my_node_info.values())
3195
3196     return env
3197
3198   def BuildHooksNodes(self):
3199     """Build hooks nodes.
3200
3201     """
3202     return ([], self.my_node_names)
3203
3204   def Exec(self, feedback_fn):
3205     """Verify integrity of the node group, performing various test on nodes.
3206
3207     """
3208     # This method has too many local variables. pylint: disable=R0914
3209     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3210
3211     if not self.my_node_names:
3212       # empty node group
3213       feedback_fn("* Empty node group, skipping verification")
3214       return True
3215
3216     self.bad = False
3217     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3218     verbose = self.op.verbose
3219     self._feedback_fn = feedback_fn
3220
3221     vg_name = self.cfg.GetVGName()
3222     drbd_helper = self.cfg.GetDRBDHelper()
3223     cluster = self.cfg.GetClusterInfo()
3224     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3225     hypervisors = cluster.enabled_hypervisors
3226     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3227
3228     i_non_redundant = [] # Non redundant instances
3229     i_non_a_balanced = [] # Non auto-balanced instances
3230     i_offline = 0 # Count of offline instances
3231     n_offline = 0 # Count of offline nodes
3232     n_drained = 0 # Count of nodes being drained
3233     node_vol_should = {}
3234
3235     # FIXME: verify OS list
3236
3237     # File verification
3238     filemap = _ComputeAncillaryFiles(cluster, False)
3239
3240     # do local checksums
3241     master_node = self.master_node = self.cfg.GetMasterNode()
3242     master_ip = self.cfg.GetMasterIP()
3243
3244     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3245
3246     user_scripts = []
3247     if self.cfg.GetUseExternalMipScript():
3248       user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3249
3250     node_verify_param = {
3251       constants.NV_FILELIST:
3252         map(vcluster.MakeVirtualPath,
3253             utils.UniqueSequence(filename
3254                                  for files in filemap
3255                                  for filename in files)),
3256       constants.NV_NODELIST:
3257         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3258                                   self.all_node_info.values()),
3259       constants.NV_HYPERVISOR: hypervisors,
3260       constants.NV_HVPARAMS:
3261         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3262       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3263                                  for node in node_data_list
3264                                  if not node.offline],
3265       constants.NV_INSTANCELIST: hypervisors,
3266       constants.NV_VERSION: None,
3267       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3268       constants.NV_NODESETUP: None,
3269       constants.NV_TIME: None,
3270       constants.NV_MASTERIP: (master_node, master_ip),
3271       constants.NV_OSLIST: None,
3272       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3273       constants.NV_USERSCRIPTS: user_scripts,
3274       }
3275
3276     if vg_name is not None:
3277       node_verify_param[constants.NV_VGLIST] = None
3278       node_verify_param[constants.NV_LVLIST] = vg_name
3279       node_verify_param[constants.NV_PVLIST] = [vg_name]
3280
3281     if drbd_helper:
3282       node_verify_param[constants.NV_DRBDLIST] = None
3283       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3284
3285     if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3286       # Load file storage paths only from master node
3287       node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3288
3289     # bridge checks
3290     # FIXME: this needs to be changed per node-group, not cluster-wide
3291     bridges = set()
3292     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3293     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3294       bridges.add(default_nicpp[constants.NIC_LINK])
3295     for instance in self.my_inst_info.values():
3296       for nic in instance.nics:
3297         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3298         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3299           bridges.add(full_nic[constants.NIC_LINK])
3300
3301     if bridges:
3302       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3303
3304     # Build our expected cluster state
3305     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3306                                                  name=node.name,
3307                                                  vm_capable=node.vm_capable))
3308                       for node in node_data_list)
3309
3310     # Gather OOB paths
3311     oob_paths = []
3312     for node in self.all_node_info.values():
3313       path = _SupportsOob(self.cfg, node)
3314       if path and path not in oob_paths:
3315         oob_paths.append(path)
3316
3317     if oob_paths:
3318       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3319
3320     for instance in self.my_inst_names:
3321       inst_config = self.my_inst_info[instance]
3322       if inst_config.admin_state == constants.ADMINST_OFFLINE:
3323         i_offline += 1
3324
3325       for nname in inst_config.all_nodes:
3326         if nname not in node_image:
3327           gnode = self.NodeImage(name=nname)
3328           gnode.ghost = (nname not in self.all_node_info)
3329           node_image[nname] = gnode
3330
3331       inst_config.MapLVsByNode(node_vol_should)
3332
3333       pnode = inst_config.primary_node
3334       node_image[pnode].pinst.append(instance)
3335
3336       for snode in inst_config.secondary_nodes:
3337         nimg = node_image[snode]
3338         nimg.sinst.append(instance)
3339         if pnode not in nimg.sbp:
3340           nimg.sbp[pnode] = []
3341         nimg.sbp[pnode].append(instance)
3342
3343     # At this point, we have the in-memory data structures complete,
3344     # except for the runtime information, which we'll gather next
3345
3346     # Due to the way our RPC system works, exact response times cannot be
3347     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3348     # time before and after executing the request, we can at least have a time
3349     # window.
3350     nvinfo_starttime = time.time()
3351     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3352                                            node_verify_param,
3353                                            self.cfg.GetClusterName())
3354     nvinfo_endtime = time.time()
3355
3356     if self.extra_lv_nodes and vg_name is not None:
3357       extra_lv_nvinfo = \
3358           self.rpc.call_node_verify(self.extra_lv_nodes,
3359                                     {constants.NV_LVLIST: vg_name},
3360                                     self.cfg.GetClusterName())
3361     else:
3362       extra_lv_nvinfo = {}
3363
3364     all_drbd_map = self.cfg.ComputeDRBDMap()
3365
3366     feedback_fn("* Gathering disk information (%s nodes)" %
3367                 len(self.my_node_names))
3368     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3369                                      self.my_inst_info)
3370
3371     feedback_fn("* Verifying configuration file consistency")
3372
3373     # If not all nodes are being checked, we need to make sure the master node
3374     # and a non-checked vm_capable node are in the list.
3375     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3376     if absent_nodes:
3377       vf_nvinfo = all_nvinfo.copy()
3378       vf_node_info = list(self.my_node_info.values())
3379       additional_nodes = []
3380       if master_node not in self.my_node_info:
3381         additional_nodes.append(master_node)
3382         vf_node_info.append(self.all_node_info[master_node])
3383       # Add the first vm_capable node we find which is not included,
3384       # excluding the master node (which we already have)
3385       for node in absent_nodes:
3386         nodeinfo = self.all_node_info[node]
3387         if (nodeinfo.vm_capable and not nodeinfo.offline and
3388             node != master_node):
3389           additional_nodes.append(node)
3390           vf_node_info.append(self.all_node_info[node])
3391           break
3392       key = constants.NV_FILELIST
3393       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3394                                                  {key: node_verify_param[key]},
3395                                                  self.cfg.GetClusterName()))
3396     else:
3397       vf_nvinfo = all_nvinfo
3398       vf_node_info = self.my_node_info.values()
3399
3400     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3401
3402     feedback_fn("* Verifying node status")
3403
3404     refos_img = None
3405
3406     for node_i in node_data_list:
3407       node = node_i.name
3408       nimg = node_image[node]
3409
3410       if node_i.offline:
3411         if verbose:
3412           feedback_fn("* Skipping offline node %s" % (node,))
3413         n_offline += 1
3414         continue
3415
3416       if node == master_node:
3417         ntype = "master"
3418       elif node_i.master_candidate:
3419         ntype = "master candidate"
3420       elif node_i.drained:
3421         ntype = "drained"
3422         n_drained += 1
3423       else:
3424         ntype = "regular"
3425       if verbose:
3426         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3427
3428       msg = all_nvinfo[node].fail_msg
3429       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3430                msg)
3431       if msg:
3432         nimg.rpc_fail = True
3433         continue
3434
3435       nresult = all_nvinfo[node].payload
3436
3437       nimg.call_ok = self._VerifyNode(node_i, nresult)
3438       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3439       self._VerifyNodeNetwork(node_i, nresult)
3440       self._VerifyNodeUserScripts(node_i, nresult)
3441       self._VerifyOob(node_i, nresult)
3442       self._VerifyFileStoragePaths(node_i, nresult,
3443                                    node == master_node)
3444
3445       if nimg.vm_capable:
3446         self._VerifyNodeLVM(node_i, nresult, vg_name)
3447         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3448                              all_drbd_map)
3449
3450         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3451         self._UpdateNodeInstances(node_i, nresult, nimg)
3452         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3453         self._UpdateNodeOS(node_i, nresult, nimg)
3454
3455         if not nimg.os_fail:
3456           if refos_img is None:
3457             refos_img = nimg
3458           self._VerifyNodeOS(node_i, nimg, refos_img)
3459         self._VerifyNodeBridges(node_i, nresult, bridges)
3460
3461         # Check whether all running instancies are primary for the node. (This
3462         # can no longer be done from _VerifyInstance below, since some of the
3463         # wrong instances could be from other node groups.)
3464         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3465
3466         for inst in non_primary_inst:
3467           test = inst in self.all_inst_info
3468           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3469                    "instance should not run on node %s", node_i.name)
3470           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3471                    "node is running unknown instance %s", inst)
3472
3473     for node, result in extra_lv_nvinfo.items():
3474       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3475                               node_image[node], vg_name)
3476
3477     feedback_fn("* Verifying instance status")
3478     for instance in self.my_inst_names:
3479       if verbose:
3480         feedback_fn("* Verifying instance %s" % instance)
3481       inst_config = self.my_inst_info[instance]
3482       self._VerifyInstance(instance, inst_config, node_image,
3483                            instdisk[instance])
3484       inst_nodes_offline = []
3485
3486       pnode = inst_config.primary_node
3487       pnode_img = node_image[pnode]
3488       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3489                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3490                " primary node failed", instance)
3491
3492       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3493                pnode_img.offline,
3494                constants.CV_EINSTANCEBADNODE, instance,
3495                "instance is marked as running and lives on offline node %s",
3496                inst_config.primary_node)
3497
3498       # If the instance is non-redundant we cannot survive losing its primary
3499       # node, so we are not N+1 compliant.
3500       if inst_config.disk_template not in constants.DTS_MIRRORED:
3501         i_non_redundant.append(instance)
3502
3503       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3504                constants.CV_EINSTANCELAYOUT,
3505                instance, "instance has multiple secondary nodes: %s",
3506                utils.CommaJoin(inst_config.secondary_nodes),
3507                code=self.ETYPE_WARNING)
3508
3509       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3510         pnode = inst_config.primary_node
3511         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3512         instance_groups = {}
3513
3514         for node in instance_nodes:
3515           instance_groups.setdefault(self.all_node_info[node].group,
3516                                      []).append(node)
3517
3518         pretty_list = [
3519           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3520           # Sort so that we always list the primary node first.
3521           for group, nodes in sorted(instance_groups.items(),
3522                                      key=lambda (_, nodes): pnode in nodes,
3523                                      reverse=True)]
3524
3525         self._ErrorIf(len(instance_groups) > 1,
3526                       constants.CV_EINSTANCESPLITGROUPS,
3527                       instance, "instance has primary and secondary nodes in"
3528                       " different groups: %s", utils.CommaJoin(pretty_list),
3529                       code=self.ETYPE_WARNING)
3530
3531       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3532         i_non_a_balanced.append(instance)
3533
3534       for snode in inst_config.secondary_nodes:
3535         s_img = node_image[snode]
3536         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3537                  snode, "instance %s, connection to secondary node failed",
3538                  instance)
3539
3540         if s_img.offline:
3541           inst_nodes_offline.append(snode)
3542
3543       # warn that the instance lives on offline nodes
3544       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3545                "instance has offline secondary node(s) %s",
3546                utils.CommaJoin(inst_nodes_offline))
3547       # ... or ghost/non-vm_capable nodes
3548       for node in inst_config.all_nodes:
3549         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3550                  instance, "instance lives on ghost node %s", node)
3551         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3552                  instance, "instance lives on non-vm_capable node %s", node)
3553
3554     feedback_fn("* Verifying orphan volumes")
3555     reserved = utils.FieldSet(*cluster.reserved_lvs)
3556
3557     # We will get spurious "unknown volume" warnings if any node of this group
3558     # is secondary for an instance whose primary is in another group. To avoid
3559     # them, we find these instances and add their volumes to node_vol_should.
3560     for inst in self.all_inst_info.values():
3561       for secondary in inst.secondary_nodes:
3562         if (secondary in self.my_node_info
3563             and inst.name not in self.my_inst_info):
3564           inst.MapLVsByNode(node_vol_should)
3565           break
3566
3567     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3568
3569     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3570       feedback_fn("* Verifying N+1 Memory redundancy")
3571       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3572
3573     feedback_fn("* Other Notes")
3574     if i_non_redundant:
3575       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3576                   % len(i_non_redundant))
3577
3578     if i_non_a_balanced:
3579       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3580                   % len(i_non_a_balanced))
3581
3582     if i_offline:
3583       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3584
3585     if n_offline:
3586       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3587
3588     if n_drained:
3589       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3590
3591     return not self.bad
3592
3593   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3594     """Analyze the post-hooks' result
3595
3596     This method analyses the hook result, handles it, and sends some
3597     nicely-formatted feedback back to the user.
3598
3599     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3600         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3601     @param hooks_results: the results of the multi-node hooks rpc call
3602     @param feedback_fn: function used send feedback back to the caller
3603     @param lu_result: previous Exec result
3604     @return: the new Exec result, based on the previous result
3605         and hook results
3606
3607     """
3608     # We only really run POST phase hooks, only for non-empty groups,
3609     # and are only interested in their results
3610     if not self.my_node_names:
3611       # empty node group
3612       pass
3613     elif phase == constants.HOOKS_PHASE_POST:
3614       # Used to change hooks' output to proper indentation
3615       feedback_fn("* Hooks Results")
3616       assert hooks_results, "invalid result from hooks"
3617
3618       for node_name in hooks_results:
3619         res = hooks_results[node_name]
3620         msg = res.fail_msg
3621         test = msg and not res.offline
3622         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3623                       "Communication failure in hooks execution: %s", msg)
3624         if res.offline or msg:
3625           # No need to investigate payload if node is offline or gave
3626           # an error.
3627           continue
3628         for script, hkr, output in res.payload:
3629           test = hkr == constants.HKR_FAIL
3630           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3631                         "Script %s failed, output:", script)
3632           if test:
3633             output = self._HOOKS_INDENT_RE.sub("      ", output)
3634             feedback_fn("%s" % output)
3635             lu_result = False
3636
3637     return lu_result
3638
3639
3640 class LUClusterVerifyDisks(NoHooksLU):
3641   """Verifies the cluster disks status.
3642
3643   """
3644   REQ_BGL = False
3645
3646   def ExpandNames(self):
3647     self.share_locks = _ShareAll()
3648     self.needed_locks = {
3649       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3650       }
3651
3652   def Exec(self, feedback_fn):
3653     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3654
3655     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3656     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3657                            for group in group_names])
3658
3659
3660 class LUGroupVerifyDisks(NoHooksLU):
3661   """Verifies the status of all disks in a node group.
3662
3663   """
3664   REQ_BGL = False
3665
3666   def ExpandNames(self):
3667     # Raises errors.OpPrereqError on its own if group can't be found
3668     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3669
3670     self.share_locks = _ShareAll()
3671     self.needed_locks = {
3672       locking.LEVEL_INSTANCE: [],
3673       locking.LEVEL_NODEGROUP: [],
3674       locking.LEVEL_NODE: [],
3675       }
3676
3677   def DeclareLocks(self, level):
3678     if level == locking.LEVEL_INSTANCE:
3679       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3680
3681       # Lock instances optimistically, needs verification once node and group
3682       # locks have been acquired
3683       self.needed_locks[locking.LEVEL_INSTANCE] = \
3684         self.cfg.GetNodeGroupInstances(self.group_uuid)
3685
3686     elif level == locking.LEVEL_NODEGROUP:
3687       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3688
3689       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3690         set([self.group_uuid] +
3691             # Lock all groups used by instances optimistically; this requires
3692             # going via the node before it's locked, requiring verification
3693             # later on
3694             [group_uuid
3695              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3696              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3697
3698     elif level == locking.LEVEL_NODE:
3699       # This will only lock the nodes in the group to be verified which contain
3700       # actual instances
3701       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3702       self._LockInstancesNodes()
3703
3704       # Lock all nodes in group to be verified
3705       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3706       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3707       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3708
3709   def CheckPrereq(self):
3710     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3711     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3712     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3713
3714     assert self.group_uuid in owned_groups
3715
3716     # Check if locked instances are still correct
3717     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3718
3719     # Get instance information
3720     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3721
3722     # Check if node groups for locked instances are still correct
3723     _CheckInstancesNodeGroups(self.cfg, self.instances,
3724                               owned_groups, owned_nodes, self.group_uuid)
3725
3726   def Exec(self, feedback_fn):
3727     """Verify integrity of cluster disks.
3728
3729     @rtype: tuple of three items
3730     @return: a tuple of (dict of node-to-node_error, list of instances
3731         which need activate-disks, dict of instance: (node, volume) for
3732         missing volumes
3733
3734     """
3735     res_nodes = {}
3736     res_instances = set()
3737     res_missing = {}
3738
3739     nv_dict = _MapInstanceDisksToNodes(
3740       [inst for inst in self.instances.values()
3741        if inst.admin_state == constants.ADMINST_UP])
3742
3743     if nv_dict:
3744       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3745                              set(self.cfg.GetVmCapableNodeList()))
3746
3747       node_lvs = self.rpc.call_lv_list(nodes, [])
3748
3749       for (node, node_res) in node_lvs.items():
3750         if node_res.offline:
3751           continue
3752
3753         msg = node_res.fail_msg
3754         if msg:
3755           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3756           res_nodes[node] = msg
3757           continue
3758
3759         for lv_name, (_, _, lv_online) in node_res.payload.items():
3760           inst = nv_dict.pop((node, lv_name), None)
3761           if not (lv_online or inst is None):
3762             res_instances.add(inst)
3763
3764       # any leftover items in nv_dict are missing LVs, let's arrange the data
3765       # better
3766       for key, inst in nv_dict.iteritems():
3767         res_missing.setdefault(inst, []).append(list(key))
3768
3769     return (res_nodes, list(res_instances), res_missing)
3770
3771
3772 class LUClusterRepairDiskSizes(NoHooksLU):
3773   """Verifies the cluster disks sizes.
3774
3775   """
3776   REQ_BGL = False
3777
3778   def ExpandNames(self):
3779     if self.op.instances:
3780       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3781       self.needed_locks = {
3782         locking.LEVEL_NODE_RES: [],
3783         locking.LEVEL_INSTANCE: self.wanted_names,
3784         }
3785       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3786     else:
3787       self.wanted_names = None
3788       self.needed_locks = {
3789         locking.LEVEL_NODE_RES: locking.ALL_SET,
3790         locking.LEVEL_INSTANCE: locking.ALL_SET,
3791         }
3792     self.share_locks = {
3793       locking.LEVEL_NODE_RES: 1,
3794       locking.LEVEL_INSTANCE: 0,
3795       }
3796
3797   def DeclareLocks(self, level):
3798     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3799       self._LockInstancesNodes(primary_only=True, level=level)
3800
3801   def CheckPrereq(self):
3802     """Check prerequisites.
3803
3804     This only checks the optional instance list against the existing names.
3805
3806     """
3807     if self.wanted_names is None:
3808       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3809
3810     self.wanted_instances = \
3811         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3812
3813   def _EnsureChildSizes(self, disk):
3814     """Ensure children of the disk have the needed disk size.
3815
3816     This is valid mainly for DRBD8 and fixes an issue where the
3817     children have smaller disk size.
3818
3819     @param disk: an L{ganeti.objects.Disk} object
3820
3821     """
3822     if disk.dev_type == constants.LD_DRBD8:
3823       assert disk.children, "Empty children for DRBD8?"
3824       fchild = disk.children[0]
3825       mismatch = fchild.size < disk.size
3826       if mismatch:
3827         self.LogInfo("Child disk has size %d, parent %d, fixing",
3828                      fchild.size, disk.size)
3829         fchild.size = disk.size
3830
3831       # and we recurse on this child only, not on the metadev
3832       return self._EnsureChildSizes(fchild) or mismatch
3833     else:
3834       return False
3835
3836   def Exec(self, feedback_fn):
3837     """Verify the size of cluster disks.
3838
3839     """
3840     # TODO: check child disks too
3841     # TODO: check differences in size between primary/secondary nodes
3842     per_node_disks = {}
3843     for instance in self.wanted_instances:
3844       pnode = instance.primary_node
3845       if pnode not in per_node_disks:
3846         per_node_disks[pnode] = []
3847       for idx, disk in enumerate(instance.disks):
3848         per_node_disks[pnode].append((instance, idx, disk))
3849
3850     assert not (frozenset(per_node_disks.keys()) -
3851                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3852       "Not owning correct locks"
3853     assert not self.owned_locks(locking.LEVEL_NODE)
3854
3855     changed = []
3856     for node, dskl in per_node_disks.items():
3857       newl = [v[2].Copy() for v in dskl]
3858       for dsk in newl:
3859         self.cfg.SetDiskID(dsk, node)
3860       result = self.rpc.call_blockdev_getsize(node, newl)
3861       if result.fail_msg:
3862         self.LogWarning("Failure in blockdev_getsize call to node"
3863                         " %s, ignoring", node)
3864         continue
3865       if len(result.payload) != len(dskl):
3866         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3867                         " result.payload=%s", node, len(dskl), result.payload)
3868         self.LogWarning("Invalid result from node %s, ignoring node results",
3869                         node)
3870         continue
3871       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3872         if size is None:
3873           self.LogWarning("Disk %d of instance %s did not return size"
3874                           " information, ignoring", idx, instance.name)
3875           continue
3876         if not isinstance(size, (int, long)):
3877           self.LogWarning("Disk %d of instance %s did not return valid"
3878                           " size information, ignoring", idx, instance.name)
3879           continue
3880         size = size >> 20
3881         if size != disk.size:
3882           self.LogInfo("Disk %d of instance %s has mismatched size,"
3883                        " correcting: recorded %d, actual %d", idx,
3884                        instance.name, disk.size, size)
3885           disk.size = size
3886           self.cfg.Update(instance, feedback_fn)
3887           changed.append((instance.name, idx, size))
3888         if self._EnsureChildSizes(disk):
3889           self.cfg.Update(instance, feedback_fn)
3890           changed.append((instance.name, idx, disk.size))
3891     return changed
3892
3893
3894 class LUClusterRename(LogicalUnit):
3895   """Rename the cluster.
3896
3897   """
3898   HPATH = "cluster-rename"
3899   HTYPE = constants.HTYPE_CLUSTER
3900
3901   def BuildHooksEnv(self):
3902     """Build hooks env.
3903
3904     """
3905     return {
3906       "OP_TARGET": self.cfg.GetClusterName(),
3907       "NEW_NAME": self.op.name,
3908       }
3909
3910   def BuildHooksNodes(self):
3911     """Build hooks nodes.
3912
3913     """
3914     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3915
3916   def CheckPrereq(self):
3917     """Verify that the passed name is a valid one.
3918
3919     """
3920     hostname = netutils.GetHostname(name=self.op.name,
3921                                     family=self.cfg.GetPrimaryIPFamily())
3922
3923     new_name = hostname.name
3924     self.ip = new_ip = hostname.ip
3925     old_name = self.cfg.GetClusterName()
3926     old_ip = self.cfg.GetMasterIP()
3927     if new_name == old_name and new_ip == old_ip:
3928       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3929                                  " cluster has changed",
3930                                  errors.ECODE_INVAL)
3931     if new_ip != old_ip:
3932       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3933         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3934                                    " reachable on the network" %
3935                                    new_ip, errors.ECODE_NOTUNIQUE)
3936
3937     self.op.name = new_name
3938
3939   def Exec(self, feedback_fn):
3940     """Rename the cluster.
3941
3942     """
3943     clustername = self.op.name
3944     new_ip = self.ip
3945
3946     # shutdown the master IP
3947     master_params = self.cfg.GetMasterNetworkParameters()
3948     ems = self.cfg.GetUseExternalMipScript()
3949     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3950                                                      master_params, ems)
3951     result.Raise("Could not disable the master role")
3952
3953     try:
3954       cluster = self.cfg.GetClusterInfo()
3955       cluster.cluster_name = clustername
3956       cluster.master_ip = new_ip
3957       self.cfg.Update(cluster, feedback_fn)
3958
3959       # update the known hosts file
3960       ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3961       node_list = self.cfg.GetOnlineNodeList()
3962       try:
3963         node_list.remove(master_params.name)
3964       except ValueError:
3965         pass
3966       _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3967     finally:
3968       master_params.ip = new_ip
3969       result = self.rpc.call_node_activate_master_ip(master_params.name,
3970                                                      master_params, ems)
3971       msg = result.fail_msg
3972       if msg:
3973         self.LogWarning("Could not re-enable the master role on"
3974                         " the master, please restart manually: %s", msg)
3975
3976     return clustername
3977
3978
3979 def _ValidateNetmask(cfg, netmask):
3980   """Checks if a netmask is valid.
3981
3982   @type cfg: L{config.ConfigWriter}
3983   @param cfg: The cluster configuration
3984   @type netmask: int
3985   @param netmask: the netmask to be verified
3986   @raise errors.OpPrereqError: if the validation fails
3987
3988   """
3989   ip_family = cfg.GetPrimaryIPFamily()
3990   try:
3991     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3992   except errors.ProgrammerError:
3993     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3994                                ip_family, errors.ECODE_INVAL)
3995   if not ipcls.ValidateNetmask(netmask):
3996     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3997                                 (netmask), errors.ECODE_INVAL)
3998
3999
4000 class LUClusterSetParams(LogicalUnit):
4001   """Change the parameters of the cluster.
4002
4003   """
4004   HPATH = "cluster-modify"
4005   HTYPE = constants.HTYPE_CLUSTER
4006   REQ_BGL = False
4007
4008   def CheckArguments(self):
4009     """Check parameters
4010
4011     """
4012     if self.op.uid_pool:
4013       uidpool.CheckUidPool(self.op.uid_pool)
4014
4015     if self.op.add_uids:
4016       uidpool.CheckUidPool(self.op.add_uids)
4017
4018     if self.op.remove_uids:
4019       uidpool.CheckUidPool(self.op.remove_uids)
4020
4021     if self.op.master_netmask is not None:
4022       _ValidateNetmask(self.cfg, self.op.master_netmask)
4023
4024     if self.op.diskparams:
4025       for dt_params in self.op.diskparams.values():
4026         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4027       try:
4028         utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4029       except errors.OpPrereqError, err:
4030         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4031                                    errors.ECODE_INVAL)
4032
4033   def ExpandNames(self):
4034     # FIXME: in the future maybe other cluster params won't require checking on
4035     # all nodes to be modified.
4036     self.needed_locks = {
4037       locking.LEVEL_NODE: locking.ALL_SET,
4038       locking.LEVEL_INSTANCE: locking.ALL_SET,
4039       locking.LEVEL_NODEGROUP: locking.ALL_SET,
4040     }
4041     self.share_locks = {
4042         locking.LEVEL_NODE: 1,
4043         locking.LEVEL_INSTANCE: 1,
4044         locking.LEVEL_NODEGROUP: 1,
4045     }
4046
4047   def BuildHooksEnv(self):
4048     """Build hooks env.
4049
4050     """
4051     return {
4052       "OP_TARGET": self.cfg.GetClusterName(),
4053       "NEW_VG_NAME": self.op.vg_name,
4054       }
4055
4056   def BuildHooksNodes(self):
4057     """Build hooks nodes.
4058
4059     """
4060     mn = self.cfg.GetMasterNode()
4061     return ([mn], [mn])
4062
4063   def CheckPrereq(self):
4064     """Check prerequisites.
4065
4066     This checks whether the given params don't conflict and
4067     if the given volume group is valid.
4068
4069     """
4070     if self.op.vg_name is not None and not self.op.vg_name:
4071       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4072         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4073                                    " instances exist", errors.ECODE_INVAL)
4074
4075     if self.op.drbd_helper is not None and not self.op.drbd_helper:
4076       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4077         raise errors.OpPrereqError("Cannot disable drbd helper while"
4078                                    " drbd-based instances exist",
4079                                    errors.ECODE_INVAL)
4080
4081     node_list = self.owned_locks(locking.LEVEL_NODE)
4082
4083     # if vg_name not None, checks given volume group on all nodes
4084     if self.op.vg_name:
4085       vglist = self.rpc.call_vg_list(node_list)
4086       for node in node_list:
4087         msg = vglist[node].fail_msg
4088         if msg:
4089           # ignoring down node
4090           self.LogWarning("Error while gathering data on node %s"
4091                           " (ignoring node): %s", node, msg)
4092           continue
4093         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4094                                               self.op.vg_name,
4095                                               constants.MIN_VG_SIZE)
4096         if vgstatus:
4097           raise errors.OpPrereqError("Error on node '%s': %s" %
4098                                      (node, vgstatus), errors.ECODE_ENVIRON)
4099
4100     if self.op.drbd_helper:
4101       # checks given drbd helper on all nodes
4102       helpers = self.rpc.call_drbd_helper(node_list)
4103       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4104         if ninfo.offline:
4105           self.LogInfo("Not checking drbd helper on offline node %s", node)
4106           continue
4107         msg = helpers[node].fail_msg
4108         if msg:
4109           raise errors.OpPrereqError("Error checking drbd helper on node"
4110                                      " '%s': %s" % (node, msg),
4111                                      errors.ECODE_ENVIRON)
4112         node_helper = helpers[node].payload
4113         if node_helper != self.op.drbd_helper:
4114           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4115                                      (node, node_helper), errors.ECODE_ENVIRON)
4116
4117     self.cluster = cluster = self.cfg.GetClusterInfo()
4118     # validate params changes
4119     if self.op.beparams:
4120       objects.UpgradeBeParams(self.op.beparams)
4121       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4122       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4123
4124     if self.op.ndparams:
4125       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4126       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4127
4128       # TODO: we need a more general way to handle resetting
4129       # cluster-level parameters to default values
4130       if self.new_ndparams["oob_program"] == "":
4131         self.new_ndparams["oob_program"] = \
4132             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4133
4134     if self.op.hv_state:
4135       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4136                                             self.cluster.hv_state_static)
4137       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4138                                for hv, values in new_hv_state.items())
4139
4140     if self.op.disk_state:
4141       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4142                                                 self.cluster.disk_state_static)
4143       self.new_disk_state = \
4144         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4145                             for name, values in svalues.items()))
4146              for storage, svalues in new_disk_state.items())
4147
4148     if self.op.ipolicy:
4149       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4150                                             group_policy=False)
4151
4152       all_instances = self.cfg.GetAllInstancesInfo().values()
4153       violations = set()
4154       for group in self.cfg.GetAllNodeGroupsInfo().values():
4155         instances = frozenset([inst for inst in all_instances
4156                                if compat.any(node in group.members
4157                                              for node in inst.all_nodes)])
4158         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4159         ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4160         new = _ComputeNewInstanceViolations(ipol,
4161                                             new_ipolicy, instances)
4162         if new:
4163           violations.update(new)
4164
4165       if violations:
4166         self.LogWarning("After the ipolicy change the following instances"
4167                         " violate them: %s",
4168                         utils.CommaJoin(utils.NiceSort(violations)))
4169
4170     if self.op.nicparams:
4171       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4172       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4173       objects.NIC.CheckParameterSyntax(self.new_nicparams)
4174       nic_errors = []
4175
4176       # check all instances for consistency
4177       for instance in self.cfg.GetAllInstancesInfo().values():
4178         for nic_idx, nic in enumerate(instance.nics):
4179           params_copy = copy.deepcopy(nic.nicparams)
4180           params_filled = objects.FillDict(self.new_nicparams, params_copy)
4181
4182           # check parameter syntax
4183           try:
4184             objects.NIC.CheckParameterSyntax(params_filled)
4185           except errors.ConfigurationError, err:
4186             nic_errors.append("Instance %s, nic/%d: %s" %
4187                               (instance.name, nic_idx, err))
4188
4189           # if we're moving instances to routed, check that they have an ip
4190           target_mode = params_filled[constants.NIC_MODE]
4191           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4192             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4193                               " address" % (instance.name, nic_idx))
4194       if nic_errors:
4195         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4196                                    "\n".join(nic_errors), errors.ECODE_INVAL)
4197
4198     # hypervisor list/parameters
4199     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4200     if self.op.hvparams:
4201       for hv_name, hv_dict in self.op.hvparams.items():
4202         if hv_name not in self.new_hvparams:
4203           self.new_hvparams[hv_name] = hv_dict
4204         else:
4205           self.new_hvparams[hv_name].update(hv_dict)
4206
4207     # disk template parameters
4208     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4209     if self.op.diskparams:
4210       for dt_name, dt_params in self.op.diskparams.items():
4211         if dt_name not in self.op.diskparams:
4212           self.new_diskparams[dt_name] = dt_params
4213         else:
4214           self.new_diskparams[dt_name].update(dt_params)
4215
4216     # os hypervisor parameters
4217     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4218     if self.op.os_hvp:
4219       for os_name, hvs in self.op.os_hvp.items():
4220         if os_name not in self.new_os_hvp:
4221           self.new_os_hvp[os_name] = hvs
4222         else:
4223           for hv_name, hv_dict in hvs.items():
4224             if hv_name not in self.new_os_hvp[os_name]:
4225               self.new_os_hvp[os_name][hv_name] = hv_dict
4226             else:
4227               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4228
4229     # os parameters
4230     self.new_osp = objects.FillDict(cluster.osparams, {})
4231     if self.op.osparams:
4232       for os_name, osp in self.op.osparams.items():
4233         if os_name not in self.new_osp:
4234           self.new_osp[os_name] = {}
4235
4236         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4237                                                   use_none=True)
4238
4239         if not self.new_osp[os_name]:
4240           # we removed all parameters
4241           del self.new_osp[os_name]
4242         else:
4243           # check the parameter validity (remote check)
4244           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4245                          os_name, self.new_osp[os_name])
4246
4247     # changes to the hypervisor list
4248     if self.op.enabled_hypervisors is not None:
4249       self.hv_list = self.op.enabled_hypervisors
4250       for hv in self.hv_list:
4251         # if the hypervisor doesn't already exist in the cluster
4252         # hvparams, we initialize it to empty, and then (in both
4253         # cases) we make sure to fill the defaults, as we might not
4254         # have a complete defaults list if the hypervisor wasn't
4255         # enabled before
4256         if hv not in new_hvp:
4257           new_hvp[hv] = {}
4258         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4259         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4260     else:
4261       self.hv_list = cluster.enabled_hypervisors
4262
4263     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4264       # either the enabled list has changed, or the parameters have, validate
4265       for hv_name, hv_params in self.new_hvparams.items():
4266         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4267             (self.op.enabled_hypervisors and
4268              hv_name in self.op.enabled_hypervisors)):
4269           # either this is a new hypervisor, or its parameters have changed
4270           hv_class = hypervisor.GetHypervisor(hv_name)
4271           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4272           hv_class.CheckParameterSyntax(hv_params)
4273           _CheckHVParams(self, node_list, hv_name, hv_params)
4274
4275     if self.op.os_hvp:
4276       # no need to check any newly-enabled hypervisors, since the
4277       # defaults have already been checked in the above code-block
4278       for os_name, os_hvp in self.new_os_hvp.items():
4279         for hv_name, hv_params in os_hvp.items():
4280           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4281           # we need to fill in the new os_hvp on top of the actual hv_p
4282           cluster_defaults = self.new_hvparams.get(hv_name, {})
4283           new_osp = objects.FillDict(cluster_defaults, hv_params)
4284           hv_class = hypervisor.GetHypervisor(hv_name)
4285           hv_class.CheckParameterSyntax(new_osp)
4286           _CheckHVParams(self, node_list, hv_name, new_osp)
4287
4288     if self.op.default_iallocator:
4289       alloc_script = utils.FindFile(self.op.default_iallocator,
4290                                     constants.IALLOCATOR_SEARCH_PATH,
4291                                     os.path.isfile)
4292       if alloc_script is None:
4293         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4294                                    " specified" % self.op.default_iallocator,
4295                                    errors.ECODE_INVAL)
4296
4297   def Exec(self, feedback_fn):
4298     """Change the parameters of the cluster.
4299
4300     """
4301     if self.op.vg_name is not None:
4302       new_volume = self.op.vg_name
4303       if not new_volume:
4304         new_volume = None
4305       if new_volume != self.cfg.GetVGName():
4306         self.cfg.SetVGName(new_volume)
4307       else:
4308         feedback_fn("Cluster LVM configuration already in desired"
4309                     " state, not changing")
4310     if self.op.drbd_helper is not None:
4311       new_helper = self.op.drbd_helper
4312       if not new_helper:
4313         new_helper = None
4314       if new_helper != self.cfg.GetDRBDHelper():
4315         self.cfg.SetDRBDHelper(new_helper)
4316       else:
4317         feedback_fn("Cluster DRBD helper already in desired state,"
4318                     " not changing")
4319     if self.op.hvparams:
4320       self.cluster.hvparams = self.new_hvparams
4321     if self.op.os_hvp:
4322       self.cluster.os_hvp = self.new_os_hvp
4323     if self.op.enabled_hypervisors is not None:
4324       self.cluster.hvparams = self.new_hvparams
4325       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4326     if self.op.beparams:
4327       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4328     if self.op.nicparams:
4329       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4330     if self.op.ipolicy:
4331       self.cluster.ipolicy = self.new_ipolicy
4332     if self.op.osparams:
4333       self.cluster.osparams = self.new_osp
4334     if self.op.ndparams:
4335       self.cluster.ndparams = self.new_ndparams
4336     if self.op.diskparams:
4337       self.cluster.diskparams = self.new_diskparams
4338     if self.op.hv_state:
4339       self.cluster.hv_state_static = self.new_hv_state
4340     if self.op.disk_state:
4341       self.cluster.disk_state_static = self.new_disk_state
4342
4343     if self.op.candidate_pool_size is not None:
4344       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4345       # we need to update the pool size here, otherwise the save will fail
4346       _AdjustCandidatePool(self, [])
4347
4348     if self.op.maintain_node_health is not None:
4349       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4350         feedback_fn("Note: CONFD was disabled at build time, node health"
4351                     " maintenance is not useful (still enabling it)")
4352       self.cluster.maintain_node_health = self.op.maintain_node_health
4353
4354     if self.op.prealloc_wipe_disks is not None:
4355       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4356
4357     if self.op.add_uids is not None:
4358       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4359
4360     if self.op.remove_uids is not None:
4361       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4362
4363     if self.op.uid_pool is not None:
4364       self.cluster.uid_pool = self.op.uid_pool
4365
4366     if self.op.default_iallocator is not None:
4367       self.cluster.default_iallocator = self.op.default_iallocator
4368
4369     if self.op.reserved_lvs is not None:
4370       self.cluster.reserved_lvs = self.op.reserved_lvs
4371
4372     if self.op.use_external_mip_script is not None:
4373       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4374
4375     def helper_os(aname, mods, desc):
4376       desc += " OS list"
4377       lst = getattr(self.cluster, aname)
4378       for key, val in mods:
4379         if key == constants.DDM_ADD:
4380           if val in lst:
4381             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4382           else:
4383             lst.append(val)
4384         elif key == constants.DDM_REMOVE:
4385           if val in lst:
4386             lst.remove(val)
4387           else:
4388             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4389         else:
4390           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4391
4392     if self.op.hidden_os:
4393       helper_os("hidden_os", self.op.hidden_os, "hidden")
4394
4395     if self.op.blacklisted_os:
4396       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4397
4398     if self.op.master_netdev:
4399       master_params = self.cfg.GetMasterNetworkParameters()
4400       ems = self.cfg.GetUseExternalMipScript()
4401       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4402                   self.cluster.master_netdev)
4403       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4404                                                        master_params, ems)
4405       result.Raise("Could not disable the master ip")
4406       feedback_fn("Changing master_netdev from %s to %s" %
4407                   (master_params.netdev, self.op.master_netdev))
4408       self.cluster.master_netdev = self.op.master_netdev
4409
4410     if self.op.master_netmask:
4411       master_params = self.cfg.GetMasterNetworkParameters()
4412       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4413       result = self.rpc.call_node_change_master_netmask(master_params.name,
4414                                                         master_params.netmask,
4415                                                         self.op.master_netmask,
4416                                                         master_params.ip,
4417                                                         master_params.netdev)
4418       if result.fail_msg:
4419         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4420         feedback_fn(msg)
4421
4422       self.cluster.master_netmask = self.op.master_netmask
4423
4424     self.cfg.Update(self.cluster, feedback_fn)
4425
4426     if self.op.master_netdev:
4427       master_params = self.cfg.GetMasterNetworkParameters()
4428       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4429                   self.op.master_netdev)
4430       ems = self.cfg.GetUseExternalMipScript()
4431       result = self.rpc.call_node_activate_master_ip(master_params.name,
4432                                                      master_params, ems)
4433       if result.fail_msg:
4434         self.LogWarning("Could not re-enable the master ip on"
4435                         " the master, please restart manually: %s",
4436                         result.fail_msg)
4437
4438
4439 def _UploadHelper(lu, nodes, fname):
4440   """Helper for uploading a file and showing warnings.
4441
4442   """
4443   if os.path.exists(fname):
4444     result = lu.rpc.call_upload_file(nodes, fname)
4445     for to_node, to_result in result.items():
4446       msg = to_result.fail_msg
4447       if msg:
4448         msg = ("Copy of file %s to node %s failed: %s" %
4449                (fname, to_node, msg))
4450         lu.proc.LogWarning(msg)
4451
4452
4453 def _ComputeAncillaryFiles(cluster, redist):
4454   """Compute files external to Ganeti which need to be consistent.
4455
4456   @type redist: boolean
4457   @param redist: Whether to include files which need to be redistributed
4458
4459   """
4460   # Compute files for all nodes
4461   files_all = set([
4462     pathutils.SSH_KNOWN_HOSTS_FILE,
4463     pathutils.CONFD_HMAC_KEY,
4464     pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4465     pathutils.SPICE_CERT_FILE,
4466     pathutils.SPICE_CACERT_FILE,
4467     pathutils.RAPI_USERS_FILE,
4468     ])
4469
4470   if redist:
4471     # we need to ship at least the RAPI certificate
4472     files_all.add(pathutils.RAPI_CERT_FILE)
4473   else:
4474     files_all.update(pathutils.ALL_CERT_FILES)
4475     files_all.update(ssconf.SimpleStore().GetFileList())
4476
4477   if cluster.modify_etc_hosts:
4478     files_all.add(pathutils.ETC_HOSTS)
4479
4480   if cluster.use_external_mip_script:
4481     files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4482
4483   # Files which are optional, these must:
4484   # - be present in one other category as well
4485   # - either exist or not exist on all nodes of that category (mc, vm all)
4486   files_opt = set([
4487     pathutils.RAPI_USERS_FILE,
4488     ])
4489
4490   # Files which should only be on master candidates
4491   files_mc = set()
4492
4493   if not redist:
4494     files_mc.add(pathutils.CLUSTER_CONF_FILE)
4495
4496   # File storage
4497   if (not redist and
4498       (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4499     files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4500     files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4501
4502   # Files which should only be on VM-capable nodes
4503   files_vm = set(
4504     filename
4505     for hv_name in cluster.enabled_hypervisors
4506     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4507
4508   files_opt |= set(
4509     filename
4510     for hv_name in cluster.enabled_hypervisors
4511     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4512
4513   # Filenames in each category must be unique
4514   all_files_set = files_all | files_mc | files_vm
4515   assert (len(all_files_set) ==
4516           sum(map(len, [files_all, files_mc, files_vm]))), \
4517          "Found file listed in more than one file list"
4518
4519   # Optional files must be present in one other category
4520   assert all_files_set.issuperset(files_opt), \
4521          "Optional file not in a different required list"
4522
4523   # This one file should never ever be re-distributed via RPC
4524   assert not (redist and
4525               pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4526
4527   return (files_all, files_opt, files_mc, files_vm)
4528
4529
4530 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4531   """Distribute additional files which are part of the cluster configuration.
4532
4533   ConfigWriter takes care of distributing the config and ssconf files, but
4534   there are more files which should be distributed to all nodes. This function
4535   makes sure those are copied.
4536
4537   @param lu: calling logical unit
4538   @param additional_nodes: list of nodes not in the config to distribute to
4539   @type additional_vm: boolean
4540   @param additional_vm: whether the additional nodes are vm-capable or not
4541
4542   """
4543   # Gather target nodes
4544   cluster = lu.cfg.GetClusterInfo()
4545   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4546
4547   online_nodes = lu.cfg.GetOnlineNodeList()
4548   online_set = frozenset(online_nodes)
4549   vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4550
4551   if additional_nodes is not None:
4552     online_nodes.extend(additional_nodes)
4553     if additional_vm:
4554       vm_nodes.extend(additional_nodes)
4555
4556   # Never distribute to master node
4557   for nodelist in [online_nodes, vm_nodes]:
4558     if master_info.name in nodelist:
4559       nodelist.remove(master_info.name)
4560
4561   # Gather file lists
4562   (files_all, _, files_mc, files_vm) = \
4563     _ComputeAncillaryFiles(cluster, True)
4564
4565   # Never re-distribute configuration file from here
4566   assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4567               pathutils.CLUSTER_CONF_FILE in files_vm)
4568   assert not files_mc, "Master candidates not handled in this function"
4569
4570   filemap = [
4571     (online_nodes, files_all),
4572     (vm_nodes, files_vm),
4573     ]
4574
4575   # Upload the files
4576   for (node_list, files) in filemap:
4577     for fname in files:
4578       _UploadHelper(lu, node_list, fname)
4579
4580
4581 class LUClusterRedistConf(NoHooksLU):
4582   """Force the redistribution of cluster configuration.
4583
4584   This is a very simple LU.
4585
4586   """
4587   REQ_BGL = False
4588
4589   def ExpandNames(self):
4590     self.needed_locks = {
4591       locking.LEVEL_NODE: locking.ALL_SET,
4592     }
4593     self.share_locks[locking.LEVEL_NODE] = 1
4594
4595   def Exec(self, feedback_fn):
4596     """Redistribute the configuration.
4597
4598     """
4599     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4600     _RedistributeAncillaryFiles(self)
4601
4602
4603 class LUClusterActivateMasterIp(NoHooksLU):
4604   """Activate the master IP on the master node.
4605
4606   """
4607   def Exec(self, feedback_fn):
4608     """Activate the master IP.
4609
4610     """
4611     master_params = self.cfg.GetMasterNetworkParameters()
4612     ems = self.cfg.GetUseExternalMipScript()
4613     result = self.rpc.call_node_activate_master_ip(master_params.name,
4614                                                    master_params, ems)
4615     result.Raise("Could not activate the master IP")
4616
4617
4618 class LUClusterDeactivateMasterIp(NoHooksLU):
4619   """Deactivate the master IP on the master node.
4620
4621   """
4622   def Exec(self, feedback_fn):
4623     """Deactivate the master IP.
4624
4625     """
4626     master_params = self.cfg.GetMasterNetworkParameters()
4627     ems = self.cfg.GetUseExternalMipScript()
4628     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4629                                                      master_params, ems)
4630     result.Raise("Could not deactivate the master IP")
4631
4632
4633 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4634   """Sleep and poll for an instance's disk to sync.
4635
4636   """
4637   if not instance.disks or disks is not None and not disks:
4638     return True
4639
4640   disks = _ExpandCheckDisks(instance, disks)
4641
4642   if not oneshot:
4643     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4644
4645   node = instance.primary_node
4646
4647   for dev in disks:
4648     lu.cfg.SetDiskID(dev, node)
4649
4650   # TODO: Convert to utils.Retry
4651
4652   retries = 0
4653   degr_retries = 10 # in seconds, as we sleep 1 second each time
4654   while True:
4655     max_time = 0
4656     done = True
4657     cumul_degraded = False
4658     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4659     msg = rstats.fail_msg
4660     if msg:
4661       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4662       retries += 1
4663       if retries >= 10:
4664         raise errors.RemoteError("Can't contact node %s for mirror data,"
4665                                  " aborting." % node)
4666       time.sleep(6)
4667       continue
4668     rstats = rstats.payload
4669     retries = 0
4670     for i, mstat in enumerate(rstats):
4671       if mstat is None:
4672         lu.LogWarning("Can't compute data for node %s/%s",
4673                            node, disks[i].iv_name)
4674         continue
4675
4676       cumul_degraded = (cumul_degraded or
4677                         (mstat.is_degraded and mstat.sync_percent is None))
4678       if mstat.sync_percent is not None:
4679         done = False
4680         if mstat.estimated_time is not None:
4681           rem_time = ("%s remaining (estimated)" %
4682                       utils.FormatSeconds(mstat.estimated_time))
4683           max_time = mstat.estimated_time
4684         else:
4685           rem_time = "no time estimate"
4686         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4687                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4688
4689     # if we're done but degraded, let's do a few small retries, to
4690     # make sure we see a stable and not transient situation; therefore
4691     # we force restart of the loop
4692     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4693       logging.info("Degraded disks found, %d retries left", degr_retries)
4694       degr_retries -= 1
4695       time.sleep(1)
4696       continue
4697
4698     if done or oneshot:
4699       break
4700
4701     time.sleep(min(60, max_time))
4702
4703   if done:
4704     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4705   return not cumul_degraded
4706
4707
4708 def _BlockdevFind(lu, node, dev, instance):
4709   """Wrapper around call_blockdev_find to annotate diskparams.
4710
4711   @param lu: A reference to the lu object
4712   @param node: The node to call out
4713   @param dev: The device to find
4714   @param instance: The instance object the device belongs to
4715   @returns The result of the rpc call
4716
4717   """
4718   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4719   return lu.rpc.call_blockdev_find(node, disk)
4720
4721
4722 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4723   """Wrapper around L{_CheckDiskConsistencyInner}.
4724
4725   """
4726   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4727   return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4728                                     ldisk=ldisk)
4729
4730
4731 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4732                                ldisk=False):
4733   """Check that mirrors are not degraded.
4734
4735   @attention: The device has to be annotated already.
4736
4737   The ldisk parameter, if True, will change the test from the
4738   is_degraded attribute (which represents overall non-ok status for
4739   the device(s)) to the ldisk (representing the local storage status).
4740
4741   """
4742   lu.cfg.SetDiskID(dev, node)
4743
4744   result = True
4745
4746   if on_primary or dev.AssembleOnSecondary():
4747     rstats = lu.rpc.call_blockdev_find(node, dev)
4748     msg = rstats.fail_msg
4749     if msg:
4750       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4751       result = False
4752     elif not rstats.payload:
4753       lu.LogWarning("Can't find disk on node %s", node)
4754       result = False
4755     else:
4756       if ldisk:
4757         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4758       else:
4759         result = result and not rstats.payload.is_degraded
4760
4761   if dev.children:
4762     for child in dev.children:
4763       result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4764                                                      on_primary)
4765
4766   return result
4767
4768
4769 class LUOobCommand(NoHooksLU):
4770   """Logical unit for OOB handling.
4771
4772   """
4773   REQ_BGL = False
4774   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4775
4776   def ExpandNames(self):
4777     """Gather locks we need.
4778
4779     """
4780     if self.op.node_names:
4781       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4782       lock_names = self.op.node_names
4783     else:
4784       lock_names = locking.ALL_SET
4785
4786     self.needed_locks = {
4787       locking.LEVEL_NODE: lock_names,
4788       }
4789
4790   def CheckPrereq(self):
4791     """Check prerequisites.
4792
4793     This checks:
4794      - the node exists in the configuration
4795      - OOB is supported
4796
4797     Any errors are signaled by raising errors.OpPrereqError.
4798
4799     """
4800     self.nodes = []
4801     self.master_node = self.cfg.GetMasterNode()
4802
4803     assert self.op.power_delay >= 0.0
4804
4805     if self.op.node_names:
4806       if (self.op.command in self._SKIP_MASTER and
4807           self.master_node in self.op.node_names):
4808         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4809         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4810
4811         if master_oob_handler:
4812           additional_text = ("run '%s %s %s' if you want to operate on the"
4813                              " master regardless") % (master_oob_handler,
4814                                                       self.op.command,
4815                                                       self.master_node)
4816         else:
4817           additional_text = "it does not support out-of-band operations"
4818
4819         raise errors.OpPrereqError(("Operating on the master node %s is not"
4820                                     " allowed for %s; %s") %
4821                                    (self.master_node, self.op.command,
4822                                     additional_text), errors.ECODE_INVAL)
4823     else:
4824       self.op.node_names = self.cfg.GetNodeList()
4825       if self.op.command in self._SKIP_MASTER:
4826         self.op.node_names.remove(self.master_node)
4827
4828     if self.op.command in self._SKIP_MASTER:
4829       assert self.master_node not in self.op.node_names
4830
4831     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4832       if node is None:
4833         raise errors.OpPrereqError("Node %s not found" % node_name,
4834                                    errors.ECODE_NOENT)
4835       else:
4836         self.nodes.append(node)
4837
4838       if (not self.op.ignore_status and
4839           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4840         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4841                                     " not marked offline") % node_name,
4842                                    errors.ECODE_STATE)
4843
4844   def Exec(self, feedback_fn):
4845     """Execute OOB and return result if we expect any.
4846
4847     """
4848     master_node = self.master_node
4849     ret = []
4850
4851     for idx, node in enumerate(utils.NiceSort(self.nodes,
4852                                               key=lambda node: node.name)):
4853       node_entry = [(constants.RS_NORMAL, node.name)]
4854       ret.append(node_entry)
4855
4856       oob_program = _SupportsOob(self.cfg, node)
4857
4858       if not oob_program:
4859         node_entry.append((constants.RS_UNAVAIL, None))
4860         continue
4861
4862       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4863                    self.op.command, oob_program, node.name)
4864       result = self.rpc.call_run_oob(master_node, oob_program,
4865                                      self.op.command, node.name,
4866                                      self.op.timeout)
4867
4868       if result.fail_msg:
4869         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4870                         node.name, result.fail_msg)
4871         node_entry.append((constants.RS_NODATA, None))
4872       else:
4873         try:
4874           self._CheckPayload(result)
4875         except errors.OpExecError, err:
4876           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4877                           node.name, err)
4878           node_entry.append((constants.RS_NODATA, None))
4879         else:
4880           if self.op.command == constants.OOB_HEALTH:
4881             # For health we should log important events
4882             for item, status in result.payload:
4883               if status in [constants.OOB_STATUS_WARNING,
4884                             constants.OOB_STATUS_CRITICAL]:
4885                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4886                                 item, node.name, status)
4887
4888           if self.op.command == constants.OOB_POWER_ON:
4889             node.powered = True
4890           elif self.op.command == constants.OOB_POWER_OFF:
4891             node.powered = False
4892           elif self.op.command == constants.OOB_POWER_STATUS:
4893             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4894             if powered != node.powered:
4895               logging.warning(("Recorded power state (%s) of node '%s' does not"
4896                                " match actual power state (%s)"), node.powered,
4897                               node.name, powered)
4898
4899           # For configuration changing commands we should update the node
4900           if self.op.command in (constants.OOB_POWER_ON,
4901                                  constants.OOB_POWER_OFF):
4902             self.cfg.Update(node, feedback_fn)
4903
4904           node_entry.append((constants.RS_NORMAL, result.payload))
4905
4906           if (self.op.command == constants.OOB_POWER_ON and
4907               idx < len(self.nodes) - 1):
4908             time.sleep(self.op.power_delay)
4909
4910     return ret
4911
4912   def _CheckPayload(self, result):
4913     """Checks if the payload is valid.
4914
4915     @param result: RPC result
4916     @raises errors.OpExecError: If payload is not valid
4917
4918     """
4919     errs = []
4920     if self.op.command == constants.OOB_HEALTH:
4921       if not isinstance(result.payload, list):
4922         errs.append("command 'health' is expected to return a list but got %s" %
4923                     type(result.payload))
4924       else:
4925         for item, status in result.payload:
4926           if status not in constants.OOB_STATUSES:
4927             errs.append("health item '%s' has invalid status '%s'" %
4928                         (item, status))
4929
4930     if self.op.command == constants.OOB_POWER_STATUS:
4931       if not isinstance(result.payload, dict):
4932         errs.append("power-status is expected to return a dict but got %s" %
4933                     type(result.payload))
4934
4935     if self.op.command in [
4936       constants.OOB_POWER_ON,
4937       constants.OOB_POWER_OFF,
4938       constants.OOB_POWER_CYCLE,
4939       ]:
4940       if result.payload is not None:
4941         errs.append("%s is expected to not return payload but got '%s'" %
4942                     (self.op.command, result.payload))
4943
4944     if errs:
4945       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4946                                utils.CommaJoin(errs))
4947
4948
4949 class _OsQuery(_QueryBase):
4950   FIELDS = query.OS_FIELDS
4951
4952   def ExpandNames(self, lu):
4953     # Lock all nodes in shared mode
4954     # Temporary removal of locks, should be reverted later
4955     # TODO: reintroduce locks when they are lighter-weight
4956     lu.needed_locks = {}
4957     #self.share_locks[locking.LEVEL_NODE] = 1
4958     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4959
4960     # The following variables interact with _QueryBase._GetNames
4961     if self.names:
4962       self.wanted = self.names
4963     else:
4964       self.wanted = locking.ALL_SET
4965
4966     self.do_locking = self.use_locking
4967
4968   def DeclareLocks(self, lu, level):
4969     pass
4970
4971   @staticmethod
4972   def _DiagnoseByOS(rlist):
4973     """Remaps a per-node return list into an a per-os per-node dictionary
4974
4975     @param rlist: a map with node names as keys and OS objects as values
4976
4977     @rtype: dict
4978     @return: a dictionary with osnames as keys and as value another
4979         map, with nodes as keys and tuples of (path, status, diagnose,
4980         variants, parameters, api_versions) as values, eg::
4981
4982           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4983                                      (/srv/..., False, "invalid api")],
4984                            "node2": [(/srv/..., True, "", [], [])]}
4985           }
4986
4987     """
4988     all_os = {}
4989     # we build here the list of nodes that didn't fail the RPC (at RPC
4990     # level), so that nodes with a non-responding node daemon don't
4991     # make all OSes invalid
4992     good_nodes = [node_name for node_name in rlist
4993                   if not rlist[node_name].fail_msg]
4994     for node_name, nr in rlist.items():
4995       if nr.fail_msg or not nr.payload:
4996         continue
4997       for (name, path, status, diagnose, variants,
4998            params, api_versions) in nr.payload:
4999         if name not in all_os:
5000           # build a list of nodes for this os containing empty lists
5001           # for each node in node_list
5002           all_os[name] = {}
5003           for nname in good_nodes:
5004             all_os[name][nname] = []
5005         # convert params from [name, help] to (name, help)
5006         params = [tuple(v) for v in params]
5007         all_os[name][node_name].append((path, status, diagnose,
5008                                         variants, params, api_versions))
5009     return all_os
5010
5011   def _GetQueryData(self, lu):
5012     """Computes the list of nodes and their attributes.
5013
5014     """
5015     # Locking is not used
5016     assert not (compat.any(lu.glm.is_owned(level)
5017                            for level in locking.LEVELS
5018                            if level != locking.LEVEL_CLUSTER) or
5019                 self.do_locking or self.use_locking)
5020
5021     valid_nodes = [node.name
5022                    for node in lu.cfg.GetAllNodesInfo().values()
5023                    if not node.offline and node.vm_capable]
5024     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5025     cluster = lu.cfg.GetClusterInfo()
5026
5027     data = {}
5028
5029     for (os_name, os_data) in pol.items():
5030       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5031                           hidden=(os_name in cluster.hidden_os),
5032                           blacklisted=(os_name in cluster.blacklisted_os))
5033
5034       variants = set()
5035       parameters = set()
5036       api_versions = set()
5037
5038       for idx, osl in enumerate(os_data.values()):
5039         info.valid = bool(info.valid and osl and osl[0][1])
5040         if not info.valid:
5041           break
5042
5043         (node_variants, node_params, node_api) = osl[0][3:6]
5044         if idx == 0:
5045           # First entry
5046           variants.update(node_variants)
5047           parameters.update(node_params)
5048           api_versions.update(node_api)
5049         else:
5050           # Filter out inconsistent values
5051           variants.intersection_update(node_variants)
5052           parameters.intersection_update(node_params)
5053           api_versions.intersection_update(node_api)
5054
5055       info.variants = list(variants)
5056       info.parameters = list(parameters)
5057       info.api_versions = list(api_versions)
5058
5059       data[os_name] = info
5060
5061     # Prepare data in requested order
5062     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5063             if name in data]
5064
5065
5066 class LUOsDiagnose(NoHooksLU):
5067   """Logical unit for OS diagnose/query.
5068
5069   """
5070   REQ_BGL = False
5071
5072   @staticmethod
5073   def _BuildFilter(fields, names):
5074     """Builds a filter for querying OSes.
5075
5076     """
5077     name_filter = qlang.MakeSimpleFilter("name", names)
5078
5079     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5080     # respective field is not requested
5081     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5082                      for fname in ["hidden", "blacklisted"]
5083                      if fname not in fields]
5084     if "valid" not in fields:
5085       status_filter.append([qlang.OP_TRUE, "valid"])
5086
5087     if status_filter:
5088       status_filter.insert(0, qlang.OP_AND)
5089     else:
5090       status_filter = None
5091
5092     if name_filter and status_filter:
5093       return [qlang.OP_AND, name_filter, status_filter]
5094     elif name_filter:
5095       return name_filter
5096     else:
5097       return status_filter
5098
5099   def CheckArguments(self):
5100     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5101                        self.op.output_fields, False)
5102
5103   def ExpandNames(self):
5104     self.oq.ExpandNames(self)
5105
5106   def Exec(self, feedback_fn):
5107     return self.oq.OldStyleQuery(self)
5108
5109
5110 class LUNodeRemove(LogicalUnit):
5111   """Logical unit for removing a node.
5112
5113   """
5114   HPATH = "node-remove"
5115   HTYPE = constants.HTYPE_NODE
5116
5117   def BuildHooksEnv(self):
5118     """Build hooks env.
5119
5120     """
5121     return {
5122       "OP_TARGET": self.op.node_name,
5123       "NODE_NAME": self.op.node_name,
5124       }
5125
5126   def BuildHooksNodes(self):
5127     """Build hooks nodes.
5128
5129     This doesn't run on the target node in the pre phase as a failed
5130     node would then be impossible to remove.
5131
5132     """
5133     all_nodes = self.cfg.GetNodeList()
5134     try:
5135       all_nodes.remove(self.op.node_name)
5136     except ValueError:
5137       pass
5138     return (all_nodes, all_nodes)
5139
5140   def CheckPrereq(self):
5141     """Check prerequisites.
5142
5143     This checks:
5144      - the node exists in the configuration
5145      - it does not have primary or secondary instances
5146      - it's not the master
5147
5148     Any errors are signaled by raising errors.OpPrereqError.
5149
5150     """
5151     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5152     node = self.cfg.GetNodeInfo(self.op.node_name)
5153     assert node is not None
5154
5155     masternode = self.cfg.GetMasterNode()
5156     if node.name == masternode:
5157       raise errors.OpPrereqError("Node is the master node, failover to another"
5158                                  " node is required", errors.ECODE_INVAL)
5159
5160     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5161       if node.name in instance.all_nodes:
5162         raise errors.OpPrereqError("Instance %s is still running on the node,"
5163                                    " please remove first" % instance_name,
5164                                    errors.ECODE_INVAL)
5165     self.op.node_name = node.name
5166     self.node = node
5167
5168   def Exec(self, feedback_fn):
5169     """Removes the node from the cluster.
5170
5171     """
5172     node = self.node
5173     logging.info("Stopping the node daemon and removing configs from node %s",
5174                  node.name)
5175
5176     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5177
5178     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5179       "Not owning BGL"
5180
5181     # Promote nodes to master candidate as needed
5182     _AdjustCandidatePool(self, exceptions=[node.name])
5183     self.context.RemoveNode(node.name)
5184
5185     # Run post hooks on the node before it's removed
5186     _RunPostHook(self, node.name)
5187
5188     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5189     msg = result.fail_msg
5190     if msg:
5191       self.LogWarning("Errors encountered on the remote node while leaving"
5192                       " the cluster: %s", msg)
5193
5194     # Remove node from our /etc/hosts
5195     if self.cfg.GetClusterInfo().modify_etc_hosts:
5196       master_node = self.cfg.GetMasterNode()
5197       result = self.rpc.call_etc_hosts_modify(master_node,
5198                                               constants.ETC_HOSTS_REMOVE,
5199                                               node.name, None)
5200       result.Raise("Can't update hosts file with new host data")
5201       _RedistributeAncillaryFiles(self)
5202
5203
5204 class _NodeQuery(_QueryBase):
5205   FIELDS = query.NODE_FIELDS
5206
5207   def ExpandNames(self, lu):
5208     lu.needed_locks = {}
5209     lu.share_locks = _ShareAll()
5210
5211     if self.names:
5212       self.wanted = _GetWantedNodes(lu, self.names)
5213     else:
5214       self.wanted = locking.ALL_SET
5215
5216     self.do_locking = (self.use_locking and
5217                        query.NQ_LIVE in self.requested_data)
5218
5219     if self.do_locking:
5220       # If any non-static field is requested we need to lock the nodes
5221       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5222
5223   def DeclareLocks(self, lu, level):
5224     pass
5225
5226   def _GetQueryData(self, lu):
5227     """Computes the list of nodes and their attributes.
5228
5229     """
5230     all_info = lu.cfg.GetAllNodesInfo()
5231
5232     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5233
5234     # Gather data as requested
5235     if query.NQ_LIVE in self.requested_data:
5236       # filter out non-vm_capable nodes
5237       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5238
5239       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5240                                         [lu.cfg.GetHypervisorType()])
5241       live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5242                        for (name, nresult) in node_data.items()
5243                        if not nresult.fail_msg and nresult.payload)
5244     else:
5245       live_data = None
5246
5247     if query.NQ_INST in self.requested_data:
5248       node_to_primary = dict([(name, set()) for name in nodenames])
5249       node_to_secondary = dict([(name, set()) for name in nodenames])
5250
5251       inst_data = lu.cfg.GetAllInstancesInfo()
5252
5253       for inst in inst_data.values():
5254         if inst.primary_node in node_to_primary:
5255           node_to_primary[inst.primary_node].add(inst.name)
5256         for secnode in inst.secondary_nodes:
5257           if secnode in node_to_secondary:
5258             node_to_secondary[secnode].add(inst.name)
5259     else:
5260       node_to_primary = None
5261       node_to_secondary = None
5262
5263     if query.NQ_OOB in self.requested_data:
5264       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5265                          for name, node in all_info.iteritems())
5266     else:
5267       oob_support = None
5268
5269     if query.NQ_GROUP in self.requested_data:
5270       groups = lu.cfg.GetAllNodeGroupsInfo()
5271     else:
5272       groups = {}
5273
5274     return query.NodeQueryData([all_info[name] for name in nodenames],
5275                                live_data, lu.cfg.GetMasterNode(),
5276                                node_to_primary, node_to_secondary, groups,
5277                                oob_support, lu.cfg.GetClusterInfo())
5278
5279
5280 class LUNodeQuery(NoHooksLU):
5281   """Logical unit for querying nodes.
5282
5283   """
5284   # pylint: disable=W0142
5285   REQ_BGL = False
5286
5287   def CheckArguments(self):
5288     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5289                          self.op.output_fields, self.op.use_locking)
5290
5291   def ExpandNames(self):
5292     self.nq.ExpandNames(self)
5293
5294   def DeclareLocks(self, level):
5295     self.nq.DeclareLocks(self, level)
5296
5297   def Exec(self, feedback_fn):
5298     return self.nq.OldStyleQuery(self)
5299
5300
5301 class LUNodeQueryvols(NoHooksLU):
5302   """Logical unit for getting volumes on node(s).
5303
5304   """
5305   REQ_BGL = False
5306   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5307   _FIELDS_STATIC = utils.FieldSet("node")
5308
5309   def CheckArguments(self):
5310     _CheckOutputFields(static=self._FIELDS_STATIC,
5311                        dynamic=self._FIELDS_DYNAMIC,
5312                        selected=self.op.output_fields)
5313
5314   def ExpandNames(self):
5315     self.share_locks = _ShareAll()
5316     self.needed_locks = {}
5317
5318     if not self.op.nodes:
5319       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5320     else:
5321       self.needed_locks[locking.LEVEL_NODE] = \
5322         _GetWantedNodes(self, self.op.nodes)
5323
5324   def Exec(self, feedback_fn):
5325     """Computes the list of nodes and their attributes.
5326
5327     """
5328     nodenames = self.owned_locks(locking.LEVEL_NODE)
5329     volumes = self.rpc.call_node_volumes(nodenames)
5330
5331     ilist = self.cfg.GetAllInstancesInfo()
5332     vol2inst = _MapInstanceDisksToNodes(ilist.values())
5333
5334     output = []
5335     for node in nodenames:
5336       nresult = volumes[node]
5337       if nresult.offline:
5338         continue
5339       msg = nresult.fail_msg
5340       if msg:
5341         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5342         continue
5343
5344       node_vols = sorted(nresult.payload,
5345                          key=operator.itemgetter("dev"))
5346
5347       for vol in node_vols:
5348         node_output = []
5349         for field in self.op.output_fields:
5350           if field == "node":
5351             val = node
5352           elif field == "phys":
5353             val = vol["dev"]
5354           elif field == "vg":
5355             val = vol["vg"]
5356           elif field == "name":
5357             val = vol["name"]
5358           elif field == "size":
5359             val = int(float(vol["size"]))
5360           elif field == "instance":
5361             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5362           else:
5363             raise errors.ParameterError(field)
5364           node_output.append(str(val))
5365
5366         output.append(node_output)
5367
5368     return output
5369
5370
5371 class LUNodeQueryStorage(NoHooksLU):
5372   """Logical unit for getting information on storage units on node(s).
5373
5374   """
5375   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5376   REQ_BGL = False
5377
5378   def CheckArguments(self):
5379     _CheckOutputFields(static=self._FIELDS_STATIC,
5380                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5381                        selected=self.op.output_fields)
5382
5383   def ExpandNames(self):
5384     self.share_locks = _ShareAll()
5385     self.needed_locks = {}
5386
5387     if self.op.nodes:
5388       self.needed_locks[locking.LEVEL_NODE] = \
5389         _GetWantedNodes(self, self.op.nodes)
5390     else:
5391       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5392
5393   def Exec(self, feedback_fn):
5394     """Computes the list of nodes and their attributes.
5395
5396     """
5397     self.nodes = self.owned_locks(locking.LEVEL_NODE)
5398
5399     # Always get name to sort by
5400     if constants.SF_NAME in self.op.output_fields:
5401       fields = self.op.output_fields[:]
5402     else:
5403       fields = [constants.SF_NAME] + self.op.output_fields
5404
5405     # Never ask for node or type as it's only known to the LU
5406     for extra in [constants.SF_NODE, constants.SF_TYPE]:
5407       while extra in fields:
5408         fields.remove(extra)
5409
5410     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5411     name_idx = field_idx[constants.SF_NAME]
5412
5413     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5414     data = self.rpc.call_storage_list(self.nodes,
5415                                       self.op.storage_type, st_args,
5416                                       self.op.name, fields)
5417
5418     result = []
5419
5420     for node in utils.NiceSort(self.nodes):
5421       nresult = data[node]
5422       if nresult.offline:
5423         continue
5424
5425       msg = nresult.fail_msg
5426       if msg:
5427         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5428         continue
5429
5430       rows = dict([(row[name_idx], row) for row in nresult.payload])
5431
5432       for name in utils.NiceSort(rows.keys()):
5433         row = rows[name]
5434
5435         out = []
5436
5437         for field in self.op.output_fields:
5438           if field == constants.SF_NODE:
5439             val = node
5440           elif field == constants.SF_TYPE:
5441             val = self.op.storage_type
5442           elif field in field_idx:
5443             val = row[field_idx[field]]
5444           else:
5445             raise errors.ParameterError(field)
5446
5447           out.append(val)
5448
5449         result.append(out)
5450
5451     return result
5452
5453
5454 class _InstanceQuery(_QueryBase):
5455   FIELDS = query.INSTANCE_FIELDS
5456
5457   def ExpandNames(self, lu):
5458     lu.needed_locks = {}
5459     lu.share_locks = _ShareAll()
5460
5461     if self.names:
5462       self.wanted = _GetWantedInstances(lu, self.names)
5463     else:
5464       self.wanted = locking.ALL_SET
5465
5466     self.do_locking = (self.use_locking and
5467                        query.IQ_LIVE in self.requested_data)
5468     if self.do_locking:
5469       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5470       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5471       lu.needed_locks[locking.LEVEL_NODE] = []
5472       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5473
5474     self.do_grouplocks = (self.do_locking and
5475                           query.IQ_NODES in self.requested_data)
5476
5477   def DeclareLocks(self, lu, level):
5478     if self.do_locking:
5479       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5480         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5481
5482         # Lock all groups used by instances optimistically; this requires going
5483         # via the node before it's locked, requiring verification later on
5484         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5485           set(group_uuid
5486               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5487               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5488       elif level == locking.LEVEL_NODE:
5489         lu._LockInstancesNodes() # pylint: disable=W0212
5490
5491   @staticmethod
5492   def _CheckGroupLocks(lu):
5493     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5494     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5495
5496     # Check if node groups for locked instances are still correct
5497     for instance_name in owned_instances:
5498       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5499
5500   def _GetQueryData(self, lu):
5501     """Computes the list of instances and their attributes.
5502
5503     """
5504     if self.do_grouplocks:
5505       self._CheckGroupLocks(lu)
5506
5507     cluster = lu.cfg.GetClusterInfo()
5508     all_info = lu.cfg.GetAllInstancesInfo()
5509
5510     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5511
5512     instance_list = [all_info[name] for name in instance_names]
5513     nodes = frozenset(itertools.chain(*(inst.all_nodes
5514                                         for inst in instance_list)))
5515     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5516     bad_nodes = []
5517     offline_nodes = []
5518     wrongnode_inst = set()
5519
5520     # Gather data as requested
5521     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5522       live_data = {}
5523       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5524       for name in nodes:
5525         result = node_data[name]
5526         if result.offline:
5527           # offline nodes will be in both lists
5528           assert result.fail_msg
5529           offline_nodes.append(name)
5530         if result.fail_msg:
5531           bad_nodes.append(name)
5532         elif result.payload:
5533           for inst in result.payload:
5534             if inst in all_info:
5535               if all_info[inst].primary_node == name:
5536                 live_data.update(result.payload)
5537               else:
5538                 wrongnode_inst.add(inst)
5539             else:
5540               # orphan instance; we don't list it here as we don't
5541               # handle this case yet in the output of instance listing
5542               logging.warning("Orphan instance '%s' found on node %s",
5543                               inst, name)
5544         # else no instance is alive
5545     else:
5546       live_data = {}
5547
5548     if query.IQ_DISKUSAGE in self.requested_data:
5549       gmi = ganeti.masterd.instance
5550       disk_usage = dict((inst.name,
5551                          gmi.ComputeDiskSize(inst.disk_template,
5552                                              [{constants.IDISK_SIZE: disk.size}
5553                                               for disk in inst.disks]))
5554                         for inst in instance_list)
5555     else:
5556       disk_usage = None
5557
5558     if query.IQ_CONSOLE in self.requested_data:
5559       consinfo = {}
5560       for inst in instance_list:
5561         if inst.name in live_data:
5562           # Instance is running
5563           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5564         else:
5565           consinfo[inst.name] = None
5566       assert set(consinfo.keys()) == set(instance_names)
5567     else:
5568       consinfo = None
5569
5570     if query.IQ_NODES in self.requested_data:
5571       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5572                                             instance_list)))
5573       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5574       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5575                     for uuid in set(map(operator.attrgetter("group"),
5576                                         nodes.values())))
5577     else:
5578       nodes = None
5579       groups = None
5580
5581     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5582                                    disk_usage, offline_nodes, bad_nodes,
5583                                    live_data, wrongnode_inst, consinfo,
5584                                    nodes, groups)
5585
5586
5587 class LUQuery(NoHooksLU):
5588   """Query for resources/items of a certain kind.
5589
5590   """
5591   # pylint: disable=W0142
5592   REQ_BGL = False
5593
5594   def CheckArguments(self):
5595     qcls = _GetQueryImplementation(self.op.what)
5596
5597     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5598
5599   def ExpandNames(self):
5600     self.impl.ExpandNames(self)
5601
5602   def DeclareLocks(self, level):
5603     self.impl.DeclareLocks(self, level)
5604
5605   def Exec(self, feedback_fn):
5606     return self.impl.NewStyleQuery(self)
5607
5608
5609 class LUQueryFields(NoHooksLU):
5610   """Query for resources/items of a certain kind.
5611
5612   """
5613   # pylint: disable=W0142
5614   REQ_BGL = False
5615
5616   def CheckArguments(self):
5617     self.qcls = _GetQueryImplementation(self.op.what)
5618
5619   def ExpandNames(self):
5620     self.needed_locks = {}
5621
5622   def Exec(self, feedback_fn):
5623     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5624
5625
5626 class LUNodeModifyStorage(NoHooksLU):
5627   """Logical unit for modifying a storage volume on a node.
5628
5629   """
5630   REQ_BGL = False
5631
5632   def CheckArguments(self):
5633     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5634
5635     storage_type = self.op.storage_type
5636
5637     try:
5638       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5639     except KeyError:
5640       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5641                                  " modified" % storage_type,
5642                                  errors.ECODE_INVAL)
5643
5644     diff = set(self.op.changes.keys()) - modifiable
5645     if diff:
5646       raise errors.OpPrereqError("The following fields can not be modified for"
5647                                  " storage units of type '%s': %r" %
5648                                  (storage_type, list(diff)),
5649                                  errors.ECODE_INVAL)
5650
5651   def ExpandNames(self):
5652     self.needed_locks = {
5653       locking.LEVEL_NODE: self.op.node_name,
5654       }
5655
5656   def Exec(self, feedback_fn):
5657     """Computes the list of nodes and their attributes.
5658
5659     """
5660     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5661     result = self.rpc.call_storage_modify(self.op.node_name,
5662                                           self.op.storage_type, st_args,
5663                                           self.op.name, self.op.changes)
5664     result.Raise("Failed to modify storage unit '%s' on %s" %
5665                  (self.op.name, self.op.node_name))
5666
5667
5668 class LUNodeAdd(LogicalUnit):
5669   """Logical unit for adding node to the cluster.
5670
5671   """
5672   HPATH = "node-add"
5673   HTYPE = constants.HTYPE_NODE
5674   _NFLAGS = ["master_capable", "vm_capable"]
5675
5676   def CheckArguments(self):
5677     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5678     # validate/normalize the node name
5679     self.hostname = netutils.GetHostname(name=self.op.node_name,
5680                                          family=self.primary_ip_family)
5681     self.op.node_name = self.hostname.name
5682
5683     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5684       raise errors.OpPrereqError("Cannot readd the master node",
5685                                  errors.ECODE_STATE)
5686
5687     if self.op.readd and self.op.group:
5688       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5689                                  " being readded", errors.ECODE_INVAL)
5690
5691   def BuildHooksEnv(self):
5692     """Build hooks env.
5693
5694     This will run on all nodes before, and on all nodes + the new node after.
5695
5696     """
5697     return {
5698       "OP_TARGET": self.op.node_name,
5699       "NODE_NAME": self.op.node_name,
5700       "NODE_PIP": self.op.primary_ip,
5701       "NODE_SIP": self.op.secondary_ip,
5702       "MASTER_CAPABLE": str(self.op.master_capable),
5703       "VM_CAPABLE": str(self.op.vm_capable),
5704       }
5705
5706   def BuildHooksNodes(self):
5707     """Build hooks nodes.
5708
5709     """
5710     # Exclude added node
5711     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5712     post_nodes = pre_nodes + [self.op.node_name, ]
5713
5714     return (pre_nodes, post_nodes)
5715
5716   def CheckPrereq(self):
5717     """Check prerequisites.
5718
5719     This checks:
5720      - the new node is not already in the config
5721      - it is resolvable
5722      - its parameters (single/dual homed) matches the cluster
5723
5724     Any errors are signaled by raising errors.OpPrereqError.
5725
5726     """
5727     cfg = self.cfg
5728     hostname = self.hostname
5729     node = hostname.name
5730     primary_ip = self.op.primary_ip = hostname.ip
5731     if self.op.secondary_ip is None:
5732       if self.primary_ip_family == netutils.IP6Address.family:
5733         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5734                                    " IPv4 address must be given as secondary",
5735                                    errors.ECODE_INVAL)
5736       self.op.secondary_ip = primary_ip
5737
5738     secondary_ip = self.op.secondary_ip
5739     if not netutils.IP4Address.IsValid(secondary_ip):
5740       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5741                                  " address" % secondary_ip, errors.ECODE_INVAL)
5742
5743     node_list = cfg.GetNodeList()
5744     if not self.op.readd and node in node_list:
5745       raise errors.OpPrereqError("Node %s is already in the configuration" %
5746                                  node, errors.ECODE_EXISTS)
5747     elif self.op.readd and node not in node_list:
5748       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5749                                  errors.ECODE_NOENT)
5750
5751     self.changed_primary_ip = False
5752
5753     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5754       if self.op.readd and node == existing_node_name:
5755         if existing_node.secondary_ip != secondary_ip:
5756           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5757                                      " address configuration as before",
5758                                      errors.ECODE_INVAL)
5759         if existing_node.primary_ip != primary_ip:
5760           self.changed_primary_ip = True
5761
5762         continue
5763
5764       if (existing_node.primary_ip == primary_ip or
5765           existing_node.secondary_ip == primary_ip or
5766           existing_node.primary_ip == secondary_ip or
5767           existing_node.secondary_ip == secondary_ip):
5768         raise errors.OpPrereqError("New node ip address(es) conflict with"
5769                                    " existing node %s" % existing_node.name,
5770                                    errors.ECODE_NOTUNIQUE)
5771
5772     # After this 'if' block, None is no longer a valid value for the
5773     # _capable op attributes
5774     if self.op.readd:
5775       old_node = self.cfg.GetNodeInfo(node)
5776       assert old_node is not None, "Can't retrieve locked node %s" % node
5777       for attr in self._NFLAGS:
5778         if getattr(self.op, attr) is None:
5779           setattr(self.op, attr, getattr(old_node, attr))
5780     else:
5781       for attr in self._NFLAGS:
5782         if getattr(self.op, attr) is None:
5783           setattr(self.op, attr, True)
5784
5785     if self.op.readd and not self.op.vm_capable:
5786       pri, sec = cfg.GetNodeInstances(node)
5787       if pri or sec:
5788         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5789                                    " flag set to false, but it already holds"
5790                                    " instances" % node,
5791                                    errors.ECODE_STATE)
5792
5793     # check that the type of the node (single versus dual homed) is the
5794     # same as for the master
5795     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5796     master_singlehomed = myself.secondary_ip == myself.primary_ip
5797     newbie_singlehomed = secondary_ip == primary_ip
5798     if master_singlehomed != newbie_singlehomed:
5799       if master_singlehomed:
5800         raise errors.OpPrereqError("The master has no secondary ip but the"
5801                                    " new node has one",
5802                                    errors.ECODE_INVAL)
5803       else:
5804         raise errors.OpPrereqError("The master has a secondary ip but the"
5805                                    " new node doesn't have one",
5806                                    errors.ECODE_INVAL)
5807
5808     # checks reachability
5809     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5810       raise errors.OpPrereqError("Node not reachable by ping",
5811                                  errors.ECODE_ENVIRON)
5812
5813     if not newbie_singlehomed:
5814       # check reachability from my secondary ip to newbie's secondary ip
5815       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5816                               source=myself.secondary_ip):
5817         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5818                                    " based ping to node daemon port",
5819                                    errors.ECODE_ENVIRON)
5820
5821     if self.op.readd:
5822       exceptions = [node]
5823     else:
5824       exceptions = []
5825
5826     if self.op.master_capable:
5827       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5828     else:
5829       self.master_candidate = False
5830
5831     if self.op.readd:
5832       self.new_node = old_node
5833     else:
5834       node_group = cfg.LookupNodeGroup(self.op.group)
5835       self.new_node = objects.Node(name=node,
5836                                    primary_ip=primary_ip,
5837                                    secondary_ip=secondary_ip,
5838                                    master_candidate=self.master_candidate,
5839                                    offline=False, drained=False,
5840                                    group=node_group)
5841
5842     if self.op.ndparams:
5843       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5844
5845     if self.op.hv_state:
5846       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5847
5848     if self.op.disk_state:
5849       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5850
5851     # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5852     #       it a property on the base class.
5853     result = rpc.DnsOnlyRunner().call_version([node])[node]
5854     result.Raise("Can't get version information from node %s" % node)
5855     if constants.PROTOCOL_VERSION == result.payload:
5856       logging.info("Communication to node %s fine, sw version %s match",
5857                    node, result.payload)
5858     else:
5859       raise errors.OpPrereqError("Version mismatch master version %s,"
5860                                  " node version %s" %
5861                                  (constants.PROTOCOL_VERSION, result.payload),
5862                                  errors.ECODE_ENVIRON)
5863
5864   def Exec(self, feedback_fn):
5865     """Adds the new node to the cluster.
5866
5867     """
5868     new_node = self.new_node
5869     node = new_node.name
5870
5871     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5872       "Not owning BGL"
5873
5874     # We adding a new node so we assume it's powered
5875     new_node.powered = True
5876
5877     # for re-adds, reset the offline/drained/master-candidate flags;
5878     # we need to reset here, otherwise offline would prevent RPC calls
5879     # later in the procedure; this also means that if the re-add
5880     # fails, we are left with a non-offlined, broken node
5881     if self.op.readd:
5882       new_node.drained = new_node.offline = False # pylint: disable=W0201
5883       self.LogInfo("Readding a node, the offline/drained flags were reset")
5884       # if we demote the node, we do cleanup later in the procedure
5885       new_node.master_candidate = self.master_candidate
5886       if self.changed_primary_ip:
5887         new_node.primary_ip = self.op.primary_ip
5888
5889     # copy the master/vm_capable flags
5890     for attr in self._NFLAGS:
5891       setattr(new_node, attr, getattr(self.op, attr))
5892
5893     # notify the user about any possible mc promotion
5894     if new_node.master_candidate:
5895       self.LogInfo("Node will be a master candidate")
5896
5897     if self.op.ndparams:
5898       new_node.ndparams = self.op.ndparams
5899     else:
5900       new_node.ndparams = {}
5901
5902     if self.op.hv_state:
5903       new_node.hv_state_static = self.new_hv_state
5904
5905     if self.op.disk_state:
5906       new_node.disk_state_static = self.new_disk_state
5907
5908     # Add node to our /etc/hosts, and add key to known_hosts
5909     if self.cfg.GetClusterInfo().modify_etc_hosts:
5910       master_node = self.cfg.GetMasterNode()
5911       result = self.rpc.call_etc_hosts_modify(master_node,
5912                                               constants.ETC_HOSTS_ADD,
5913                                               self.hostname.name,
5914                                               self.hostname.ip)
5915       result.Raise("Can't update hosts file with new host data")
5916
5917     if new_node.secondary_ip != new_node.primary_ip:
5918       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5919                                False)
5920
5921     node_verify_list = [self.cfg.GetMasterNode()]
5922     node_verify_param = {
5923       constants.NV_NODELIST: ([node], {}),
5924       # TODO: do a node-net-test as well?
5925     }
5926
5927     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5928                                        self.cfg.GetClusterName())
5929     for verifier in node_verify_list:
5930       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5931       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5932       if nl_payload:
5933         for failed in nl_payload:
5934           feedback_fn("ssh/hostname verification failed"
5935                       " (checking from %s): %s" %
5936                       (verifier, nl_payload[failed]))
5937         raise errors.OpExecError("ssh/hostname verification failed")
5938
5939     if self.op.readd:
5940       _RedistributeAncillaryFiles(self)
5941       self.context.ReaddNode(new_node)
5942       # make sure we redistribute the config
5943       self.cfg.Update(new_node, feedback_fn)
5944       # and make sure the new node will not have old files around
5945       if not new_node.master_candidate:
5946         result = self.rpc.call_node_demote_from_mc(new_node.name)
5947         msg = result.fail_msg
5948         if msg:
5949           self.LogWarning("Node failed to demote itself from master"
5950                           " candidate status: %s" % msg)
5951     else:
5952       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5953                                   additional_vm=self.op.vm_capable)
5954       self.context.AddNode(new_node, self.proc.GetECId())
5955
5956
5957 class LUNodeSetParams(LogicalUnit):
5958   """Modifies the parameters of a node.
5959
5960   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5961       to the node role (as _ROLE_*)
5962   @cvar _R2F: a dictionary from node role to tuples of flags
5963   @cvar _FLAGS: a list of attribute names corresponding to the flags
5964
5965   """
5966   HPATH = "node-modify"
5967   HTYPE = constants.HTYPE_NODE
5968   REQ_BGL = False
5969   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5970   _F2R = {
5971     (True, False, False): _ROLE_CANDIDATE,
5972     (False, True, False): _ROLE_DRAINED,
5973     (False, False, True): _ROLE_OFFLINE,
5974     (False, False, False): _ROLE_REGULAR,
5975     }
5976   _R2F = dict((v, k) for k, v in _F2R.items())
5977   _FLAGS = ["master_candidate", "drained", "offline"]
5978
5979   def CheckArguments(self):
5980     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5981     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5982                 self.op.master_capable, self.op.vm_capable,
5983                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5984                 self.op.disk_state]
5985     if all_mods.count(None) == len(all_mods):
5986       raise errors.OpPrereqError("Please pass at least one modification",
5987                                  errors.ECODE_INVAL)
5988     if all_mods.count(True) > 1:
5989       raise errors.OpPrereqError("Can't set the node into more than one"
5990                                  " state at the same time",
5991                                  errors.ECODE_INVAL)
5992
5993     # Boolean value that tells us whether we might be demoting from MC
5994     self.might_demote = (self.op.master_candidate is False or
5995                          self.op.offline is True or
5996                          self.op.drained is True or
5997                          self.op.master_capable is False)
5998
5999     if self.op.secondary_ip:
6000       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6001         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6002                                    " address" % self.op.secondary_ip,
6003                                    errors.ECODE_INVAL)
6004
6005     self.lock_all = self.op.auto_promote and self.might_demote
6006     self.lock_instances = self.op.secondary_ip is not None
6007
6008   def _InstanceFilter(self, instance):
6009     """Filter for getting affected instances.
6010
6011     """
6012     return (instance.disk_template in constants.DTS_INT_MIRROR and
6013             self.op.node_name in instance.all_nodes)
6014
6015   def ExpandNames(self):
6016     if self.lock_all:
6017       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
6018     else:
6019       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
6020
6021     # Since modifying a node can have severe effects on currently running
6022     # operations the resource lock is at least acquired in shared mode
6023     self.needed_locks[locking.LEVEL_NODE_RES] = \
6024       self.needed_locks[locking.LEVEL_NODE]
6025
6026     # Get node resource and instance locks in shared mode; they are not used
6027     # for anything but read-only access
6028     self.share_locks[locking.LEVEL_NODE_RES] = 1
6029     self.share_locks[locking.LEVEL_INSTANCE] = 1
6030
6031     if self.lock_instances:
6032       self.needed_locks[locking.LEVEL_INSTANCE] = \
6033         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6034
6035   def BuildHooksEnv(self):
6036     """Build hooks env.
6037
6038     This runs on the master node.
6039
6040     """
6041     return {
6042       "OP_TARGET": self.op.node_name,
6043       "MASTER_CANDIDATE": str(self.op.master_candidate),
6044       "OFFLINE": str(self.op.offline),
6045       "DRAINED": str(self.op.drained),
6046       "MASTER_CAPABLE": str(self.op.master_capable),
6047       "VM_CAPABLE": str(self.op.vm_capable),
6048       }
6049
6050   def BuildHooksNodes(self):
6051     """Build hooks nodes.
6052
6053     """
6054     nl = [self.cfg.GetMasterNode(), self.op.node_name]
6055     return (nl, nl)
6056
6057   def CheckPrereq(self):
6058     """Check prerequisites.
6059
6060     This only checks the instance list against the existing names.
6061
6062     """
6063     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6064
6065     if self.lock_instances:
6066       affected_instances = \
6067         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6068
6069       # Verify instance locks
6070       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6071       wanted_instances = frozenset(affected_instances.keys())
6072       if wanted_instances - owned_instances:
6073         raise errors.OpPrereqError("Instances affected by changing node %s's"
6074                                    " secondary IP address have changed since"
6075                                    " locks were acquired, wanted '%s', have"
6076                                    " '%s'; retry the operation" %
6077                                    (self.op.node_name,
6078                                     utils.CommaJoin(wanted_instances),
6079                                     utils.CommaJoin(owned_instances)),
6080                                    errors.ECODE_STATE)
6081     else:
6082       affected_instances = None
6083
6084     if (self.op.master_candidate is not None or
6085         self.op.drained is not None or
6086         self.op.offline is not None):
6087       # we can't change the master's node flags
6088       if self.op.node_name == self.cfg.GetMasterNode():
6089         raise errors.OpPrereqError("The master role can be changed"
6090                                    " only via master-failover",
6091                                    errors.ECODE_INVAL)
6092
6093     if self.op.master_candidate and not node.master_capable:
6094       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6095                                  " it a master candidate" % node.name,
6096                                  errors.ECODE_STATE)
6097
6098     if self.op.vm_capable is False:
6099       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6100       if ipri or isec:
6101         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6102                                    " the vm_capable flag" % node.name,
6103                                    errors.ECODE_STATE)
6104
6105     if node.master_candidate and self.might_demote and not self.lock_all:
6106       assert not self.op.auto_promote, "auto_promote set but lock_all not"
6107       # check if after removing the current node, we're missing master
6108       # candidates
6109       (mc_remaining, mc_should, _) = \
6110           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6111       if mc_remaining < mc_should:
6112         raise errors.OpPrereqError("Not enough master candidates, please"
6113                                    " pass auto promote option to allow"
6114                                    " promotion (--auto-promote or RAPI"
6115                                    " auto_promote=True)", errors.ECODE_STATE)
6116
6117     self.old_flags = old_flags = (node.master_candidate,
6118                                   node.drained, node.offline)
6119     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6120     self.old_role = old_role = self._F2R[old_flags]
6121
6122     # Check for ineffective changes
6123     for attr in self._FLAGS:
6124       if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6125         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6126         setattr(self.op, attr, None)
6127
6128     # Past this point, any flag change to False means a transition
6129     # away from the respective state, as only real changes are kept
6130
6131     # TODO: We might query the real power state if it supports OOB
6132     if _SupportsOob(self.cfg, node):
6133       if self.op.offline is False and not (node.powered or
6134                                            self.op.powered is True):
6135         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6136                                     " offline status can be reset") %
6137                                    self.op.node_name, errors.ECODE_STATE)
6138     elif self.op.powered is not None:
6139       raise errors.OpPrereqError(("Unable to change powered state for node %s"
6140                                   " as it does not support out-of-band"
6141                                   " handling") % self.op.node_name,
6142                                  errors.ECODE_STATE)
6143
6144     # If we're being deofflined/drained, we'll MC ourself if needed
6145     if (self.op.drained is False or self.op.offline is False or
6146         (self.op.master_capable and not node.master_capable)):
6147       if _DecideSelfPromotion(self):
6148         self.op.master_candidate = True
6149         self.LogInfo("Auto-promoting node to master candidate")
6150
6151     # If we're no longer master capable, we'll demote ourselves from MC
6152     if self.op.master_capable is False and node.master_candidate:
6153       self.LogInfo("Demoting from master candidate")
6154       self.op.master_candidate = False
6155
6156     # Compute new role
6157     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6158     if self.op.master_candidate:
6159       new_role = self._ROLE_CANDIDATE
6160     elif self.op.drained:
6161       new_role = self._ROLE_DRAINED
6162     elif self.op.offline:
6163       new_role = self._ROLE_OFFLINE
6164     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6165       # False is still in new flags, which means we're un-setting (the
6166       # only) True flag
6167       new_role = self._ROLE_REGULAR
6168     else: # no new flags, nothing, keep old role
6169       new_role = old_role
6170
6171     self.new_role = new_role
6172
6173     if old_role == self._ROLE_OFFLINE and new_role != old_role:
6174       # Trying to transition out of offline status
6175       result = self.rpc.call_version([node.name])[node.name]
6176       if result.fail_msg:
6177         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6178                                    " to report its version: %s" %
6179                                    (node.name, result.fail_msg),
6180                                    errors.ECODE_STATE)
6181       else:
6182         self.LogWarning("Transitioning node from offline to online state"
6183                         " without using re-add. Please make sure the node"
6184                         " is healthy!")
6185
6186     # When changing the secondary ip, verify if this is a single-homed to
6187     # multi-homed transition or vice versa, and apply the relevant
6188     # restrictions.
6189     if self.op.secondary_ip:
6190       # Ok even without locking, because this can't be changed by any LU
6191       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6192       master_singlehomed = master.secondary_ip == master.primary_ip
6193       if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6194         if self.op.force and node.name == master.name:
6195           self.LogWarning("Transitioning from single-homed to multi-homed"
6196                           " cluster. All nodes will require a secondary ip.")
6197         else:
6198           raise errors.OpPrereqError("Changing the secondary ip on a"
6199                                      " single-homed cluster requires the"
6200                                      " --force option to be passed, and the"
6201                                      " target node to be the master",
6202                                      errors.ECODE_INVAL)
6203       elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6204         if self.op.force and node.name == master.name:
6205           self.LogWarning("Transitioning from multi-homed to single-homed"
6206                           " cluster. Secondary IPs will have to be removed.")
6207         else:
6208           raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6209                                      " same as the primary IP on a multi-homed"
6210                                      " cluster, unless the --force option is"
6211                                      " passed, and the target node is the"
6212                                      " master", errors.ECODE_INVAL)
6213
6214       assert not (frozenset(affected_instances) -
6215                   self.owned_locks(locking.LEVEL_INSTANCE))
6216
6217       if node.offline:
6218         if affected_instances:
6219           msg = ("Cannot change secondary IP address: offline node has"
6220                  " instances (%s) configured to use it" %
6221                  utils.CommaJoin(affected_instances.keys()))
6222           raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6223       else:
6224         # On online nodes, check that no instances are running, and that
6225         # the node has the new ip and we can reach it.
6226         for instance in affected_instances.values():
6227           _CheckInstanceState(self, instance, INSTANCE_DOWN,
6228                               msg="cannot change secondary ip")
6229
6230         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6231         if master.name != node.name:
6232           # check reachability from master secondary ip to new secondary ip
6233           if not netutils.TcpPing(self.op.secondary_ip,
6234                                   constants.DEFAULT_NODED_PORT,
6235                                   source=master.secondary_ip):
6236             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6237                                        " based ping to node daemon port",
6238                                        errors.ECODE_ENVIRON)
6239
6240     if self.op.ndparams:
6241       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6242       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6243       self.new_ndparams = new_ndparams
6244
6245     if self.op.hv_state:
6246       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6247                                                  self.node.hv_state_static)
6248
6249     if self.op.disk_state:
6250       self.new_disk_state = \
6251         _MergeAndVerifyDiskState(self.op.disk_state,
6252                                  self.node.disk_state_static)
6253
6254   def Exec(self, feedback_fn):
6255     """Modifies a node.
6256
6257     """
6258     node = self.node
6259     old_role = self.old_role
6260     new_role = self.new_role
6261
6262     result = []
6263
6264     if self.op.ndparams:
6265       node.ndparams = self.new_ndparams
6266
6267     if self.op.powered is not None:
6268       node.powered = self.op.powered
6269
6270     if self.op.hv_state:
6271       node.hv_state_static = self.new_hv_state
6272
6273     if self.op.disk_state:
6274       node.disk_state_static = self.new_disk_state
6275
6276     for attr in ["master_capable", "vm_capable"]:
6277       val = getattr(self.op, attr)
6278       if val is not None:
6279         setattr(node, attr, val)
6280         result.append((attr, str(val)))
6281
6282     if new_role != old_role:
6283       # Tell the node to demote itself, if no longer MC and not offline
6284       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6285         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6286         if msg:
6287           self.LogWarning("Node failed to demote itself: %s", msg)
6288
6289       new_flags = self._R2F[new_role]
6290       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6291         if of != nf:
6292           result.append((desc, str(nf)))
6293       (node.master_candidate, node.drained, node.offline) = new_flags
6294
6295       # we locked all nodes, we adjust the CP before updating this node
6296       if self.lock_all:
6297         _AdjustCandidatePool(self, [node.name])
6298
6299     if self.op.secondary_ip:
6300       node.secondary_ip = self.op.secondary_ip
6301       result.append(("secondary_ip", self.op.secondary_ip))
6302
6303     # this will trigger configuration file update, if needed
6304     self.cfg.Update(node, feedback_fn)
6305
6306     # this will trigger job queue propagation or cleanup if the mc
6307     # flag changed
6308     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6309       self.context.ReaddNode(node)
6310
6311     return result
6312
6313
6314 class LUNodePowercycle(NoHooksLU):
6315   """Powercycles a node.
6316
6317   """
6318   REQ_BGL = False
6319
6320   def CheckArguments(self):
6321     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6322     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6323       raise errors.OpPrereqError("The node is the master and the force"
6324                                  " parameter was not set",
6325                                  errors.ECODE_INVAL)
6326
6327   def ExpandNames(self):
6328     """Locking for PowercycleNode.
6329
6330     This is a last-resort option and shouldn't block on other
6331     jobs. Therefore, we grab no locks.
6332
6333     """
6334     self.needed_locks = {}
6335
6336   def Exec(self, feedback_fn):
6337     """Reboots a node.
6338
6339     """
6340     result = self.rpc.call_node_powercycle(self.op.node_name,
6341                                            self.cfg.GetHypervisorType())
6342     result.Raise("Failed to schedule the reboot")
6343     return result.payload
6344
6345
6346 class LUClusterQuery(NoHooksLU):
6347   """Query cluster configuration.
6348
6349   """
6350   REQ_BGL = False
6351
6352   def ExpandNames(self):
6353     self.needed_locks = {}
6354
6355   def Exec(self, feedback_fn):
6356     """Return cluster config.
6357
6358     """
6359     cluster = self.cfg.GetClusterInfo()
6360     os_hvp = {}
6361
6362     # Filter just for enabled hypervisors
6363     for os_name, hv_dict in cluster.os_hvp.items():
6364       os_hvp[os_name] = {}
6365       for hv_name, hv_params in hv_dict.items():
6366         if hv_name in cluster.enabled_hypervisors:
6367           os_hvp[os_name][hv_name] = hv_params
6368
6369     # Convert ip_family to ip_version
6370     primary_ip_version = constants.IP4_VERSION
6371     if cluster.primary_ip_family == netutils.IP6Address.family:
6372       primary_ip_version = constants.IP6_VERSION
6373
6374     result = {
6375       "software_version": constants.RELEASE_VERSION,
6376       "protocol_version": constants.PROTOCOL_VERSION,
6377       "config_version": constants.CONFIG_VERSION,
6378       "os_api_version": max(constants.OS_API_VERSIONS),
6379       "export_version": constants.EXPORT_VERSION,
6380       "architecture": runtime.GetArchInfo(),
6381       "name": cluster.cluster_name,
6382       "master": cluster.master_node,
6383       "default_hypervisor": cluster.primary_hypervisor,
6384       "enabled_hypervisors": cluster.enabled_hypervisors,
6385       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6386                         for hypervisor_name in cluster.enabled_hypervisors]),
6387       "os_hvp": os_hvp,
6388       "beparams": cluster.beparams,
6389       "osparams": cluster.osparams,
6390       "ipolicy": cluster.ipolicy,
6391       "nicparams": cluster.nicparams,
6392       "ndparams": cluster.ndparams,
6393       "diskparams": cluster.diskparams,
6394       "candidate_pool_size": cluster.candidate_pool_size,
6395       "master_netdev": cluster.master_netdev,
6396       "master_netmask": cluster.master_netmask,
6397       "use_external_mip_script": cluster.use_external_mip_script,
6398       "volume_group_name": cluster.volume_group_name,
6399       "drbd_usermode_helper": cluster.drbd_usermode_helper,
6400       "file_storage_dir": cluster.file_storage_dir,
6401       "shared_file_storage_dir": cluster.shared_file_storage_dir,
6402       "maintain_node_health": cluster.maintain_node_health,
6403       "ctime": cluster.ctime,
6404       "mtime": cluster.mtime,
6405       "uuid": cluster.uuid,
6406       "tags": list(cluster.GetTags()),
6407       "uid_pool": cluster.uid_pool,
6408       "default_iallocator": cluster.default_iallocator,
6409       "reserved_lvs": cluster.reserved_lvs,
6410       "primary_ip_version": primary_ip_version,
6411       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6412       "hidden_os": cluster.hidden_os,
6413       "blacklisted_os": cluster.blacklisted_os,
6414       }
6415
6416     return result
6417
6418
6419 class LUClusterConfigQuery(NoHooksLU):
6420   """Return configuration values.
6421
6422   """
6423   REQ_BGL = False
6424
6425   def CheckArguments(self):
6426     self.cq = _ClusterQuery(None, self.op.output_fields, False)
6427
6428   def ExpandNames(self):
6429     self.cq.ExpandNames(self)
6430
6431   def DeclareLocks(self, level):
6432     self.cq.DeclareLocks(self, level)
6433
6434   def Exec(self, feedback_fn):
6435     result = self.cq.OldStyleQuery(self)
6436
6437     assert len(result) == 1
6438
6439     return result[0]
6440
6441
6442 class _ClusterQuery(_QueryBase):
6443   FIELDS = query.CLUSTER_FIELDS
6444
6445   #: Do not sort (there is only one item)
6446   SORT_FIELD = None
6447
6448   def ExpandNames(self, lu):
6449     lu.needed_locks = {}
6450
6451     # The following variables interact with _QueryBase._GetNames
6452     self.wanted = locking.ALL_SET
6453     self.do_locking = self.use_locking
6454
6455     if self.do_locking:
6456       raise errors.OpPrereqError("Can not use locking for cluster queries",
6457                                  errors.ECODE_INVAL)
6458
6459   def DeclareLocks(self, lu, level):
6460     pass
6461
6462   def _GetQueryData(self, lu):
6463     """Computes the list of nodes and their attributes.
6464
6465     """
6466     # Locking is not used
6467     assert not (compat.any(lu.glm.is_owned(level)
6468                            for level in locking.LEVELS
6469                            if level != locking.LEVEL_CLUSTER) or
6470                 self.do_locking or self.use_locking)
6471
6472     if query.CQ_CONFIG in self.requested_data:
6473       cluster = lu.cfg.GetClusterInfo()
6474     else:
6475       cluster = NotImplemented
6476
6477     if query.CQ_QUEUE_DRAINED in self.requested_data:
6478       drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6479     else:
6480       drain_flag = NotImplemented
6481
6482     if query.CQ_WATCHER_PAUSE in self.requested_data:
6483       watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6484     else:
6485       watcher_pause = NotImplemented
6486
6487     return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6488
6489
6490 class LUInstanceActivateDisks(NoHooksLU):
6491   """Bring up an instance's disks.
6492
6493   """
6494   REQ_BGL = False
6495
6496   def ExpandNames(self):
6497     self._ExpandAndLockInstance()
6498     self.needed_locks[locking.LEVEL_NODE] = []
6499     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6500
6501   def DeclareLocks(self, level):
6502     if level == locking.LEVEL_NODE:
6503       self._LockInstancesNodes()
6504
6505   def CheckPrereq(self):
6506     """Check prerequisites.
6507
6508     This checks that the instance is in the cluster.
6509
6510     """
6511     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6512     assert self.instance is not None, \
6513       "Cannot retrieve locked instance %s" % self.op.instance_name
6514     _CheckNodeOnline(self, self.instance.primary_node)
6515
6516   def Exec(self, feedback_fn):
6517     """Activate the disks.
6518
6519     """
6520     disks_ok, disks_info = \
6521               _AssembleInstanceDisks(self, self.instance,
6522                                      ignore_size=self.op.ignore_size)
6523     if not disks_ok:
6524       raise errors.OpExecError("Cannot activate block devices")
6525
6526     if self.op.wait_for_sync:
6527       if not _WaitForSync(self, self.instance):
6528         raise errors.OpExecError("Some disks of the instance are degraded!")
6529
6530     return disks_info
6531
6532
6533 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6534                            ignore_size=False):
6535   """Prepare the block devices for an instance.
6536
6537   This sets up the block devices on all nodes.
6538
6539   @type lu: L{LogicalUnit}
6540   @param lu: the logical unit on whose behalf we execute
6541   @type instance: L{objects.Instance}
6542   @param instance: the instance for whose disks we assemble
6543   @type disks: list of L{objects.Disk} or None
6544   @param disks: which disks to assemble (or all, if None)
6545   @type ignore_secondaries: boolean
6546   @param ignore_secondaries: if true, errors on secondary nodes
6547       won't result in an error return from the function
6548   @type ignore_size: boolean
6549   @param ignore_size: if true, the current known size of the disk
6550       will not be used during the disk activation, useful for cases
6551       when the size is wrong
6552   @return: False if the operation failed, otherwise a list of
6553       (host, instance_visible_name, node_visible_name)
6554       with the mapping from node devices to instance devices
6555
6556   """
6557   device_info = []
6558   disks_ok = True
6559   iname = instance.name
6560   disks = _ExpandCheckDisks(instance, disks)
6561
6562   # With the two passes mechanism we try to reduce the window of
6563   # opportunity for the race condition of switching DRBD to primary
6564   # before handshaking occured, but we do not eliminate it
6565
6566   # The proper fix would be to wait (with some limits) until the
6567   # connection has been made and drbd transitions from WFConnection
6568   # into any other network-connected state (Connected, SyncTarget,
6569   # SyncSource, etc.)
6570
6571   # 1st pass, assemble on all nodes in secondary mode
6572   for idx, inst_disk in enumerate(disks):
6573     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6574       if ignore_size:
6575         node_disk = node_disk.Copy()
6576         node_disk.UnsetSize()
6577       lu.cfg.SetDiskID(node_disk, node)
6578       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6579                                              False, idx)
6580       msg = result.fail_msg
6581       if msg:
6582         is_offline_secondary = (node in instance.secondary_nodes and
6583                                 result.offline)
6584         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6585                            " (is_primary=False, pass=1): %s",
6586                            inst_disk.iv_name, node, msg)
6587         if not (ignore_secondaries or is_offline_secondary):
6588           disks_ok = False
6589
6590   # FIXME: race condition on drbd migration to primary
6591
6592   # 2nd pass, do only the primary node
6593   for idx, inst_disk in enumerate(disks):
6594     dev_path = None
6595
6596     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6597       if node != instance.primary_node:
6598         continue
6599       if ignore_size:
6600         node_disk = node_disk.Copy()
6601         node_disk.UnsetSize()
6602       lu.cfg.SetDiskID(node_disk, node)
6603       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6604                                              True, idx)
6605       msg = result.fail_msg
6606       if msg:
6607         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6608                            " (is_primary=True, pass=2): %s",
6609                            inst_disk.iv_name, node, msg)
6610         disks_ok = False
6611       else:
6612         dev_path = result.payload
6613
6614     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6615
6616   # leave the disks configured for the primary node
6617   # this is a workaround that would be fixed better by
6618   # improving the logical/physical id handling
6619   for disk in disks:
6620     lu.cfg.SetDiskID(disk, instance.primary_node)
6621
6622   return disks_ok, device_info
6623
6624
6625 def _StartInstanceDisks(lu, instance, force):
6626   """Start the disks of an instance.
6627
6628   """
6629   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6630                                            ignore_secondaries=force)
6631   if not disks_ok:
6632     _ShutdownInstanceDisks(lu, instance)
6633     if force is not None and not force:
6634       lu.proc.LogWarning("", hint="If the message above refers to a"
6635                          " secondary node,"
6636                          " you can retry the operation using '--force'.")
6637     raise errors.OpExecError("Disk consistency error")
6638
6639
6640 class LUInstanceDeactivateDisks(NoHooksLU):
6641   """Shutdown an instance's disks.
6642
6643   """
6644   REQ_BGL = False
6645
6646   def ExpandNames(self):
6647     self._ExpandAndLockInstance()
6648     self.needed_locks[locking.LEVEL_NODE] = []
6649     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6650
6651   def DeclareLocks(self, level):
6652     if level == locking.LEVEL_NODE:
6653       self._LockInstancesNodes()
6654
6655   def CheckPrereq(self):
6656     """Check prerequisites.
6657
6658     This checks that the instance is in the cluster.
6659
6660     """
6661     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6662     assert self.instance is not None, \
6663       "Cannot retrieve locked instance %s" % self.op.instance_name
6664
6665   def Exec(self, feedback_fn):
6666     """Deactivate the disks
6667
6668     """
6669     instance = self.instance
6670     if self.op.force:
6671       _ShutdownInstanceDisks(self, instance)
6672     else:
6673       _SafeShutdownInstanceDisks(self, instance)
6674
6675
6676 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6677   """Shutdown block devices of an instance.
6678
6679   This function checks if an instance is running, before calling
6680   _ShutdownInstanceDisks.
6681
6682   """
6683   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6684   _ShutdownInstanceDisks(lu, instance, disks=disks)
6685
6686
6687 def _ExpandCheckDisks(instance, disks):
6688   """Return the instance disks selected by the disks list
6689
6690   @type disks: list of L{objects.Disk} or None
6691   @param disks: selected disks
6692   @rtype: list of L{objects.Disk}
6693   @return: selected instance disks to act on
6694
6695   """
6696   if disks is None:
6697     return instance.disks
6698   else:
6699     if not set(disks).issubset(instance.disks):
6700       raise errors.ProgrammerError("Can only act on disks belonging to the"
6701                                    " target instance")
6702     return disks
6703
6704
6705 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6706   """Shutdown block devices of an instance.
6707
6708   This does the shutdown on all nodes of the instance.
6709
6710   If the ignore_primary is false, errors on the primary node are
6711   ignored.
6712
6713   """
6714   all_result = True
6715   disks = _ExpandCheckDisks(instance, disks)
6716
6717   for disk in disks:
6718     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6719       lu.cfg.SetDiskID(top_disk, node)
6720       result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6721       msg = result.fail_msg
6722       if msg:
6723         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6724                       disk.iv_name, node, msg)
6725         if ((node == instance.primary_node and not ignore_primary) or
6726             (node != instance.primary_node and not result.offline)):
6727           all_result = False
6728   return all_result
6729
6730
6731 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6732   """Checks if a node has enough free memory.
6733
6734   This function check if a given node has the needed amount of free
6735   memory. In case the node has less memory or we cannot get the
6736   information from the node, this function raise an OpPrereqError
6737   exception.
6738
6739   @type lu: C{LogicalUnit}
6740   @param lu: a logical unit from which we get configuration data
6741   @type node: C{str}
6742   @param node: the node to check
6743   @type reason: C{str}
6744   @param reason: string to use in the error message
6745   @type requested: C{int}
6746   @param requested: the amount of memory in MiB to check for
6747   @type hypervisor_name: C{str}
6748   @param hypervisor_name: the hypervisor to ask for memory stats
6749   @rtype: integer
6750   @return: node current free memory
6751   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6752       we cannot check the node
6753
6754   """
6755   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6756   nodeinfo[node].Raise("Can't get data from node %s" % node,
6757                        prereq=True, ecode=errors.ECODE_ENVIRON)
6758   (_, _, (hv_info, )) = nodeinfo[node].payload
6759
6760   free_mem = hv_info.get("memory_free", None)
6761   if not isinstance(free_mem, int):
6762     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6763                                " was '%s'" % (node, free_mem),
6764                                errors.ECODE_ENVIRON)
6765   if requested > free_mem:
6766     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6767                                " needed %s MiB, available %s MiB" %
6768                                (node, reason, requested, free_mem),
6769                                errors.ECODE_NORES)
6770   return free_mem
6771
6772
6773 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6774   """Checks if nodes have enough free disk space in the all VGs.
6775
6776   This function check if all given nodes have the needed amount of
6777   free disk. In case any node has less disk or we cannot get the
6778   information from the node, this function raise an OpPrereqError
6779   exception.
6780
6781   @type lu: C{LogicalUnit}
6782   @param lu: a logical unit from which we get configuration data
6783   @type nodenames: C{list}
6784   @param nodenames: the list of node names to check
6785   @type req_sizes: C{dict}
6786   @param req_sizes: the hash of vg and corresponding amount of disk in
6787       MiB to check for
6788   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6789       or we cannot check the node
6790
6791   """
6792   for vg, req_size in req_sizes.items():
6793     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6794
6795
6796 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6797   """Checks if nodes have enough free disk space in the specified VG.
6798
6799   This function check if all given nodes have the needed amount of
6800   free disk. In case any node has less disk or we cannot get the
6801   information from the node, this function raise an OpPrereqError
6802   exception.
6803
6804   @type lu: C{LogicalUnit}
6805   @param lu: a logical unit from which we get configuration data
6806   @type nodenames: C{list}
6807   @param nodenames: the list of node names to check
6808   @type vg: C{str}
6809   @param vg: the volume group to check
6810   @type requested: C{int}
6811   @param requested: the amount of disk in MiB to check for
6812   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6813       or we cannot check the node
6814
6815   """
6816   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6817   for node in nodenames:
6818     info = nodeinfo[node]
6819     info.Raise("Cannot get current information from node %s" % node,
6820                prereq=True, ecode=errors.ECODE_ENVIRON)
6821     (_, (vg_info, ), _) = info.payload
6822     vg_free = vg_info.get("vg_free", None)
6823     if not isinstance(vg_free, int):
6824       raise errors.OpPrereqError("Can't compute free disk space on node"
6825                                  " %s for vg %s, result was '%s'" %
6826                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6827     if requested > vg_free:
6828       raise errors.OpPrereqError("Not enough disk space on target node %s"
6829                                  " vg %s: required %d MiB, available %d MiB" %
6830                                  (node, vg, requested, vg_free),
6831                                  errors.ECODE_NORES)
6832
6833
6834 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6835   """Checks if nodes have enough physical CPUs
6836
6837   This function checks if all given nodes have the needed number of
6838   physical CPUs. In case any node has less CPUs or we cannot get the
6839   information from the node, this function raises an OpPrereqError
6840   exception.
6841
6842   @type lu: C{LogicalUnit}
6843   @param lu: a logical unit from which we get configuration data
6844   @type nodenames: C{list}
6845   @param nodenames: the list of node names to check
6846   @type requested: C{int}
6847   @param requested: the minimum acceptable number of physical CPUs
6848   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6849       or we cannot check the node
6850
6851   """
6852   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6853   for node in nodenames:
6854     info = nodeinfo[node]
6855     info.Raise("Cannot get current information from node %s" % node,
6856                prereq=True, ecode=errors.ECODE_ENVIRON)
6857     (_, _, (hv_info, )) = info.payload
6858     num_cpus = hv_info.get("cpu_total", None)
6859     if not isinstance(num_cpus, int):
6860       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6861                                  " on node %s, result was '%s'" %
6862                                  (node, num_cpus), errors.ECODE_ENVIRON)
6863     if requested > num_cpus:
6864       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6865                                  "required" % (node, num_cpus, requested),
6866                                  errors.ECODE_NORES)
6867
6868
6869 class LUInstanceStartup(LogicalUnit):
6870   """Starts an instance.
6871
6872   """
6873   HPATH = "instance-start"
6874   HTYPE = constants.HTYPE_INSTANCE
6875   REQ_BGL = False
6876
6877   def CheckArguments(self):
6878     # extra beparams
6879     if self.op.beparams:
6880       # fill the beparams dict
6881       objects.UpgradeBeParams(self.op.beparams)
6882       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6883
6884   def ExpandNames(self):
6885     self._ExpandAndLockInstance()
6886     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6887
6888   def DeclareLocks(self, level):
6889     if level == locking.LEVEL_NODE_RES:
6890       self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6891
6892   def BuildHooksEnv(self):
6893     """Build hooks env.
6894
6895     This runs on master, primary and secondary nodes of the instance.
6896
6897     """
6898     env = {
6899       "FORCE": self.op.force,
6900       }
6901
6902     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6903
6904     return env
6905
6906   def BuildHooksNodes(self):
6907     """Build hooks nodes.
6908
6909     """
6910     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6911     return (nl, nl)
6912
6913   def CheckPrereq(self):
6914     """Check prerequisites.
6915
6916     This checks that the instance is in the cluster.
6917
6918     """
6919     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6920     assert self.instance is not None, \
6921       "Cannot retrieve locked instance %s" % self.op.instance_name
6922
6923     # extra hvparams
6924     if self.op.hvparams:
6925       # check hypervisor parameter syntax (locally)
6926       cluster = self.cfg.GetClusterInfo()
6927       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6928       filled_hvp = cluster.FillHV(instance)
6929       filled_hvp.update(self.op.hvparams)
6930       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6931       hv_type.CheckParameterSyntax(filled_hvp)
6932       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6933
6934     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6935
6936     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6937
6938     if self.primary_offline and self.op.ignore_offline_nodes:
6939       self.proc.LogWarning("Ignoring offline primary node")
6940
6941       if self.op.hvparams or self.op.beparams:
6942         self.proc.LogWarning("Overridden parameters are ignored")
6943     else:
6944       _CheckNodeOnline(self, instance.primary_node)
6945
6946       bep = self.cfg.GetClusterInfo().FillBE(instance)
6947       bep.update(self.op.beparams)
6948
6949       # check bridges existence
6950       _CheckInstanceBridgesExist(self, instance)
6951
6952       remote_info = self.rpc.call_instance_info(instance.primary_node,
6953                                                 instance.name,
6954                                                 instance.hypervisor)
6955       remote_info.Raise("Error checking node %s" % instance.primary_node,
6956                         prereq=True, ecode=errors.ECODE_ENVIRON)
6957       if not remote_info.payload: # not running already
6958         _CheckNodeFreeMemory(self, instance.primary_node,
6959                              "starting instance %s" % instance.name,
6960                              bep[constants.BE_MINMEM], instance.hypervisor)
6961
6962   def Exec(self, feedback_fn):
6963     """Start the instance.
6964
6965     """
6966     instance = self.instance
6967     force = self.op.force
6968
6969     if not self.op.no_remember:
6970       self.cfg.MarkInstanceUp(instance.name)
6971
6972     if self.primary_offline:
6973       assert self.op.ignore_offline_nodes
6974       self.proc.LogInfo("Primary node offline, marked instance as started")
6975     else:
6976       node_current = instance.primary_node
6977
6978       _StartInstanceDisks(self, instance, force)
6979
6980       result = \
6981         self.rpc.call_instance_start(node_current,
6982                                      (instance, self.op.hvparams,
6983                                       self.op.beparams),
6984                                      self.op.startup_paused)
6985       msg = result.fail_msg
6986       if msg:
6987         _ShutdownInstanceDisks(self, instance)
6988         raise errors.OpExecError("Could not start instance: %s" % msg)
6989
6990
6991 class LUInstanceReboot(LogicalUnit):
6992   """Reboot an instance.
6993
6994   """
6995   HPATH = "instance-reboot"
6996   HTYPE = constants.HTYPE_INSTANCE
6997   REQ_BGL = False
6998
6999   def ExpandNames(self):
7000     self._ExpandAndLockInstance()
7001
7002   def BuildHooksEnv(self):
7003     """Build hooks env.
7004
7005     This runs on master, primary and secondary nodes of the instance.
7006
7007     """
7008     env = {
7009       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7010       "REBOOT_TYPE": self.op.reboot_type,
7011       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7012       }
7013
7014     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7015
7016     return env
7017
7018   def BuildHooksNodes(self):
7019     """Build hooks nodes.
7020
7021     """
7022     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7023     return (nl, nl)
7024
7025   def CheckPrereq(self):
7026     """Check prerequisites.
7027
7028     This checks that the instance is in the cluster.
7029
7030     """
7031     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7032     assert self.instance is not None, \
7033       "Cannot retrieve locked instance %s" % self.op.instance_name
7034     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7035     _CheckNodeOnline(self, instance.primary_node)
7036
7037     # check bridges existence
7038     _CheckInstanceBridgesExist(self, instance)
7039
7040   def Exec(self, feedback_fn):
7041     """Reboot the instance.
7042
7043     """
7044     instance = self.instance
7045     ignore_secondaries = self.op.ignore_secondaries
7046     reboot_type = self.op.reboot_type
7047
7048     remote_info = self.rpc.call_instance_info(instance.primary_node,
7049                                               instance.name,
7050                                               instance.hypervisor)
7051     remote_info.Raise("Error checking node %s" % instance.primary_node)
7052     instance_running = bool(remote_info.payload)
7053
7054     node_current = instance.primary_node
7055
7056     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7057                                             constants.INSTANCE_REBOOT_HARD]:
7058       for disk in instance.disks:
7059         self.cfg.SetDiskID(disk, node_current)
7060       result = self.rpc.call_instance_reboot(node_current, instance,
7061                                              reboot_type,
7062                                              self.op.shutdown_timeout)
7063       result.Raise("Could not reboot instance")
7064     else:
7065       if instance_running:
7066         result = self.rpc.call_instance_shutdown(node_current, instance,
7067                                                  self.op.shutdown_timeout)
7068         result.Raise("Could not shutdown instance for full reboot")
7069         _ShutdownInstanceDisks(self, instance)
7070       else:
7071         self.LogInfo("Instance %s was already stopped, starting now",
7072                      instance.name)
7073       _StartInstanceDisks(self, instance, ignore_secondaries)
7074       result = self.rpc.call_instance_start(node_current,
7075                                             (instance, None, None), False)
7076       msg = result.fail_msg
7077       if msg:
7078         _ShutdownInstanceDisks(self, instance)
7079         raise errors.OpExecError("Could not start instance for"
7080                                  " full reboot: %s" % msg)
7081
7082     self.cfg.MarkInstanceUp(instance.name)
7083
7084
7085 class LUInstanceShutdown(LogicalUnit):
7086   """Shutdown an instance.
7087
7088   """
7089   HPATH = "instance-stop"
7090   HTYPE = constants.HTYPE_INSTANCE
7091   REQ_BGL = False
7092
7093   def ExpandNames(self):
7094     self._ExpandAndLockInstance()
7095
7096   def BuildHooksEnv(self):
7097     """Build hooks env.
7098
7099     This runs on master, primary and secondary nodes of the instance.
7100
7101     """
7102     env = _BuildInstanceHookEnvByObject(self, self.instance)
7103     env["TIMEOUT"] = self.op.timeout
7104     return env
7105
7106   def BuildHooksNodes(self):
7107     """Build hooks nodes.
7108
7109     """
7110     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7111     return (nl, nl)
7112
7113   def CheckPrereq(self):
7114     """Check prerequisites.
7115
7116     This checks that the instance is in the cluster.
7117
7118     """
7119     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7120     assert self.instance is not None, \
7121       "Cannot retrieve locked instance %s" % self.op.instance_name
7122
7123     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7124
7125     self.primary_offline = \
7126       self.cfg.GetNodeInfo(self.instance.primary_node).offline
7127
7128     if self.primary_offline and self.op.ignore_offline_nodes:
7129       self.proc.LogWarning("Ignoring offline primary node")
7130     else:
7131       _CheckNodeOnline(self, self.instance.primary_node)
7132
7133   def Exec(self, feedback_fn):
7134     """Shutdown the instance.
7135
7136     """
7137     instance = self.instance
7138     node_current = instance.primary_node
7139     timeout = self.op.timeout
7140
7141     if not self.op.no_remember:
7142       self.cfg.MarkInstanceDown(instance.name)
7143
7144     if self.primary_offline:
7145       assert self.op.ignore_offline_nodes
7146       self.proc.LogInfo("Primary node offline, marked instance as stopped")
7147     else:
7148       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7149       msg = result.fail_msg
7150       if msg:
7151         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
7152
7153       _ShutdownInstanceDisks(self, instance)
7154
7155
7156 class LUInstanceReinstall(LogicalUnit):
7157   """Reinstall an instance.
7158
7159   """
7160   HPATH = "instance-reinstall"
7161   HTYPE = constants.HTYPE_INSTANCE
7162   REQ_BGL = False
7163
7164   def ExpandNames(self):
7165     self._ExpandAndLockInstance()
7166
7167   def BuildHooksEnv(self):
7168     """Build hooks env.
7169
7170     This runs on master, primary and secondary nodes of the instance.
7171
7172     """
7173     return _BuildInstanceHookEnvByObject(self, self.instance)
7174
7175   def BuildHooksNodes(self):
7176     """Build hooks nodes.
7177
7178     """
7179     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7180     return (nl, nl)
7181
7182   def CheckPrereq(self):
7183     """Check prerequisites.
7184
7185     This checks that the instance is in the cluster and is not running.
7186
7187     """
7188     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7189     assert instance is not None, \
7190       "Cannot retrieve locked instance %s" % self.op.instance_name
7191     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7192                      " offline, cannot reinstall")
7193
7194     if instance.disk_template == constants.DT_DISKLESS:
7195       raise errors.OpPrereqError("Instance '%s' has no disks" %
7196                                  self.op.instance_name,
7197                                  errors.ECODE_INVAL)
7198     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7199
7200     if self.op.os_type is not None:
7201       # OS verification
7202       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7203       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7204       instance_os = self.op.os_type
7205     else:
7206       instance_os = instance.os
7207
7208     nodelist = list(instance.all_nodes)
7209
7210     if self.op.osparams:
7211       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7212       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7213       self.os_inst = i_osdict # the new dict (without defaults)
7214     else:
7215       self.os_inst = None
7216
7217     self.instance = instance
7218
7219   def Exec(self, feedback_fn):
7220     """Reinstall the instance.
7221
7222     """
7223     inst = self.instance
7224
7225     if self.op.os_type is not None:
7226       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7227       inst.os = self.op.os_type
7228       # Write to configuration
7229       self.cfg.Update(inst, feedback_fn)
7230
7231     _StartInstanceDisks(self, inst, None)
7232     try:
7233       feedback_fn("Running the instance OS create scripts...")
7234       # FIXME: pass debug option from opcode to backend
7235       result = self.rpc.call_instance_os_add(inst.primary_node,
7236                                              (inst, self.os_inst), True,
7237                                              self.op.debug_level)
7238       result.Raise("Could not install OS for instance %s on node %s" %
7239                    (inst.name, inst.primary_node))
7240     finally:
7241       _ShutdownInstanceDisks(self, inst)
7242
7243
7244 class LUInstanceRecreateDisks(LogicalUnit):
7245   """Recreate an instance's missing disks.
7246
7247   """
7248   HPATH = "instance-recreate-disks"
7249   HTYPE = constants.HTYPE_INSTANCE
7250   REQ_BGL = False
7251
7252   _MODIFYABLE = frozenset([
7253     constants.IDISK_SIZE,
7254     constants.IDISK_MODE,
7255     ])
7256
7257   # New or changed disk parameters may have different semantics
7258   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7259     constants.IDISK_ADOPT,
7260
7261     # TODO: Implement support changing VG while recreating
7262     constants.IDISK_VG,
7263     constants.IDISK_METAVG,
7264     ]))
7265
7266   def _RunAllocator(self):
7267     """Run the allocator based on input opcode.
7268
7269     """
7270     be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7271
7272     # FIXME
7273     # The allocator should actually run in "relocate" mode, but current
7274     # allocators don't support relocating all the nodes of an instance at
7275     # the same time. As a workaround we use "allocate" mode, but this is
7276     # suboptimal for two reasons:
7277     # - The instance name passed to the allocator is present in the list of
7278     #   existing instances, so there could be a conflict within the
7279     #   internal structures of the allocator. This doesn't happen with the
7280     #   current allocators, but it's a liability.
7281     # - The allocator counts the resources used by the instance twice: once
7282     #   because the instance exists already, and once because it tries to
7283     #   allocate a new instance.
7284     # The allocator could choose some of the nodes on which the instance is
7285     # running, but that's not a problem. If the instance nodes are broken,
7286     # they should be already be marked as drained or offline, and hence
7287     # skipped by the allocator. If instance disks have been lost for other
7288     # reasons, then recreating the disks on the same nodes should be fine.
7289     disk_template = self.instance.disk_template
7290     spindle_use = be_full[constants.BE_SPINDLE_USE]
7291     req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7292                                         disk_template=disk_template,
7293                                         tags=list(self.instance.GetTags()),
7294                                         os=self.instance.os,
7295                                         nics=[{}],
7296                                         vcpus=be_full[constants.BE_VCPUS],
7297                                         memory=be_full[constants.BE_MAXMEM],
7298                                         spindle_use=spindle_use,
7299                                         disks=[{constants.IDISK_SIZE: d.size,
7300                                                 constants.IDISK_MODE: d.mode}
7301                                                 for d in self.instance.disks],
7302                                         hypervisor=self.instance.hypervisor)
7303     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7304
7305     ial.Run(self.op.iallocator)
7306
7307     assert req.RequiredNodes() == len(self.instance.all_nodes)
7308
7309     if not ial.success:
7310       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7311                                  " %s" % (self.op.iallocator, ial.info),
7312                                  errors.ECODE_NORES)
7313
7314     self.op.nodes = ial.result
7315     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7316                  self.op.instance_name, self.op.iallocator,
7317                  utils.CommaJoin(ial.result))
7318
7319   def CheckArguments(self):
7320     if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7321       # Normalize and convert deprecated list of disk indices
7322       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7323
7324     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7325     if duplicates:
7326       raise errors.OpPrereqError("Some disks have been specified more than"
7327                                  " once: %s" % utils.CommaJoin(duplicates),
7328                                  errors.ECODE_INVAL)
7329
7330     # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7331     # when neither iallocator nor nodes are specified
7332     if self.op.iallocator or self.op.nodes:
7333       _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7334
7335     for (idx, params) in self.op.disks:
7336       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7337       unsupported = frozenset(params.keys()) - self._MODIFYABLE
7338       if unsupported:
7339         raise errors.OpPrereqError("Parameters for disk %s try to change"
7340                                    " unmodifyable parameter(s): %s" %
7341                                    (idx, utils.CommaJoin(unsupported)),
7342                                    errors.ECODE_INVAL)
7343
7344   def ExpandNames(self):
7345     self._ExpandAndLockInstance()
7346     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7347     if self.op.nodes:
7348       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7349       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7350     else:
7351       self.needed_locks[locking.LEVEL_NODE] = []
7352       if self.op.iallocator:
7353         # iallocator will select a new node in the same group
7354         self.needed_locks[locking.LEVEL_NODEGROUP] = []
7355     self.needed_locks[locking.LEVEL_NODE_RES] = []
7356
7357   def DeclareLocks(self, level):
7358     if level == locking.LEVEL_NODEGROUP:
7359       assert self.op.iallocator is not None
7360       assert not self.op.nodes
7361       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7362       self.share_locks[locking.LEVEL_NODEGROUP] = 1
7363       # Lock the primary group used by the instance optimistically; this
7364       # requires going via the node before it's locked, requiring
7365       # verification later on
7366       self.needed_locks[locking.LEVEL_NODEGROUP] = \
7367         self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7368
7369     elif level == locking.LEVEL_NODE:
7370       # If an allocator is used, then we lock all the nodes in the current
7371       # instance group, as we don't know yet which ones will be selected;
7372       # if we replace the nodes without using an allocator, locks are
7373       # already declared in ExpandNames; otherwise, we need to lock all the
7374       # instance nodes for disk re-creation
7375       if self.op.iallocator:
7376         assert not self.op.nodes
7377         assert not self.needed_locks[locking.LEVEL_NODE]
7378         assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7379
7380         # Lock member nodes of the group of the primary node
7381         for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7382           self.needed_locks[locking.LEVEL_NODE].extend(
7383             self.cfg.GetNodeGroup(group_uuid).members)
7384       elif not self.op.nodes:
7385         self._LockInstancesNodes(primary_only=False)
7386     elif level == locking.LEVEL_NODE_RES:
7387       # Copy node locks
7388       self.needed_locks[locking.LEVEL_NODE_RES] = \
7389         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7390
7391   def BuildHooksEnv(self):
7392     """Build hooks env.
7393
7394     This runs on master, primary and secondary nodes of the instance.
7395
7396     """
7397     return _BuildInstanceHookEnvByObject(self, self.instance)
7398
7399   def BuildHooksNodes(self):
7400     """Build hooks nodes.
7401
7402     """
7403     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7404     return (nl, nl)
7405
7406   def CheckPrereq(self):
7407     """Check prerequisites.
7408
7409     This checks that the instance is in the cluster and is not running.
7410
7411     """
7412     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7413     assert instance is not None, \
7414       "Cannot retrieve locked instance %s" % self.op.instance_name
7415     if self.op.nodes:
7416       if len(self.op.nodes) != len(instance.all_nodes):
7417         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7418                                    " %d replacement nodes were specified" %
7419                                    (instance.name, len(instance.all_nodes),
7420                                     len(self.op.nodes)),
7421                                    errors.ECODE_INVAL)
7422       assert instance.disk_template != constants.DT_DRBD8 or \
7423           len(self.op.nodes) == 2
7424       assert instance.disk_template != constants.DT_PLAIN or \
7425           len(self.op.nodes) == 1
7426       primary_node = self.op.nodes[0]
7427     else:
7428       primary_node = instance.primary_node
7429     if not self.op.iallocator:
7430       _CheckNodeOnline(self, primary_node)
7431
7432     if instance.disk_template == constants.DT_DISKLESS:
7433       raise errors.OpPrereqError("Instance '%s' has no disks" %
7434                                  self.op.instance_name, errors.ECODE_INVAL)
7435
7436     # Verify if node group locks are still correct
7437     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7438     if owned_groups:
7439       # Node group locks are acquired only for the primary node (and only
7440       # when the allocator is used)
7441       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7442                                primary_only=True)
7443
7444     # if we replace nodes *and* the old primary is offline, we don't
7445     # check the instance state
7446     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7447     if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7448       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7449                           msg="cannot recreate disks")
7450
7451     if self.op.disks:
7452       self.disks = dict(self.op.disks)
7453     else:
7454       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7455
7456     maxidx = max(self.disks.keys())
7457     if maxidx >= len(instance.disks):
7458       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7459                                  errors.ECODE_INVAL)
7460
7461     if ((self.op.nodes or self.op.iallocator) and
7462         sorted(self.disks.keys()) != range(len(instance.disks))):
7463       raise errors.OpPrereqError("Can't recreate disks partially and"
7464                                  " change the nodes at the same time",
7465                                  errors.ECODE_INVAL)
7466
7467     self.instance = instance
7468
7469     if self.op.iallocator:
7470       self._RunAllocator()
7471       # Release unneeded node and node resource locks
7472       _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7473       _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7474
7475   def Exec(self, feedback_fn):
7476     """Recreate the disks.
7477
7478     """
7479     instance = self.instance
7480
7481     assert (self.owned_locks(locking.LEVEL_NODE) ==
7482             self.owned_locks(locking.LEVEL_NODE_RES))
7483
7484     to_skip = []
7485     mods = [] # keeps track of needed changes
7486
7487     for idx, disk in enumerate(instance.disks):
7488       try:
7489         changes = self.disks[idx]
7490       except KeyError:
7491         # Disk should not be recreated
7492         to_skip.append(idx)
7493         continue
7494
7495       # update secondaries for disks, if needed
7496       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7497         # need to update the nodes and minors
7498         assert len(self.op.nodes) == 2
7499         assert len(disk.logical_id) == 6 # otherwise disk internals
7500                                          # have changed
7501         (_, _, old_port, _, _, old_secret) = disk.logical_id
7502         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7503         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7504                   new_minors[0], new_minors[1], old_secret)
7505         assert len(disk.logical_id) == len(new_id)
7506       else:
7507         new_id = None
7508
7509       mods.append((idx, new_id, changes))
7510
7511     # now that we have passed all asserts above, we can apply the mods
7512     # in a single run (to avoid partial changes)
7513     for idx, new_id, changes in mods:
7514       disk = instance.disks[idx]
7515       if new_id is not None:
7516         assert disk.dev_type == constants.LD_DRBD8
7517         disk.logical_id = new_id
7518       if changes:
7519         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7520                     mode=changes.get(constants.IDISK_MODE, None))
7521
7522     # change primary node, if needed
7523     if self.op.nodes:
7524       instance.primary_node = self.op.nodes[0]
7525       self.LogWarning("Changing the instance's nodes, you will have to"
7526                       " remove any disks left on the older nodes manually")
7527
7528     if self.op.nodes:
7529       self.cfg.Update(instance, feedback_fn)
7530
7531     # All touched nodes must be locked
7532     mylocks = self.owned_locks(locking.LEVEL_NODE)
7533     assert mylocks.issuperset(frozenset(instance.all_nodes))
7534     _CreateDisks(self, instance, to_skip=to_skip)
7535
7536
7537 class LUInstanceRename(LogicalUnit):
7538   """Rename an instance.
7539
7540   """
7541   HPATH = "instance-rename"
7542   HTYPE = constants.HTYPE_INSTANCE
7543
7544   def CheckArguments(self):
7545     """Check arguments.
7546
7547     """
7548     if self.op.ip_check and not self.op.name_check:
7549       # TODO: make the ip check more flexible and not depend on the name check
7550       raise errors.OpPrereqError("IP address check requires a name check",
7551                                  errors.ECODE_INVAL)
7552
7553   def BuildHooksEnv(self):
7554     """Build hooks env.
7555
7556     This runs on master, primary and secondary nodes of the instance.
7557
7558     """
7559     env = _BuildInstanceHookEnvByObject(self, self.instance)
7560     env["INSTANCE_NEW_NAME"] = self.op.new_name
7561     return env
7562
7563   def BuildHooksNodes(self):
7564     """Build hooks nodes.
7565
7566     """
7567     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7568     return (nl, nl)
7569
7570   def CheckPrereq(self):
7571     """Check prerequisites.
7572
7573     This checks that the instance is in the cluster and is not running.
7574
7575     """
7576     self.op.instance_name = _ExpandInstanceName(self.cfg,
7577                                                 self.op.instance_name)
7578     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7579     assert instance is not None
7580     _CheckNodeOnline(self, instance.primary_node)
7581     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7582                         msg="cannot rename")
7583     self.instance = instance
7584
7585     new_name = self.op.new_name
7586     if self.op.name_check:
7587       hostname = _CheckHostnameSane(self, new_name)
7588       new_name = self.op.new_name = hostname.name
7589       if (self.op.ip_check and
7590           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7591         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7592                                    (hostname.ip, new_name),
7593                                    errors.ECODE_NOTUNIQUE)
7594
7595     instance_list = self.cfg.GetInstanceList()
7596     if new_name in instance_list and new_name != instance.name:
7597       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7598                                  new_name, errors.ECODE_EXISTS)
7599
7600   def Exec(self, feedback_fn):
7601     """Rename the instance.
7602
7603     """
7604     inst = self.instance
7605     old_name = inst.name
7606
7607     rename_file_storage = False
7608     if (inst.disk_template in constants.DTS_FILEBASED and
7609         self.op.new_name != inst.name):
7610       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7611       rename_file_storage = True
7612
7613     self.cfg.RenameInstance(inst.name, self.op.new_name)
7614     # Change the instance lock. This is definitely safe while we hold the BGL.
7615     # Otherwise the new lock would have to be added in acquired mode.
7616     assert self.REQ_BGL
7617     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7618     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7619
7620     # re-read the instance from the configuration after rename
7621     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7622
7623     if rename_file_storage:
7624       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7625       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7626                                                      old_file_storage_dir,
7627                                                      new_file_storage_dir)
7628       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7629                    " (but the instance has been renamed in Ganeti)" %
7630                    (inst.primary_node, old_file_storage_dir,
7631                     new_file_storage_dir))
7632
7633     _StartInstanceDisks(self, inst, None)
7634     # update info on disks
7635     info = _GetInstanceInfoText(inst)
7636     for (idx, disk) in enumerate(inst.disks):
7637       for node in inst.all_nodes:
7638         self.cfg.SetDiskID(disk, node)
7639         result = self.rpc.call_blockdev_setinfo(node, disk, info)
7640         if result.fail_msg:
7641           self.LogWarning("Error setting info on node %s for disk %s: %s",
7642                           node, idx, result.fail_msg)
7643     try:
7644       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7645                                                  old_name, self.op.debug_level)
7646       msg = result.fail_msg
7647       if msg:
7648         msg = ("Could not run OS rename script for instance %s on node %s"
7649                " (but the instance has been renamed in Ganeti): %s" %
7650                (inst.name, inst.primary_node, msg))
7651         self.proc.LogWarning(msg)
7652     finally:
7653       _ShutdownInstanceDisks(self, inst)
7654
7655     return inst.name
7656
7657
7658 class LUInstanceRemove(LogicalUnit):
7659   """Remove an instance.
7660
7661   """
7662   HPATH = "instance-remove"
7663   HTYPE = constants.HTYPE_INSTANCE
7664   REQ_BGL = False
7665
7666   def ExpandNames(self):
7667     self._ExpandAndLockInstance()
7668     self.needed_locks[locking.LEVEL_NODE] = []
7669     self.needed_locks[locking.LEVEL_NODE_RES] = []
7670     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7671
7672   def DeclareLocks(self, level):
7673     if level == locking.LEVEL_NODE:
7674       self._LockInstancesNodes()
7675     elif level == locking.LEVEL_NODE_RES:
7676       # Copy node locks
7677       self.needed_locks[locking.LEVEL_NODE_RES] = \
7678         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7679
7680   def BuildHooksEnv(self):
7681     """Build hooks env.
7682
7683     This runs on master, primary and secondary nodes of the instance.
7684
7685     """
7686     env = _BuildInstanceHookEnvByObject(self, self.instance)
7687     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7688     return env
7689
7690   def BuildHooksNodes(self):
7691     """Build hooks nodes.
7692
7693     """
7694     nl = [self.cfg.GetMasterNode()]
7695     nl_post = list(self.instance.all_nodes) + nl
7696     return (nl, nl_post)
7697
7698   def CheckPrereq(self):
7699     """Check prerequisites.
7700
7701     This checks that the instance is in the cluster.
7702
7703     """
7704     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7705     assert self.instance is not None, \
7706       "Cannot retrieve locked instance %s" % self.op.instance_name
7707
7708   def Exec(self, feedback_fn):
7709     """Remove the instance.
7710
7711     """
7712     instance = self.instance
7713     logging.info("Shutting down instance %s on node %s",
7714                  instance.name, instance.primary_node)
7715
7716     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7717                                              self.op.shutdown_timeout)
7718     msg = result.fail_msg
7719     if msg:
7720       if self.op.ignore_failures:
7721         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7722       else:
7723         raise errors.OpExecError("Could not shutdown instance %s on"
7724                                  " node %s: %s" %
7725                                  (instance.name, instance.primary_node, msg))
7726
7727     assert (self.owned_locks(locking.LEVEL_NODE) ==
7728             self.owned_locks(locking.LEVEL_NODE_RES))
7729     assert not (set(instance.all_nodes) -
7730                 self.owned_locks(locking.LEVEL_NODE)), \
7731       "Not owning correct locks"
7732
7733     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7734
7735
7736 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7737   """Utility function to remove an instance.
7738
7739   """
7740   logging.info("Removing block devices for instance %s", instance.name)
7741
7742   if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7743     if not ignore_failures:
7744       raise errors.OpExecError("Can't remove instance's disks")
7745     feedback_fn("Warning: can't remove instance's disks")
7746
7747   logging.info("Removing instance %s out of cluster config", instance.name)
7748
7749   lu.cfg.RemoveInstance(instance.name)
7750
7751   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7752     "Instance lock removal conflict"
7753
7754   # Remove lock for the instance
7755   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7756
7757
7758 class LUInstanceQuery(NoHooksLU):
7759   """Logical unit for querying instances.
7760
7761   """
7762   # pylint: disable=W0142
7763   REQ_BGL = False
7764
7765   def CheckArguments(self):
7766     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7767                              self.op.output_fields, self.op.use_locking)
7768
7769   def ExpandNames(self):
7770     self.iq.ExpandNames(self)
7771
7772   def DeclareLocks(self, level):
7773     self.iq.DeclareLocks(self, level)
7774
7775   def Exec(self, feedback_fn):
7776     return self.iq.OldStyleQuery(self)
7777
7778
7779 class LUInstanceFailover(LogicalUnit):
7780   """Failover an instance.
7781
7782   """
7783   HPATH = "instance-failover"
7784   HTYPE = constants.HTYPE_INSTANCE
7785   REQ_BGL = False
7786
7787   def CheckArguments(self):
7788     """Check the arguments.
7789
7790     """
7791     self.iallocator = getattr(self.op, "iallocator", None)
7792     self.target_node = getattr(self.op, "target_node", None)
7793
7794   def ExpandNames(self):
7795     self._ExpandAndLockInstance()
7796
7797     if self.op.target_node is not None:
7798       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7799
7800     self.needed_locks[locking.LEVEL_NODE] = []
7801     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7802
7803     self.needed_locks[locking.LEVEL_NODE_RES] = []
7804     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7805
7806     ignore_consistency = self.op.ignore_consistency
7807     shutdown_timeout = self.op.shutdown_timeout
7808     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7809                                        cleanup=False,
7810                                        failover=True,
7811                                        ignore_consistency=ignore_consistency,
7812                                        shutdown_timeout=shutdown_timeout,
7813                                        ignore_ipolicy=self.op.ignore_ipolicy)
7814     self.tasklets = [self._migrater]
7815
7816   def DeclareLocks(self, level):
7817     if level == locking.LEVEL_NODE:
7818       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7819       if instance.disk_template in constants.DTS_EXT_MIRROR:
7820         if self.op.target_node is None:
7821           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7822         else:
7823           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7824                                                    self.op.target_node]
7825         del self.recalculate_locks[locking.LEVEL_NODE]
7826       else:
7827         self._LockInstancesNodes()
7828     elif level == locking.LEVEL_NODE_RES:
7829       # Copy node locks
7830       self.needed_locks[locking.LEVEL_NODE_RES] = \
7831         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7832
7833   def BuildHooksEnv(self):
7834     """Build hooks env.
7835
7836     This runs on master, primary and secondary nodes of the instance.
7837
7838     """
7839     instance = self._migrater.instance
7840     source_node = instance.primary_node
7841     target_node = self.op.target_node
7842     env = {
7843       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7844       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7845       "OLD_PRIMARY": source_node,
7846       "NEW_PRIMARY": target_node,
7847       }
7848
7849     if instance.disk_template in constants.DTS_INT_MIRROR:
7850       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7851       env["NEW_SECONDARY"] = source_node
7852     else:
7853       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7854
7855     env.update(_BuildInstanceHookEnvByObject(self, instance))
7856
7857     return env
7858
7859   def BuildHooksNodes(self):
7860     """Build hooks nodes.
7861
7862     """
7863     instance = self._migrater.instance
7864     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7865     return (nl, nl + [instance.primary_node])
7866
7867
7868 class LUInstanceMigrate(LogicalUnit):
7869   """Migrate an instance.
7870
7871   This is migration without shutting down, compared to the failover,
7872   which is done with shutdown.
7873
7874   """
7875   HPATH = "instance-migrate"
7876   HTYPE = constants.HTYPE_INSTANCE
7877   REQ_BGL = False
7878
7879   def ExpandNames(self):
7880     self._ExpandAndLockInstance()
7881
7882     if self.op.target_node is not None:
7883       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7884
7885     self.needed_locks[locking.LEVEL_NODE] = []
7886     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7887
7888     self.needed_locks[locking.LEVEL_NODE] = []
7889     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7890
7891     self._migrater = \
7892       TLMigrateInstance(self, self.op.instance_name,
7893                         cleanup=self.op.cleanup,
7894                         failover=False,
7895                         fallback=self.op.allow_failover,
7896                         allow_runtime_changes=self.op.allow_runtime_changes,
7897                         ignore_ipolicy=self.op.ignore_ipolicy)
7898     self.tasklets = [self._migrater]
7899
7900   def DeclareLocks(self, level):
7901     if level == locking.LEVEL_NODE:
7902       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7903       if instance.disk_template in constants.DTS_EXT_MIRROR:
7904         if self.op.target_node is None:
7905           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7906         else:
7907           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7908                                                    self.op.target_node]
7909         del self.recalculate_locks[locking.LEVEL_NODE]
7910       else:
7911         self._LockInstancesNodes()
7912     elif level == locking.LEVEL_NODE_RES:
7913       # Copy node locks
7914       self.needed_locks[locking.LEVEL_NODE_RES] = \
7915         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7916
7917   def BuildHooksEnv(self):
7918     """Build hooks env.
7919
7920     This runs on master, primary and secondary nodes of the instance.
7921
7922     """
7923     instance = self._migrater.instance
7924     source_node = instance.primary_node
7925     target_node = self.op.target_node
7926     env = _BuildInstanceHookEnvByObject(self, instance)
7927     env.update({
7928       "MIGRATE_LIVE": self._migrater.live,
7929       "MIGRATE_CLEANUP": self.op.cleanup,
7930       "OLD_PRIMARY": source_node,
7931       "NEW_PRIMARY": target_node,
7932       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7933       })
7934
7935     if instance.disk_template in constants.DTS_INT_MIRROR:
7936       env["OLD_SECONDARY"] = target_node
7937       env["NEW_SECONDARY"] = source_node
7938     else:
7939       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7940
7941     return env
7942
7943   def BuildHooksNodes(self):
7944     """Build hooks nodes.
7945
7946     """
7947     instance = self._migrater.instance
7948     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7949     return (nl, nl + [instance.primary_node])
7950
7951
7952 class LUInstanceMove(LogicalUnit):
7953   """Move an instance by data-copying.
7954
7955   """
7956   HPATH = "instance-move"
7957   HTYPE = constants.HTYPE_INSTANCE
7958   REQ_BGL = False
7959
7960   def ExpandNames(self):
7961     self._ExpandAndLockInstance()
7962     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7963     self.op.target_node = target_node
7964     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7965     self.needed_locks[locking.LEVEL_NODE_RES] = []
7966     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7967
7968   def DeclareLocks(self, level):
7969     if level == locking.LEVEL_NODE:
7970       self._LockInstancesNodes(primary_only=True)
7971     elif level == locking.LEVEL_NODE_RES:
7972       # Copy node locks
7973       self.needed_locks[locking.LEVEL_NODE_RES] = \
7974         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7975
7976   def BuildHooksEnv(self):
7977     """Build hooks env.
7978
7979     This runs on master, primary and secondary nodes of the instance.
7980
7981     """
7982     env = {
7983       "TARGET_NODE": self.op.target_node,
7984       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7985       }
7986     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7987     return env
7988
7989   def BuildHooksNodes(self):
7990     """Build hooks nodes.
7991
7992     """
7993     nl = [
7994       self.cfg.GetMasterNode(),
7995       self.instance.primary_node,
7996       self.op.target_node,
7997       ]
7998     return (nl, nl)
7999
8000   def CheckPrereq(self):
8001     """Check prerequisites.
8002
8003     This checks that the instance is in the cluster.
8004
8005     """
8006     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8007     assert self.instance is not None, \
8008       "Cannot retrieve locked instance %s" % self.op.instance_name
8009
8010     node = self.cfg.GetNodeInfo(self.op.target_node)
8011     assert node is not None, \
8012       "Cannot retrieve locked node %s" % self.op.target_node
8013
8014     self.target_node = target_node = node.name
8015
8016     if target_node == instance.primary_node:
8017       raise errors.OpPrereqError("Instance %s is already on the node %s" %
8018                                  (instance.name, target_node),
8019                                  errors.ECODE_STATE)
8020
8021     bep = self.cfg.GetClusterInfo().FillBE(instance)
8022
8023     for idx, dsk in enumerate(instance.disks):
8024       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8025         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8026                                    " cannot copy" % idx, errors.ECODE_STATE)
8027
8028     _CheckNodeOnline(self, target_node)
8029     _CheckNodeNotDrained(self, target_node)
8030     _CheckNodeVmCapable(self, target_node)
8031     cluster = self.cfg.GetClusterInfo()
8032     group_info = self.cfg.GetNodeGroup(node.group)
8033     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8034     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8035                             ignore=self.op.ignore_ipolicy)
8036
8037     if instance.admin_state == constants.ADMINST_UP:
8038       # check memory requirements on the secondary node
8039       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8040                            instance.name, bep[constants.BE_MAXMEM],
8041                            instance.hypervisor)
8042     else:
8043       self.LogInfo("Not checking memory on the secondary node as"
8044                    " instance will not be started")
8045
8046     # check bridge existance
8047     _CheckInstanceBridgesExist(self, instance, node=target_node)
8048
8049   def Exec(self, feedback_fn):
8050     """Move an instance.
8051
8052     The move is done by shutting it down on its present node, copying
8053     the data over (slow) and starting it on the new node.
8054
8055     """
8056     instance = self.instance
8057
8058     source_node = instance.primary_node
8059     target_node = self.target_node
8060
8061     self.LogInfo("Shutting down instance %s on source node %s",
8062                  instance.name, source_node)
8063
8064     assert (self.owned_locks(locking.LEVEL_NODE) ==
8065             self.owned_locks(locking.LEVEL_NODE_RES))
8066
8067     result = self.rpc.call_instance_shutdown(source_node, instance,
8068                                              self.op.shutdown_timeout)
8069     msg = result.fail_msg
8070     if msg:
8071       if self.op.ignore_consistency:
8072         self.proc.LogWarning("Could not shutdown instance %s on node %s."
8073                              " Proceeding anyway. Please make sure node"
8074                              " %s is down. Error details: %s",
8075                              instance.name, source_node, source_node, msg)
8076       else:
8077         raise errors.OpExecError("Could not shutdown instance %s on"
8078                                  " node %s: %s" %
8079                                  (instance.name, source_node, msg))
8080
8081     # create the target disks
8082     try:
8083       _CreateDisks(self, instance, target_node=target_node)
8084     except errors.OpExecError:
8085       self.LogWarning("Device creation failed, reverting...")
8086       try:
8087         _RemoveDisks(self, instance, target_node=target_node)
8088       finally:
8089         self.cfg.ReleaseDRBDMinors(instance.name)
8090         raise
8091
8092     cluster_name = self.cfg.GetClusterInfo().cluster_name
8093
8094     errs = []
8095     # activate, get path, copy the data over
8096     for idx, disk in enumerate(instance.disks):
8097       self.LogInfo("Copying data for disk %d", idx)
8098       result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8099                                                instance.name, True, idx)
8100       if result.fail_msg:
8101         self.LogWarning("Can't assemble newly created disk %d: %s",
8102                         idx, result.fail_msg)
8103         errs.append(result.fail_msg)
8104         break
8105       dev_path = result.payload
8106       result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8107                                              target_node, dev_path,
8108                                              cluster_name)
8109       if result.fail_msg:
8110         self.LogWarning("Can't copy data over for disk %d: %s",
8111                         idx, result.fail_msg)
8112         errs.append(result.fail_msg)
8113         break
8114
8115     if errs:
8116       self.LogWarning("Some disks failed to copy, aborting")
8117       try:
8118         _RemoveDisks(self, instance, target_node=target_node)
8119       finally:
8120         self.cfg.ReleaseDRBDMinors(instance.name)
8121         raise errors.OpExecError("Errors during disk copy: %s" %
8122                                  (",".join(errs),))
8123
8124     instance.primary_node = target_node
8125     self.cfg.Update(instance, feedback_fn)
8126
8127     self.LogInfo("Removing the disks on the original node")
8128     _RemoveDisks(self, instance, target_node=source_node)
8129
8130     # Only start the instance if it's marked as up
8131     if instance.admin_state == constants.ADMINST_UP:
8132       self.LogInfo("Starting instance %s on node %s",
8133                    instance.name, target_node)
8134
8135       disks_ok, _ = _AssembleInstanceDisks(self, instance,
8136                                            ignore_secondaries=True)
8137       if not disks_ok:
8138         _ShutdownInstanceDisks(self, instance)
8139         raise errors.OpExecError("Can't activate the instance's disks")
8140
8141       result = self.rpc.call_instance_start(target_node,
8142                                             (instance, None, None), False)
8143       msg = result.fail_msg
8144       if msg:
8145         _ShutdownInstanceDisks(self, instance)
8146         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8147                                  (instance.name, target_node, msg))
8148
8149
8150 class LUNodeMigrate(LogicalUnit):
8151   """Migrate all instances from a node.
8152
8153   """
8154   HPATH = "node-migrate"
8155   HTYPE = constants.HTYPE_NODE
8156   REQ_BGL = False
8157
8158   def CheckArguments(self):
8159     pass
8160
8161   def ExpandNames(self):
8162     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8163
8164     self.share_locks = _ShareAll()
8165     self.needed_locks = {
8166       locking.LEVEL_NODE: [self.op.node_name],
8167       }
8168
8169   def BuildHooksEnv(self):
8170     """Build hooks env.
8171
8172     This runs on the master, the primary and all the secondaries.
8173
8174     """
8175     return {
8176       "NODE_NAME": self.op.node_name,
8177       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8178       }
8179
8180   def BuildHooksNodes(self):
8181     """Build hooks nodes.
8182
8183     """
8184     nl = [self.cfg.GetMasterNode()]
8185     return (nl, nl)
8186
8187   def CheckPrereq(self):
8188     pass
8189
8190   def Exec(self, feedback_fn):
8191     # Prepare jobs for migration instances
8192     allow_runtime_changes = self.op.allow_runtime_changes
8193     jobs = [
8194       [opcodes.OpInstanceMigrate(instance_name=inst.name,
8195                                  mode=self.op.mode,
8196                                  live=self.op.live,
8197                                  iallocator=self.op.iallocator,
8198                                  target_node=self.op.target_node,
8199                                  allow_runtime_changes=allow_runtime_changes,
8200                                  ignore_ipolicy=self.op.ignore_ipolicy)]
8201       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8202
8203     # TODO: Run iallocator in this opcode and pass correct placement options to
8204     # OpInstanceMigrate. Since other jobs can modify the cluster between
8205     # running the iallocator and the actual migration, a good consistency model
8206     # will have to be found.
8207
8208     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8209             frozenset([self.op.node_name]))
8210
8211     return ResultWithJobs(jobs)
8212
8213
8214 class TLMigrateInstance(Tasklet):
8215   """Tasklet class for instance migration.
8216
8217   @type live: boolean
8218   @ivar live: whether the migration will be done live or non-live;
8219       this variable is initalized only after CheckPrereq has run
8220   @type cleanup: boolean
8221   @ivar cleanup: Wheater we cleanup from a failed migration
8222   @type iallocator: string
8223   @ivar iallocator: The iallocator used to determine target_node
8224   @type target_node: string
8225   @ivar target_node: If given, the target_node to reallocate the instance to
8226   @type failover: boolean
8227   @ivar failover: Whether operation results in failover or migration
8228   @type fallback: boolean
8229   @ivar fallback: Whether fallback to failover is allowed if migration not
8230                   possible
8231   @type ignore_consistency: boolean
8232   @ivar ignore_consistency: Wheter we should ignore consistency between source
8233                             and target node
8234   @type shutdown_timeout: int
8235   @ivar shutdown_timeout: In case of failover timeout of the shutdown
8236   @type ignore_ipolicy: bool
8237   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8238
8239   """
8240
8241   # Constants
8242   _MIGRATION_POLL_INTERVAL = 1      # seconds
8243   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8244
8245   def __init__(self, lu, instance_name, cleanup=False,
8246                failover=False, fallback=False,
8247                ignore_consistency=False,
8248                allow_runtime_changes=True,
8249                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8250                ignore_ipolicy=False):
8251     """Initializes this class.
8252
8253     """
8254     Tasklet.__init__(self, lu)
8255
8256     # Parameters
8257     self.instance_name = instance_name
8258     self.cleanup = cleanup
8259     self.live = False # will be overridden later
8260     self.failover = failover
8261     self.fallback = fallback
8262     self.ignore_consistency = ignore_consistency
8263     self.shutdown_timeout = shutdown_timeout
8264     self.ignore_ipolicy = ignore_ipolicy
8265     self.allow_runtime_changes = allow_runtime_changes
8266
8267   def CheckPrereq(self):
8268     """Check prerequisites.
8269
8270     This checks that the instance is in the cluster.
8271
8272     """
8273     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8274     instance = self.cfg.GetInstanceInfo(instance_name)
8275     assert instance is not None
8276     self.instance = instance
8277     cluster = self.cfg.GetClusterInfo()
8278
8279     if (not self.cleanup and
8280         not instance.admin_state == constants.ADMINST_UP and
8281         not self.failover and self.fallback):
8282       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8283                       " switching to failover")
8284       self.failover = True
8285
8286     if instance.disk_template not in constants.DTS_MIRRORED:
8287       if self.failover:
8288         text = "failovers"
8289       else:
8290         text = "migrations"
8291       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8292                                  " %s" % (instance.disk_template, text),
8293                                  errors.ECODE_STATE)
8294
8295     if instance.disk_template in constants.DTS_EXT_MIRROR:
8296       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8297
8298       if self.lu.op.iallocator:
8299         self._RunAllocator()
8300       else:
8301         # We set set self.target_node as it is required by
8302         # BuildHooksEnv
8303         self.target_node = self.lu.op.target_node
8304
8305       # Check that the target node is correct in terms of instance policy
8306       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8307       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8308       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8309                                                               group_info)
8310       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8311                               ignore=self.ignore_ipolicy)
8312
8313       # self.target_node is already populated, either directly or by the
8314       # iallocator run
8315       target_node = self.target_node
8316       if self.target_node == instance.primary_node:
8317         raise errors.OpPrereqError("Cannot migrate instance %s"
8318                                    " to its primary (%s)" %
8319                                    (instance.name, instance.primary_node),
8320                                    errors.ECODE_STATE)
8321
8322       if len(self.lu.tasklets) == 1:
8323         # It is safe to release locks only when we're the only tasklet
8324         # in the LU
8325         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8326                       keep=[instance.primary_node, self.target_node])
8327
8328     else:
8329       secondary_nodes = instance.secondary_nodes
8330       if not secondary_nodes:
8331         raise errors.ConfigurationError("No secondary node but using"
8332                                         " %s disk template" %
8333                                         instance.disk_template)
8334       target_node = secondary_nodes[0]
8335       if self.lu.op.iallocator or (self.lu.op.target_node and
8336                                    self.lu.op.target_node != target_node):
8337         if self.failover:
8338           text = "failed over"
8339         else:
8340           text = "migrated"
8341         raise errors.OpPrereqError("Instances with disk template %s cannot"
8342                                    " be %s to arbitrary nodes"
8343                                    " (neither an iallocator nor a target"
8344                                    " node can be passed)" %
8345                                    (instance.disk_template, text),
8346                                    errors.ECODE_INVAL)
8347       nodeinfo = self.cfg.GetNodeInfo(target_node)
8348       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8349       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8350                                                               group_info)
8351       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8352                               ignore=self.ignore_ipolicy)
8353
8354     i_be = cluster.FillBE(instance)
8355
8356     # check memory requirements on the secondary node
8357     if (not self.cleanup and
8358          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8359       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8360                                                "migrating instance %s" %
8361                                                instance.name,
8362                                                i_be[constants.BE_MINMEM],
8363                                                instance.hypervisor)
8364     else:
8365       self.lu.LogInfo("Not checking memory on the secondary node as"
8366                       " instance will not be started")
8367
8368     # check if failover must be forced instead of migration
8369     if (not self.cleanup and not self.failover and
8370         i_be[constants.BE_ALWAYS_FAILOVER]):
8371       self.lu.LogInfo("Instance configured to always failover; fallback"
8372                       " to failover")
8373       self.failover = True
8374
8375     # check bridge existance
8376     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8377
8378     if not self.cleanup:
8379       _CheckNodeNotDrained(self.lu, target_node)
8380       if not self.failover:
8381         result = self.rpc.call_instance_migratable(instance.primary_node,
8382                                                    instance)
8383         if result.fail_msg and self.fallback:
8384           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8385                           " failover")
8386           self.failover = True
8387         else:
8388           result.Raise("Can't migrate, please use failover",
8389                        prereq=True, ecode=errors.ECODE_STATE)
8390
8391     assert not (self.failover and self.cleanup)
8392
8393     if not self.failover:
8394       if self.lu.op.live is not None and self.lu.op.mode is not None:
8395         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8396                                    " parameters are accepted",
8397                                    errors.ECODE_INVAL)
8398       if self.lu.op.live is not None:
8399         if self.lu.op.live:
8400           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8401         else:
8402           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8403         # reset the 'live' parameter to None so that repeated
8404         # invocations of CheckPrereq do not raise an exception
8405         self.lu.op.live = None
8406       elif self.lu.op.mode is None:
8407         # read the default value from the hypervisor
8408         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8409         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8410
8411       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8412     else:
8413       # Failover is never live
8414       self.live = False
8415
8416     if not (self.failover or self.cleanup):
8417       remote_info = self.rpc.call_instance_info(instance.primary_node,
8418                                                 instance.name,
8419                                                 instance.hypervisor)
8420       remote_info.Raise("Error checking instance on node %s" %
8421                         instance.primary_node)
8422       instance_running = bool(remote_info.payload)
8423       if instance_running:
8424         self.current_mem = int(remote_info.payload["memory"])
8425
8426   def _RunAllocator(self):
8427     """Run the allocator based on input opcode.
8428
8429     """
8430     # FIXME: add a self.ignore_ipolicy option
8431     req = iallocator.IAReqRelocate(name=self.instance_name,
8432                                    relocate_from=[self.instance.primary_node])
8433     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8434
8435     ial.Run(self.lu.op.iallocator)
8436
8437     if not ial.success:
8438       raise errors.OpPrereqError("Can't compute nodes using"
8439                                  " iallocator '%s': %s" %
8440                                  (self.lu.op.iallocator, ial.info),
8441                                  errors.ECODE_NORES)
8442     self.target_node = ial.result[0]
8443     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8444                     self.instance_name, self.lu.op.iallocator,
8445                     utils.CommaJoin(ial.result))
8446
8447   def _WaitUntilSync(self):
8448     """Poll with custom rpc for disk sync.
8449
8450     This uses our own step-based rpc call.
8451
8452     """
8453     self.feedback_fn("* wait until resync is done")
8454     all_done = False
8455     while not all_done:
8456       all_done = True
8457       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8458                                             self.nodes_ip,
8459                                             (self.instance.disks,
8460                                              self.instance))
8461       min_percent = 100
8462       for node, nres in result.items():
8463         nres.Raise("Cannot resync disks on node %s" % node)
8464         node_done, node_percent = nres.payload
8465         all_done = all_done and node_done
8466         if node_percent is not None:
8467           min_percent = min(min_percent, node_percent)
8468       if not all_done:
8469         if min_percent < 100:
8470           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8471         time.sleep(2)
8472
8473   def _EnsureSecondary(self, node):
8474     """Demote a node to secondary.
8475
8476     """
8477     self.feedback_fn("* switching node %s to secondary mode" % node)
8478
8479     for dev in self.instance.disks:
8480       self.cfg.SetDiskID(dev, node)
8481
8482     result = self.rpc.call_blockdev_close(node, self.instance.name,
8483                                           self.instance.disks)
8484     result.Raise("Cannot change disk to secondary on node %s" % node)
8485
8486   def _GoStandalone(self):
8487     """Disconnect from the network.
8488
8489     """
8490     self.feedback_fn("* changing into standalone mode")
8491     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8492                                                self.instance.disks)
8493     for node, nres in result.items():
8494       nres.Raise("Cannot disconnect disks node %s" % node)
8495
8496   def _GoReconnect(self, multimaster):
8497     """Reconnect to the network.
8498
8499     """
8500     if multimaster:
8501       msg = "dual-master"
8502     else:
8503       msg = "single-master"
8504     self.feedback_fn("* changing disks into %s mode" % msg)
8505     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8506                                            (self.instance.disks, self.instance),
8507                                            self.instance.name, multimaster)
8508     for node, nres in result.items():
8509       nres.Raise("Cannot change disks config on node %s" % node)
8510
8511   def _ExecCleanup(self):
8512     """Try to cleanup after a failed migration.
8513
8514     The cleanup is done by:
8515       - check that the instance is running only on one node
8516         (and update the config if needed)
8517       - change disks on its secondary node to secondary
8518       - wait until disks are fully synchronized
8519       - disconnect from the network
8520       - change disks into single-master mode
8521       - wait again until disks are fully synchronized
8522
8523     """
8524     instance = self.instance
8525     target_node = self.target_node
8526     source_node = self.source_node
8527
8528     # check running on only one node
8529     self.feedback_fn("* checking where the instance actually runs"
8530                      " (if this hangs, the hypervisor might be in"
8531                      " a bad state)")
8532     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8533     for node, result in ins_l.items():
8534       result.Raise("Can't contact node %s" % node)
8535
8536     runningon_source = instance.name in ins_l[source_node].payload
8537     runningon_target = instance.name in ins_l[target_node].payload
8538
8539     if runningon_source and runningon_target:
8540       raise errors.OpExecError("Instance seems to be running on two nodes,"
8541                                " or the hypervisor is confused; you will have"
8542                                " to ensure manually that it runs only on one"
8543                                " and restart this operation")
8544
8545     if not (runningon_source or runningon_target):
8546       raise errors.OpExecError("Instance does not seem to be running at all;"
8547                                " in this case it's safer to repair by"
8548                                " running 'gnt-instance stop' to ensure disk"
8549                                " shutdown, and then restarting it")
8550
8551     if runningon_target:
8552       # the migration has actually succeeded, we need to update the config
8553       self.feedback_fn("* instance running on secondary node (%s),"
8554                        " updating config" % target_node)
8555       instance.primary_node = target_node
8556       self.cfg.Update(instance, self.feedback_fn)
8557       demoted_node = source_node
8558     else:
8559       self.feedback_fn("* instance confirmed to be running on its"
8560                        " primary node (%s)" % source_node)
8561       demoted_node = target_node
8562
8563     if instance.disk_template in constants.DTS_INT_MIRROR:
8564       self._EnsureSecondary(demoted_node)
8565       try:
8566         self._WaitUntilSync()
8567       except errors.OpExecError:
8568         # we ignore here errors, since if the device is standalone, it
8569         # won't be able to sync
8570         pass
8571       self._GoStandalone()
8572       self._GoReconnect(False)
8573       self._WaitUntilSync()
8574
8575     self.feedback_fn("* done")
8576
8577   def _RevertDiskStatus(self):
8578     """Try to revert the disk status after a failed migration.
8579
8580     """
8581     target_node = self.target_node
8582     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8583       return
8584
8585     try:
8586       self._EnsureSecondary(target_node)
8587       self._GoStandalone()
8588       self._GoReconnect(False)
8589       self._WaitUntilSync()
8590     except errors.OpExecError, err:
8591       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8592                          " please try to recover the instance manually;"
8593                          " error '%s'" % str(err))
8594
8595   def _AbortMigration(self):
8596     """Call the hypervisor code to abort a started migration.
8597
8598     """
8599     instance = self.instance
8600     target_node = self.target_node
8601     source_node = self.source_node
8602     migration_info = self.migration_info
8603
8604     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8605                                                                  instance,
8606                                                                  migration_info,
8607                                                                  False)
8608     abort_msg = abort_result.fail_msg
8609     if abort_msg:
8610       logging.error("Aborting migration failed on target node %s: %s",
8611                     target_node, abort_msg)
8612       # Don't raise an exception here, as we stil have to try to revert the
8613       # disk status, even if this step failed.
8614
8615     abort_result = self.rpc.call_instance_finalize_migration_src(
8616       source_node, instance, False, self.live)
8617     abort_msg = abort_result.fail_msg
8618     if abort_msg:
8619       logging.error("Aborting migration failed on source node %s: %s",
8620                     source_node, abort_msg)
8621
8622   def _ExecMigration(self):
8623     """Migrate an instance.
8624
8625     The migrate is done by:
8626       - change the disks into dual-master mode
8627       - wait until disks are fully synchronized again
8628       - migrate the instance
8629       - change disks on the new secondary node (the old primary) to secondary
8630       - wait until disks are fully synchronized
8631       - change disks into single-master mode
8632
8633     """
8634     instance = self.instance
8635     target_node = self.target_node
8636     source_node = self.source_node
8637
8638     # Check for hypervisor version mismatch and warn the user.
8639     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8640                                        None, [self.instance.hypervisor])
8641     for ninfo in nodeinfo.values():
8642       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8643                   ninfo.node)
8644     (_, _, (src_info, )) = nodeinfo[source_node].payload
8645     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8646
8647     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8648         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8649       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8650       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8651       if src_version != dst_version:
8652         self.feedback_fn("* warning: hypervisor version mismatch between"
8653                          " source (%s) and target (%s) node" %
8654                          (src_version, dst_version))
8655
8656     self.feedback_fn("* checking disk consistency between source and target")
8657     for (idx, dev) in enumerate(instance.disks):
8658       if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8659         raise errors.OpExecError("Disk %s is degraded or not fully"
8660                                  " synchronized on target node,"
8661                                  " aborting migration" % idx)
8662
8663     if self.current_mem > self.tgt_free_mem:
8664       if not self.allow_runtime_changes:
8665         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8666                                  " free memory to fit instance %s on target"
8667                                  " node %s (have %dMB, need %dMB)" %
8668                                  (instance.name, target_node,
8669                                   self.tgt_free_mem, self.current_mem))
8670       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8671       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8672                                                      instance,
8673                                                      self.tgt_free_mem)
8674       rpcres.Raise("Cannot modify instance runtime memory")
8675
8676     # First get the migration information from the remote node
8677     result = self.rpc.call_migration_info(source_node, instance)
8678     msg = result.fail_msg
8679     if msg:
8680       log_err = ("Failed fetching source migration information from %s: %s" %
8681                  (source_node, msg))
8682       logging.error(log_err)
8683       raise errors.OpExecError(log_err)
8684
8685     self.migration_info = migration_info = result.payload
8686
8687     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8688       # Then switch the disks to master/master mode
8689       self._EnsureSecondary(target_node)
8690       self._GoStandalone()
8691       self._GoReconnect(True)
8692       self._WaitUntilSync()
8693
8694     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8695     result = self.rpc.call_accept_instance(target_node,
8696                                            instance,
8697                                            migration_info,
8698                                            self.nodes_ip[target_node])
8699
8700     msg = result.fail_msg
8701     if msg:
8702       logging.error("Instance pre-migration failed, trying to revert"
8703                     " disk status: %s", msg)
8704       self.feedback_fn("Pre-migration failed, aborting")
8705       self._AbortMigration()
8706       self._RevertDiskStatus()
8707       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8708                                (instance.name, msg))
8709
8710     self.feedback_fn("* migrating instance to %s" % target_node)
8711     result = self.rpc.call_instance_migrate(source_node, instance,
8712                                             self.nodes_ip[target_node],
8713                                             self.live)
8714     msg = result.fail_msg
8715     if msg:
8716       logging.error("Instance migration failed, trying to revert"
8717                     " disk status: %s", msg)
8718       self.feedback_fn("Migration failed, aborting")
8719       self._AbortMigration()
8720       self._RevertDiskStatus()
8721       raise errors.OpExecError("Could not migrate instance %s: %s" %
8722                                (instance.name, msg))
8723
8724     self.feedback_fn("* starting memory transfer")
8725     last_feedback = time.time()
8726     while True:
8727       result = self.rpc.call_instance_get_migration_status(source_node,
8728                                                            instance)
8729       msg = result.fail_msg
8730       ms = result.payload   # MigrationStatus instance
8731       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8732         logging.error("Instance migration failed, trying to revert"
8733                       " disk status: %s", msg)
8734         self.feedback_fn("Migration failed, aborting")
8735         self._AbortMigration()
8736         self._RevertDiskStatus()
8737         if not msg:
8738           msg = "hypervisor returned failure"
8739         raise errors.OpExecError("Could not migrate instance %s: %s" %
8740                                  (instance.name, msg))
8741
8742       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8743         self.feedback_fn("* memory transfer complete")
8744         break
8745
8746       if (utils.TimeoutExpired(last_feedback,
8747                                self._MIGRATION_FEEDBACK_INTERVAL) and
8748           ms.transferred_ram is not None):
8749         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8750         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8751         last_feedback = time.time()
8752
8753       time.sleep(self._MIGRATION_POLL_INTERVAL)
8754
8755     result = self.rpc.call_instance_finalize_migration_src(source_node,
8756                                                            instance,
8757                                                            True,
8758                                                            self.live)
8759     msg = result.fail_msg
8760     if msg:
8761       logging.error("Instance migration succeeded, but finalization failed"
8762                     " on the source node: %s", msg)
8763       raise errors.OpExecError("Could not finalize instance migration: %s" %
8764                                msg)
8765
8766     instance.primary_node = target_node
8767
8768     # distribute new instance config to the other nodes
8769     self.cfg.Update(instance, self.feedback_fn)
8770
8771     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8772                                                            instance,
8773                                                            migration_info,
8774                                                            True)
8775     msg = result.fail_msg
8776     if msg:
8777       logging.error("Instance migration succeeded, but finalization failed"
8778                     " on the target node: %s", msg)
8779       raise errors.OpExecError("Could not finalize instance migration: %s" %
8780                                msg)
8781
8782     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8783       self._EnsureSecondary(source_node)
8784       self._WaitUntilSync()
8785       self._GoStandalone()
8786       self._GoReconnect(False)
8787       self._WaitUntilSync()
8788
8789     # If the instance's disk template is `rbd' and there was a successful
8790     # migration, unmap the device from the source node.
8791     if self.instance.disk_template == constants.DT_RBD:
8792       disks = _ExpandCheckDisks(instance, instance.disks)
8793       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8794       for disk in disks:
8795         result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8796         msg = result.fail_msg
8797         if msg:
8798           logging.error("Migration was successful, but couldn't unmap the"
8799                         " block device %s on source node %s: %s",
8800                         disk.iv_name, source_node, msg)
8801           logging.error("You need to unmap the device %s manually on %s",
8802                         disk.iv_name, source_node)
8803
8804     self.feedback_fn("* done")
8805
8806   def _ExecFailover(self):
8807     """Failover an instance.
8808
8809     The failover is done by shutting it down on its present node and
8810     starting it on the secondary.
8811
8812     """
8813     instance = self.instance
8814     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8815
8816     source_node = instance.primary_node
8817     target_node = self.target_node
8818
8819     if instance.admin_state == constants.ADMINST_UP:
8820       self.feedback_fn("* checking disk consistency between source and target")
8821       for (idx, dev) in enumerate(instance.disks):
8822         # for drbd, these are drbd over lvm
8823         if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8824                                      False):
8825           if primary_node.offline:
8826             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8827                              " target node %s" %
8828                              (primary_node.name, idx, target_node))
8829           elif not self.ignore_consistency:
8830             raise errors.OpExecError("Disk %s is degraded on target node,"
8831                                      " aborting failover" % idx)
8832     else:
8833       self.feedback_fn("* not checking disk consistency as instance is not"
8834                        " running")
8835
8836     self.feedback_fn("* shutting down instance on source node")
8837     logging.info("Shutting down instance %s on node %s",
8838                  instance.name, source_node)
8839
8840     result = self.rpc.call_instance_shutdown(source_node, instance,
8841                                              self.shutdown_timeout)
8842     msg = result.fail_msg
8843     if msg:
8844       if self.ignore_consistency or primary_node.offline:
8845         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8846                            " proceeding anyway; please make sure node"
8847                            " %s is down; error details: %s",
8848                            instance.name, source_node, source_node, msg)
8849       else:
8850         raise errors.OpExecError("Could not shutdown instance %s on"
8851                                  " node %s: %s" %
8852                                  (instance.name, source_node, msg))
8853
8854     self.feedback_fn("* deactivating the instance's disks on source node")
8855     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8856       raise errors.OpExecError("Can't shut down the instance's disks")
8857
8858     instance.primary_node = target_node
8859     # distribute new instance config to the other nodes
8860     self.cfg.Update(instance, self.feedback_fn)
8861
8862     # Only start the instance if it's marked as up
8863     if instance.admin_state == constants.ADMINST_UP:
8864       self.feedback_fn("* activating the instance's disks on target node %s" %
8865                        target_node)
8866       logging.info("Starting instance %s on node %s",
8867                    instance.name, target_node)
8868
8869       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8870                                            ignore_secondaries=True)
8871       if not disks_ok:
8872         _ShutdownInstanceDisks(self.lu, instance)
8873         raise errors.OpExecError("Can't activate the instance's disks")
8874
8875       self.feedback_fn("* starting the instance on the target node %s" %
8876                        target_node)
8877       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8878                                             False)
8879       msg = result.fail_msg
8880       if msg:
8881         _ShutdownInstanceDisks(self.lu, instance)
8882         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8883                                  (instance.name, target_node, msg))
8884
8885   def Exec(self, feedback_fn):
8886     """Perform the migration.
8887
8888     """
8889     self.feedback_fn = feedback_fn
8890     self.source_node = self.instance.primary_node
8891
8892     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8893     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8894       self.target_node = self.instance.secondary_nodes[0]
8895       # Otherwise self.target_node has been populated either
8896       # directly, or through an iallocator.
8897
8898     self.all_nodes = [self.source_node, self.target_node]
8899     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8900                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8901
8902     if self.failover:
8903       feedback_fn("Failover instance %s" % self.instance.name)
8904       self._ExecFailover()
8905     else:
8906       feedback_fn("Migrating instance %s" % self.instance.name)
8907
8908       if self.cleanup:
8909         return self._ExecCleanup()
8910       else:
8911         return self._ExecMigration()
8912
8913
8914 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8915                     force_open):
8916   """Wrapper around L{_CreateBlockDevInner}.
8917
8918   This method annotates the root device first.
8919
8920   """
8921   (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8922   return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8923                               force_open)
8924
8925
8926 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8927                          info, force_open):
8928   """Create a tree of block devices on a given node.
8929
8930   If this device type has to be created on secondaries, create it and
8931   all its children.
8932
8933   If not, just recurse to children keeping the same 'force' value.
8934
8935   @attention: The device has to be annotated already.
8936
8937   @param lu: the lu on whose behalf we execute
8938   @param node: the node on which to create the device
8939   @type instance: L{objects.Instance}
8940   @param instance: the instance which owns the device
8941   @type device: L{objects.Disk}
8942   @param device: the device to create
8943   @type force_create: boolean
8944   @param force_create: whether to force creation of this device; this
8945       will be change to True whenever we find a device which has
8946       CreateOnSecondary() attribute
8947   @param info: the extra 'metadata' we should attach to the device
8948       (this will be represented as a LVM tag)
8949   @type force_open: boolean
8950   @param force_open: this parameter will be passes to the
8951       L{backend.BlockdevCreate} function where it specifies
8952       whether we run on primary or not, and it affects both
8953       the child assembly and the device own Open() execution
8954
8955   """
8956   if device.CreateOnSecondary():
8957     force_create = True
8958
8959   if device.children:
8960     for child in device.children:
8961       _CreateBlockDevInner(lu, node, instance, child, force_create,
8962                            info, force_open)
8963
8964   if not force_create:
8965     return
8966
8967   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8968
8969
8970 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8971   """Create a single block device on a given node.
8972
8973   This will not recurse over children of the device, so they must be
8974   created in advance.
8975
8976   @param lu: the lu on whose behalf we execute
8977   @param node: the node on which to create the device
8978   @type instance: L{objects.Instance}
8979   @param instance: the instance which owns the device
8980   @type device: L{objects.Disk}
8981   @param device: the device to create
8982   @param info: the extra 'metadata' we should attach to the device
8983       (this will be represented as a LVM tag)
8984   @type force_open: boolean
8985   @param force_open: this parameter will be passes to the
8986       L{backend.BlockdevCreate} function where it specifies
8987       whether we run on primary or not, and it affects both
8988       the child assembly and the device own Open() execution
8989
8990   """
8991   lu.cfg.SetDiskID(device, node)
8992   result = lu.rpc.call_blockdev_create(node, device, device.size,
8993                                        instance.name, force_open, info)
8994   result.Raise("Can't create block device %s on"
8995                " node %s for instance %s" % (device, node, instance.name))
8996   if device.physical_id is None:
8997     device.physical_id = result.payload
8998
8999
9000 def _GenerateUniqueNames(lu, exts):
9001   """Generate a suitable LV name.
9002
9003   This will generate a logical volume name for the given instance.
9004
9005   """
9006   results = []
9007   for val in exts:
9008     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9009     results.append("%s%s" % (new_id, val))
9010   return results
9011
9012
9013 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9014                          iv_name, p_minor, s_minor):
9015   """Generate a drbd8 device complete with its children.
9016
9017   """
9018   assert len(vgnames) == len(names) == 2
9019   port = lu.cfg.AllocatePort()
9020   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9021
9022   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9023                           logical_id=(vgnames[0], names[0]),
9024                           params={})
9025   dev_meta = objects.Disk(dev_type=constants.LD_LV,
9026                           size=constants.DRBD_META_SIZE,
9027                           logical_id=(vgnames[1], names[1]),
9028                           params={})
9029   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9030                           logical_id=(primary, secondary, port,
9031                                       p_minor, s_minor,
9032                                       shared_secret),
9033                           children=[dev_data, dev_meta],
9034                           iv_name=iv_name, params={})
9035   return drbd_dev
9036
9037
9038 _DISK_TEMPLATE_NAME_PREFIX = {
9039   constants.DT_PLAIN: "",
9040   constants.DT_RBD: ".rbd",
9041   }
9042
9043
9044 _DISK_TEMPLATE_DEVICE_TYPE = {
9045   constants.DT_PLAIN: constants.LD_LV,
9046   constants.DT_FILE: constants.LD_FILE,
9047   constants.DT_SHARED_FILE: constants.LD_FILE,
9048   constants.DT_BLOCK: constants.LD_BLOCKDEV,
9049   constants.DT_RBD: constants.LD_RBD,
9050   }
9051
9052
9053 def _GenerateDiskTemplate(
9054   lu, template_name, instance_name, primary_node, secondary_nodes,
9055   disk_info, file_storage_dir, file_driver, base_index,
9056   feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9057   _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9058   """Generate the entire disk layout for a given template type.
9059
9060   """
9061   #TODO: compute space requirements
9062
9063   vgname = lu.cfg.GetVGName()
9064   disk_count = len(disk_info)
9065   disks = []
9066
9067   if template_name == constants.DT_DISKLESS:
9068     pass
9069   elif template_name == constants.DT_DRBD8:
9070     if len(secondary_nodes) != 1:
9071       raise errors.ProgrammerError("Wrong template configuration")
9072     remote_node = secondary_nodes[0]
9073     minors = lu.cfg.AllocateDRBDMinor(
9074       [primary_node, remote_node] * len(disk_info), instance_name)
9075
9076     (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9077                                                        full_disk_params)
9078     drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9079
9080     names = []
9081     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9082                                                for i in range(disk_count)]):
9083       names.append(lv_prefix + "_data")
9084       names.append(lv_prefix + "_meta")
9085     for idx, disk in enumerate(disk_info):
9086       disk_index = idx + base_index
9087       data_vg = disk.get(constants.IDISK_VG, vgname)
9088       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9089       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9090                                       disk[constants.IDISK_SIZE],
9091                                       [data_vg, meta_vg],
9092                                       names[idx * 2:idx * 2 + 2],
9093                                       "disk/%d" % disk_index,
9094                                       minors[idx * 2], minors[idx * 2 + 1])
9095       disk_dev.mode = disk[constants.IDISK_MODE]
9096       disks.append(disk_dev)
9097   else:
9098     if secondary_nodes:
9099       raise errors.ProgrammerError("Wrong template configuration")
9100
9101     if template_name == constants.DT_FILE:
9102       _req_file_storage()
9103     elif template_name == constants.DT_SHARED_FILE:
9104       _req_shr_file_storage()
9105
9106     name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9107     if name_prefix is None:
9108       names = None
9109     else:
9110       names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9111                                         (name_prefix, base_index + i)
9112                                         for i in range(disk_count)])
9113
9114     if template_name == constants.DT_PLAIN:
9115
9116       def logical_id_fn(idx, _, disk):
9117         vg = disk.get(constants.IDISK_VG, vgname)
9118         return (vg, names[idx])
9119
9120     elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9121       logical_id_fn = \
9122         lambda _, disk_index, disk: (file_driver,
9123                                      "%s/disk%d" % (file_storage_dir,
9124                                                     disk_index))
9125     elif template_name == constants.DT_BLOCK:
9126       logical_id_fn = \
9127         lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9128                                        disk[constants.IDISK_ADOPT])
9129     elif template_name == constants.DT_RBD:
9130       logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9131     else:
9132       raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9133
9134     dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9135
9136     for idx, disk in enumerate(disk_info):
9137       disk_index = idx + base_index
9138       size = disk[constants.IDISK_SIZE]
9139       feedback_fn("* disk %s, size %s" %
9140                   (disk_index, utils.FormatUnit(size, "h")))
9141       disks.append(objects.Disk(dev_type=dev_type, size=size,
9142                                 logical_id=logical_id_fn(idx, disk_index, disk),
9143                                 iv_name="disk/%d" % disk_index,
9144                                 mode=disk[constants.IDISK_MODE],
9145                                 params={}))
9146
9147   return disks
9148
9149
9150 def _GetInstanceInfoText(instance):
9151   """Compute that text that should be added to the disk's metadata.
9152
9153   """
9154   return "originstname+%s" % instance.name
9155
9156
9157 def _CalcEta(time_taken, written, total_size):
9158   """Calculates the ETA based on size written and total size.
9159
9160   @param time_taken: The time taken so far
9161   @param written: amount written so far
9162   @param total_size: The total size of data to be written
9163   @return: The remaining time in seconds
9164
9165   """
9166   avg_time = time_taken / float(written)
9167   return (total_size - written) * avg_time
9168
9169
9170 def _WipeDisks(lu, instance, disks=None):
9171   """Wipes instance disks.
9172
9173   @type lu: L{LogicalUnit}
9174   @param lu: the logical unit on whose behalf we execute
9175   @type instance: L{objects.Instance}
9176   @param instance: the instance whose disks we should create
9177   @return: the success of the wipe
9178
9179   """
9180   node = instance.primary_node
9181
9182   if disks is None:
9183     disks = [(idx, disk, 0)
9184              for (idx, disk) in enumerate(instance.disks)]
9185
9186   for (_, device, _) in disks:
9187     lu.cfg.SetDiskID(device, node)
9188
9189   logging.info("Pausing synchronization of disks of instance '%s'",
9190                instance.name)
9191   result = lu.rpc.call_blockdev_pause_resume_sync(node,
9192                                                   (map(compat.snd, disks),
9193                                                    instance),
9194                                                   True)
9195   result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9196
9197   for idx, success in enumerate(result.payload):
9198     if not success:
9199       logging.warn("Pausing synchronization of disk %s of instance '%s'"
9200                    " failed", idx, instance.name)
9201
9202   try:
9203     for (idx, device, offset) in disks:
9204       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9205       # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9206       wipe_chunk_size = \
9207         int(min(constants.MAX_WIPE_CHUNK,
9208                 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9209
9210       size = device.size
9211       last_output = 0
9212       start_time = time.time()
9213
9214       if offset == 0:
9215         info_text = ""
9216       else:
9217         info_text = (" (from %s to %s)" %
9218                      (utils.FormatUnit(offset, "h"),
9219                       utils.FormatUnit(size, "h")))
9220
9221       lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9222
9223       logging.info("Wiping disk %d for instance %s on node %s using"
9224                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9225
9226       while offset < size:
9227         wipe_size = min(wipe_chunk_size, size - offset)
9228
9229         logging.debug("Wiping disk %d, offset %s, chunk %s",
9230                       idx, offset, wipe_size)
9231
9232         result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9233                                            wipe_size)
9234         result.Raise("Could not wipe disk %d at offset %d for size %d" %
9235                      (idx, offset, wipe_size))
9236
9237         now = time.time()
9238         offset += wipe_size
9239         if now - last_output >= 60:
9240           eta = _CalcEta(now - start_time, offset, size)
9241           lu.LogInfo(" - done: %.1f%% ETA: %s",
9242                      offset / float(size) * 100, utils.FormatSeconds(eta))
9243           last_output = now
9244   finally:
9245     logging.info("Resuming synchronization of disks for instance '%s'",
9246                  instance.name)
9247
9248     result = lu.rpc.call_blockdev_pause_resume_sync(node,
9249                                                     (map(compat.snd, disks),
9250                                                      instance),
9251                                                     False)
9252
9253     if result.fail_msg:
9254       lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9255                     node, result.fail_msg)
9256     else:
9257       for idx, success in enumerate(result.payload):
9258         if not success:
9259           lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9260                         " failed", idx, instance.name)
9261
9262
9263 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9264   """Create all disks for an instance.
9265
9266   This abstracts away some work from AddInstance.
9267
9268   @type lu: L{LogicalUnit}
9269   @param lu: the logical unit on whose behalf we execute
9270   @type instance: L{objects.Instance}
9271   @param instance: the instance whose disks we should create
9272   @type to_skip: list
9273   @param to_skip: list of indices to skip
9274   @type target_node: string
9275   @param target_node: if passed, overrides the target node for creation
9276   @rtype: boolean
9277   @return: the success of the creation
9278
9279   """
9280   info = _GetInstanceInfoText(instance)
9281   if target_node is None:
9282     pnode = instance.primary_node
9283     all_nodes = instance.all_nodes
9284   else:
9285     pnode = target_node
9286     all_nodes = [pnode]
9287
9288   if instance.disk_template in constants.DTS_FILEBASED:
9289     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9290     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9291
9292     result.Raise("Failed to create directory '%s' on"
9293                  " node %s" % (file_storage_dir, pnode))
9294
9295   # Note: this needs to be kept in sync with adding of disks in
9296   # LUInstanceSetParams
9297   for idx, device in enumerate(instance.disks):
9298     if to_skip and idx in to_skip:
9299       continue
9300     logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9301     #HARDCODE
9302     for node in all_nodes:
9303       f_create = node == pnode
9304       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9305
9306
9307 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9308   """Remove all disks for an instance.
9309
9310   This abstracts away some work from `AddInstance()` and
9311   `RemoveInstance()`. Note that in case some of the devices couldn't
9312   be removed, the removal will continue with the other ones (compare
9313   with `_CreateDisks()`).
9314
9315   @type lu: L{LogicalUnit}
9316   @param lu: the logical unit on whose behalf we execute
9317   @type instance: L{objects.Instance}
9318   @param instance: the instance whose disks we should remove
9319   @type target_node: string
9320   @param target_node: used to override the node on which to remove the disks
9321   @rtype: boolean
9322   @return: the success of the removal
9323
9324   """
9325   logging.info("Removing block devices for instance %s", instance.name)
9326
9327   all_result = True
9328   ports_to_release = set()
9329   anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9330   for (idx, device) in enumerate(anno_disks):
9331     if target_node:
9332       edata = [(target_node, device)]
9333     else:
9334       edata = device.ComputeNodeTree(instance.primary_node)
9335     for node, disk in edata:
9336       lu.cfg.SetDiskID(disk, node)
9337       result = lu.rpc.call_blockdev_remove(node, disk)
9338       if result.fail_msg:
9339         lu.LogWarning("Could not remove disk %s on node %s,"
9340                       " continuing anyway: %s", idx, node, result.fail_msg)
9341         if not (result.offline and node != instance.primary_node):
9342           all_result = False
9343
9344     # if this is a DRBD disk, return its port to the pool
9345     if device.dev_type in constants.LDS_DRBD:
9346       ports_to_release.add(device.logical_id[2])
9347
9348   if all_result or ignore_failures:
9349     for port in ports_to_release:
9350       lu.cfg.AddTcpUdpPort(port)
9351
9352   if instance.disk_template in constants.DTS_FILEBASED:
9353     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9354     if target_node:
9355       tgt = target_node
9356     else:
9357       tgt = instance.primary_node
9358     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9359     if result.fail_msg:
9360       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9361                     file_storage_dir, instance.primary_node, result.fail_msg)
9362       all_result = False
9363
9364   return all_result
9365
9366
9367 def _ComputeDiskSizePerVG(disk_template, disks):
9368   """Compute disk size requirements in the volume group
9369
9370   """
9371   def _compute(disks, payload):
9372     """Universal algorithm.
9373
9374     """
9375     vgs = {}
9376     for disk in disks:
9377       vgs[disk[constants.IDISK_VG]] = \
9378         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9379
9380     return vgs
9381
9382   # Required free disk space as a function of disk and swap space
9383   req_size_dict = {
9384     constants.DT_DISKLESS: {},
9385     constants.DT_PLAIN: _compute(disks, 0),
9386     # 128 MB are added for drbd metadata for each disk
9387     constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9388     constants.DT_FILE: {},
9389     constants.DT_SHARED_FILE: {},
9390   }
9391
9392   if disk_template not in req_size_dict:
9393     raise errors.ProgrammerError("Disk template '%s' size requirement"
9394                                  " is unknown" % disk_template)
9395
9396   return req_size_dict[disk_template]
9397
9398
9399 def _FilterVmNodes(lu, nodenames):
9400   """Filters out non-vm_capable nodes from a list.
9401
9402   @type lu: L{LogicalUnit}
9403   @param lu: the logical unit for which we check
9404   @type nodenames: list
9405   @param nodenames: the list of nodes on which we should check
9406   @rtype: list
9407   @return: the list of vm-capable nodes
9408
9409   """
9410   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9411   return [name for name in nodenames if name not in vm_nodes]
9412
9413
9414 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9415   """Hypervisor parameter validation.
9416
9417   This function abstract the hypervisor parameter validation to be
9418   used in both instance create and instance modify.
9419
9420   @type lu: L{LogicalUnit}
9421   @param lu: the logical unit for which we check
9422   @type nodenames: list
9423   @param nodenames: the list of nodes on which we should check
9424   @type hvname: string
9425   @param hvname: the name of the hypervisor we should use
9426   @type hvparams: dict
9427   @param hvparams: the parameters which we need to check
9428   @raise errors.OpPrereqError: if the parameters are not valid
9429
9430   """
9431   nodenames = _FilterVmNodes(lu, nodenames)
9432
9433   cluster = lu.cfg.GetClusterInfo()
9434   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9435
9436   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9437   for node in nodenames:
9438     info = hvinfo[node]
9439     if info.offline:
9440       continue
9441     info.Raise("Hypervisor parameter validation failed on node %s" % node)
9442
9443
9444 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9445   """OS parameters validation.
9446
9447   @type lu: L{LogicalUnit}
9448   @param lu: the logical unit for which we check
9449   @type required: boolean
9450   @param required: whether the validation should fail if the OS is not
9451       found
9452   @type nodenames: list
9453   @param nodenames: the list of nodes on which we should check
9454   @type osname: string
9455   @param osname: the name of the hypervisor we should use
9456   @type osparams: dict
9457   @param osparams: the parameters which we need to check
9458   @raise errors.OpPrereqError: if the parameters are not valid
9459
9460   """
9461   nodenames = _FilterVmNodes(lu, nodenames)
9462   result = lu.rpc.call_os_validate(nodenames, required, osname,
9463                                    [constants.OS_VALIDATE_PARAMETERS],
9464                                    osparams)
9465   for node, nres in result.items():
9466     # we don't check for offline cases since this should be run only
9467     # against the master node and/or an instance's nodes
9468     nres.Raise("OS Parameters validation failed on node %s" % node)
9469     if not nres.payload:
9470       lu.LogInfo("OS %s not found on node %s, validation skipped",
9471                  osname, node)
9472
9473
9474 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9475   """Wrapper around IAReqInstanceAlloc.
9476
9477   @param op: The instance opcode
9478   @param disks: The computed disks
9479   @param nics: The computed nics
9480   @param beparams: The full filled beparams
9481
9482   @returns: A filled L{iallocator.IAReqInstanceAlloc}
9483
9484   """
9485   spindle_use = beparams[constants.BE_SPINDLE_USE]
9486   return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9487                                        disk_template=op.disk_template,
9488                                        tags=op.tags,
9489                                        os=op.os_type,
9490                                        vcpus=beparams[constants.BE_VCPUS],
9491                                        memory=beparams[constants.BE_MAXMEM],
9492                                        spindle_use=spindle_use,
9493                                        disks=disks,
9494                                        nics=[n.ToDict() for n in nics],
9495                                        hypervisor=op.hypervisor)
9496
9497
9498 def _ComputeNics(op, cluster, default_ip, cfg, proc):
9499   """Computes the nics.
9500
9501   @param op: The instance opcode
9502   @param cluster: Cluster configuration object
9503   @param default_ip: The default ip to assign
9504   @param cfg: An instance of the configuration object
9505   @param proc: The executer instance
9506
9507   @returns: The build up nics
9508
9509   """
9510   nics = []
9511   for nic in op.nics:
9512     nic_mode_req = nic.get(constants.INIC_MODE, None)
9513     nic_mode = nic_mode_req
9514     if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9515       nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9516
9517     net = nic.get(constants.INIC_NETWORK, None)
9518     link = nic.get(constants.NIC_LINK, None)
9519     ip = nic.get(constants.INIC_IP, None)
9520
9521     if net is None or net.lower() == constants.VALUE_NONE:
9522       net = None
9523     else:
9524       if nic_mode_req is not None or link is not None:
9525         raise errors.OpPrereqError("If network is given, no mode or link"
9526                                    " is allowed to be passed",
9527                                    errors.ECODE_INVAL)
9528
9529     # ip validity checks
9530     if ip is None or ip.lower() == constants.VALUE_NONE:
9531       nic_ip = None
9532     elif ip.lower() == constants.VALUE_AUTO:
9533       if not op.name_check:
9534         raise errors.OpPrereqError("IP address set to auto but name checks"
9535                                    " have been skipped",
9536                                    errors.ECODE_INVAL)
9537       nic_ip = default_ip
9538     else:
9539       # We defer pool operations until later, so that the iallocator has
9540       # filled in the instance's node(s) dimara
9541       if ip.lower() == constants.NIC_IP_POOL:
9542         if net is None:
9543           raise errors.OpPrereqError("if ip=pool, parameter network"
9544                                      " must be passed too",
9545                                      errors.ECODE_INVAL)
9546
9547       elif not netutils.IPAddress.IsValid(ip):
9548         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9549                                    errors.ECODE_INVAL)
9550
9551       nic_ip = ip
9552
9553     # TODO: check the ip address for uniqueness
9554     if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9555       raise errors.OpPrereqError("Routed nic mode requires an ip address",
9556                                  errors.ECODE_INVAL)
9557
9558     # MAC address verification
9559     mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9560     if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9561       mac = utils.NormalizeAndValidateMac(mac)
9562
9563       try:
9564         # TODO: We need to factor this out
9565         cfg.ReserveMAC(mac, proc.GetECId())
9566       except errors.ReservationError:
9567         raise errors.OpPrereqError("MAC address %s already in use"
9568                                    " in cluster" % mac,
9569                                    errors.ECODE_NOTUNIQUE)
9570
9571     #  Build nic parameters
9572     nicparams = {}
9573     if nic_mode_req:
9574       nicparams[constants.NIC_MODE] = nic_mode
9575     if link:
9576       nicparams[constants.NIC_LINK] = link
9577
9578     check_params = cluster.SimpleFillNIC(nicparams)
9579     objects.NIC.CheckParameterSyntax(check_params)
9580     nics.append(objects.NIC(mac=mac, ip=nic_ip,
9581                             network=net, nicparams=nicparams))
9582
9583   return nics
9584
9585
9586 def _ComputeDisks(op, default_vg):
9587   """Computes the instance disks.
9588
9589   @param op: The instance opcode
9590   @param default_vg: The default_vg to assume
9591
9592   @return: The computer disks
9593
9594   """
9595   disks = []
9596   for disk in op.disks:
9597     mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9598     if mode not in constants.DISK_ACCESS_SET:
9599       raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9600                                  mode, errors.ECODE_INVAL)
9601     size = disk.get(constants.IDISK_SIZE, None)
9602     if size is None:
9603       raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9604     try:
9605       size = int(size)
9606     except (TypeError, ValueError):
9607       raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9608                                  errors.ECODE_INVAL)
9609
9610     data_vg = disk.get(constants.IDISK_VG, default_vg)
9611     new_disk = {
9612       constants.IDISK_SIZE: size,
9613       constants.IDISK_MODE: mode,
9614       constants.IDISK_VG: data_vg,
9615       }
9616     if constants.IDISK_METAVG in disk:
9617       new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9618     if constants.IDISK_ADOPT in disk:
9619       new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9620     disks.append(new_disk)
9621
9622   return disks
9623
9624
9625 def _ComputeFullBeParams(op, cluster):
9626   """Computes the full beparams.
9627
9628   @param op: The instance opcode
9629   @param cluster: The cluster config object
9630
9631   @return: The fully filled beparams
9632
9633   """
9634   default_beparams = cluster.beparams[constants.PP_DEFAULT]
9635   for param, value in op.beparams.iteritems():
9636     if value == constants.VALUE_AUTO:
9637       op.beparams[param] = default_beparams[param]
9638   objects.UpgradeBeParams(op.beparams)
9639   utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9640   return cluster.SimpleFillBE(op.beparams)
9641
9642
9643 class LUInstanceCreate(LogicalUnit):
9644   """Create an instance.
9645
9646   """
9647   HPATH = "instance-add"
9648   HTYPE = constants.HTYPE_INSTANCE
9649   REQ_BGL = False
9650
9651   def CheckArguments(self):
9652     """Check arguments.
9653
9654     """
9655     # do not require name_check to ease forward/backward compatibility
9656     # for tools
9657     if self.op.no_install and self.op.start:
9658       self.LogInfo("No-installation mode selected, disabling startup")
9659       self.op.start = False
9660     # validate/normalize the instance name
9661     self.op.instance_name = \
9662       netutils.Hostname.GetNormalizedName(self.op.instance_name)
9663
9664     if self.op.ip_check and not self.op.name_check:
9665       # TODO: make the ip check more flexible and not depend on the name check
9666       raise errors.OpPrereqError("Cannot do IP address check without a name"
9667                                  " check", errors.ECODE_INVAL)
9668
9669     # check nics' parameter names
9670     for nic in self.op.nics:
9671       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9672
9673     # check disks. parameter names and consistent adopt/no-adopt strategy
9674     has_adopt = has_no_adopt = False
9675     for disk in self.op.disks:
9676       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9677       if constants.IDISK_ADOPT in disk:
9678         has_adopt = True
9679       else:
9680         has_no_adopt = True
9681     if has_adopt and has_no_adopt:
9682       raise errors.OpPrereqError("Either all disks are adopted or none is",
9683                                  errors.ECODE_INVAL)
9684     if has_adopt:
9685       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9686         raise errors.OpPrereqError("Disk adoption is not supported for the"
9687                                    " '%s' disk template" %
9688                                    self.op.disk_template,
9689                                    errors.ECODE_INVAL)
9690       if self.op.iallocator is not None:
9691         raise errors.OpPrereqError("Disk adoption not allowed with an"
9692                                    " iallocator script", errors.ECODE_INVAL)
9693       if self.op.mode == constants.INSTANCE_IMPORT:
9694         raise errors.OpPrereqError("Disk adoption not allowed for"
9695                                    " instance import", errors.ECODE_INVAL)
9696     else:
9697       if self.op.disk_template in constants.DTS_MUST_ADOPT:
9698         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9699                                    " but no 'adopt' parameter given" %
9700                                    self.op.disk_template,
9701                                    errors.ECODE_INVAL)
9702
9703     self.adopt_disks = has_adopt
9704
9705     # instance name verification
9706     if self.op.name_check:
9707       self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9708       self.op.instance_name = self.hostname1.name
9709       # used in CheckPrereq for ip ping check
9710       self.check_ip = self.hostname1.ip
9711     else:
9712       self.check_ip = None
9713
9714     # file storage checks
9715     if (self.op.file_driver and
9716         not self.op.file_driver in constants.FILE_DRIVER):
9717       raise errors.OpPrereqError("Invalid file driver name '%s'" %
9718                                  self.op.file_driver, errors.ECODE_INVAL)
9719
9720     if self.op.disk_template == constants.DT_FILE:
9721       opcodes.RequireFileStorage()
9722     elif self.op.disk_template == constants.DT_SHARED_FILE:
9723       opcodes.RequireSharedFileStorage()
9724
9725     ### Node/iallocator related checks
9726     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9727
9728     if self.op.pnode is not None:
9729       if self.op.disk_template in constants.DTS_INT_MIRROR:
9730         if self.op.snode is None:
9731           raise errors.OpPrereqError("The networked disk templates need"
9732                                      " a mirror node", errors.ECODE_INVAL)
9733       elif self.op.snode:
9734         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9735                         " template")
9736         self.op.snode = None
9737
9738     self._cds = _GetClusterDomainSecret()
9739
9740     if self.op.mode == constants.INSTANCE_IMPORT:
9741       # On import force_variant must be True, because if we forced it at
9742       # initial install, our only chance when importing it back is that it
9743       # works again!
9744       self.op.force_variant = True
9745
9746       if self.op.no_install:
9747         self.LogInfo("No-installation mode has no effect during import")
9748
9749     elif self.op.mode == constants.INSTANCE_CREATE:
9750       if self.op.os_type is None:
9751         raise errors.OpPrereqError("No guest OS specified",
9752                                    errors.ECODE_INVAL)
9753       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9754         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9755                                    " installation" % self.op.os_type,
9756                                    errors.ECODE_STATE)
9757       if self.op.disk_template is None:
9758         raise errors.OpPrereqError("No disk template specified",
9759                                    errors.ECODE_INVAL)
9760
9761     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9762       # Check handshake to ensure both clusters have the same domain secret
9763       src_handshake = self.op.source_handshake
9764       if not src_handshake:
9765         raise errors.OpPrereqError("Missing source handshake",
9766                                    errors.ECODE_INVAL)
9767
9768       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9769                                                            src_handshake)
9770       if errmsg:
9771         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9772                                    errors.ECODE_INVAL)
9773
9774       # Load and check source CA
9775       self.source_x509_ca_pem = self.op.source_x509_ca
9776       if not self.source_x509_ca_pem:
9777         raise errors.OpPrereqError("Missing source X509 CA",
9778                                    errors.ECODE_INVAL)
9779
9780       try:
9781         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9782                                                     self._cds)
9783       except OpenSSL.crypto.Error, err:
9784         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9785                                    (err, ), errors.ECODE_INVAL)
9786
9787       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9788       if errcode is not None:
9789         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9790                                    errors.ECODE_INVAL)
9791
9792       self.source_x509_ca = cert
9793
9794       src_instance_name = self.op.source_instance_name
9795       if not src_instance_name:
9796         raise errors.OpPrereqError("Missing source instance name",
9797                                    errors.ECODE_INVAL)
9798
9799       self.source_instance_name = \
9800           netutils.GetHostname(name=src_instance_name).name
9801
9802     else:
9803       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9804                                  self.op.mode, errors.ECODE_INVAL)
9805
9806   def ExpandNames(self):
9807     """ExpandNames for CreateInstance.
9808
9809     Figure out the right locks for instance creation.
9810
9811     """
9812     self.needed_locks = {}
9813
9814     instance_name = self.op.instance_name
9815     # this is just a preventive check, but someone might still add this
9816     # instance in the meantime, and creation will fail at lock-add time
9817     if instance_name in self.cfg.GetInstanceList():
9818       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9819                                  instance_name, errors.ECODE_EXISTS)
9820
9821     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9822
9823     if self.op.iallocator:
9824       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9825       # specifying a group on instance creation and then selecting nodes from
9826       # that group
9827       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9828       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9829     else:
9830       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9831       nodelist = [self.op.pnode]
9832       if self.op.snode is not None:
9833         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9834         nodelist.append(self.op.snode)
9835       self.needed_locks[locking.LEVEL_NODE] = nodelist
9836       # Lock resources of instance's primary and secondary nodes (copy to
9837       # prevent accidential modification)
9838       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9839
9840     # in case of import lock the source node too
9841     if self.op.mode == constants.INSTANCE_IMPORT:
9842       src_node = self.op.src_node
9843       src_path = self.op.src_path
9844
9845       if src_path is None:
9846         self.op.src_path = src_path = self.op.instance_name
9847
9848       if src_node is None:
9849         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9850         self.op.src_node = None
9851         if os.path.isabs(src_path):
9852           raise errors.OpPrereqError("Importing an instance from a path"
9853                                      " requires a source node option",
9854                                      errors.ECODE_INVAL)
9855       else:
9856         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9857         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9858           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9859         if not os.path.isabs(src_path):
9860           self.op.src_path = src_path = \
9861             utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9862
9863   def _RunAllocator(self):
9864     """Run the allocator based on input opcode.
9865
9866     """
9867     #TODO Export network to iallocator so that it chooses a pnode
9868     #     in a nodegroup that has the desired network connected to
9869     req = _CreateInstanceAllocRequest(self.op, self.disks,
9870                                       self.nics, self.be_full)
9871     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9872
9873     ial.Run(self.op.iallocator)
9874
9875     if not ial.success:
9876       raise errors.OpPrereqError("Can't compute nodes using"
9877                                  " iallocator '%s': %s" %
9878                                  (self.op.iallocator, ial.info),
9879                                  errors.ECODE_NORES)
9880     self.op.pnode = ial.result[0]
9881     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9882                  self.op.instance_name, self.op.iallocator,
9883                  utils.CommaJoin(ial.result))
9884
9885     assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9886
9887     if req.RequiredNodes() == 2:
9888       self.op.snode = ial.result[1]
9889
9890   def BuildHooksEnv(self):
9891     """Build hooks env.
9892
9893     This runs on master, primary and secondary nodes of the instance.
9894
9895     """
9896     env = {
9897       "ADD_MODE": self.op.mode,
9898       }
9899     if self.op.mode == constants.INSTANCE_IMPORT:
9900       env["SRC_NODE"] = self.op.src_node
9901       env["SRC_PATH"] = self.op.src_path
9902       env["SRC_IMAGES"] = self.src_images
9903
9904     env.update(_BuildInstanceHookEnv(
9905       name=self.op.instance_name,
9906       primary_node=self.op.pnode,
9907       secondary_nodes=self.secondaries,
9908       status=self.op.start,
9909       os_type=self.op.os_type,
9910       minmem=self.be_full[constants.BE_MINMEM],
9911       maxmem=self.be_full[constants.BE_MAXMEM],
9912       vcpus=self.be_full[constants.BE_VCPUS],
9913       nics=_NICListToTuple(self, self.nics),
9914       disk_template=self.op.disk_template,
9915       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9916              for d in self.disks],
9917       bep=self.be_full,
9918       hvp=self.hv_full,
9919       hypervisor_name=self.op.hypervisor,
9920       tags=self.op.tags,
9921     ))
9922
9923     return env
9924
9925   def BuildHooksNodes(self):
9926     """Build hooks nodes.
9927
9928     """
9929     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9930     return nl, nl
9931
9932   def _ReadExportInfo(self):
9933     """Reads the export information from disk.
9934
9935     It will override the opcode source node and path with the actual
9936     information, if these two were not specified before.
9937
9938     @return: the export information
9939
9940     """
9941     assert self.op.mode == constants.INSTANCE_IMPORT
9942
9943     src_node = self.op.src_node
9944     src_path = self.op.src_path
9945
9946     if src_node is None:
9947       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9948       exp_list = self.rpc.call_export_list(locked_nodes)
9949       found = False
9950       for node in exp_list:
9951         if exp_list[node].fail_msg:
9952           continue
9953         if src_path in exp_list[node].payload:
9954           found = True
9955           self.op.src_node = src_node = node
9956           self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9957                                                        src_path)
9958           break
9959       if not found:
9960         raise errors.OpPrereqError("No export found for relative path %s" %
9961                                     src_path, errors.ECODE_INVAL)
9962
9963     _CheckNodeOnline(self, src_node)
9964     result = self.rpc.call_export_info(src_node, src_path)
9965     result.Raise("No export or invalid export found in dir %s" % src_path)
9966
9967     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9968     if not export_info.has_section(constants.INISECT_EXP):
9969       raise errors.ProgrammerError("Corrupted export config",
9970                                    errors.ECODE_ENVIRON)
9971
9972     ei_version = export_info.get(constants.INISECT_EXP, "version")
9973     if (int(ei_version) != constants.EXPORT_VERSION):
9974       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9975                                  (ei_version, constants.EXPORT_VERSION),
9976                                  errors.ECODE_ENVIRON)
9977     return export_info
9978
9979   def _ReadExportParams(self, einfo):
9980     """Use export parameters as defaults.
9981
9982     In case the opcode doesn't specify (as in override) some instance
9983     parameters, then try to use them from the export information, if
9984     that declares them.
9985
9986     """
9987     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9988
9989     if self.op.disk_template is None:
9990       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9991         self.op.disk_template = einfo.get(constants.INISECT_INS,
9992                                           "disk_template")
9993         if self.op.disk_template not in constants.DISK_TEMPLATES:
9994           raise errors.OpPrereqError("Disk template specified in configuration"
9995                                      " file is not one of the allowed values:"
9996                                      " %s" %
9997                                      " ".join(constants.DISK_TEMPLATES),
9998                                      errors.ECODE_INVAL)
9999       else:
10000         raise errors.OpPrereqError("No disk template specified and the export"
10001                                    " is missing the disk_template information",
10002                                    errors.ECODE_INVAL)
10003
10004     if not self.op.disks:
10005       disks = []
10006       # TODO: import the disk iv_name too
10007       for idx in range(constants.MAX_DISKS):
10008         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10009           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10010           disks.append({constants.IDISK_SIZE: disk_sz})
10011       self.op.disks = disks
10012       if not disks and self.op.disk_template != constants.DT_DISKLESS:
10013         raise errors.OpPrereqError("No disk info specified and the export"
10014                                    " is missing the disk information",
10015                                    errors.ECODE_INVAL)
10016
10017     if not self.op.nics:
10018       nics = []
10019       for idx in range(constants.MAX_NICS):
10020         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10021           ndict = {}
10022           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10023             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10024             ndict[name] = v
10025           nics.append(ndict)
10026         else:
10027           break
10028       self.op.nics = nics
10029
10030     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10031       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10032
10033     if (self.op.hypervisor is None and
10034         einfo.has_option(constants.INISECT_INS, "hypervisor")):
10035       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10036
10037     if einfo.has_section(constants.INISECT_HYP):
10038       # use the export parameters but do not override the ones
10039       # specified by the user
10040       for name, value in einfo.items(constants.INISECT_HYP):
10041         if name not in self.op.hvparams:
10042           self.op.hvparams[name] = value
10043
10044     if einfo.has_section(constants.INISECT_BEP):
10045       # use the parameters, without overriding
10046       for name, value in einfo.items(constants.INISECT_BEP):
10047         if name not in self.op.beparams:
10048           self.op.beparams[name] = value
10049         # Compatibility for the old "memory" be param
10050         if name == constants.BE_MEMORY:
10051           if constants.BE_MAXMEM not in self.op.beparams:
10052             self.op.beparams[constants.BE_MAXMEM] = value
10053           if constants.BE_MINMEM not in self.op.beparams:
10054             self.op.beparams[constants.BE_MINMEM] = value
10055     else:
10056       # try to read the parameters old style, from the main section
10057       for name in constants.BES_PARAMETERS:
10058         if (name not in self.op.beparams and
10059             einfo.has_option(constants.INISECT_INS, name)):
10060           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10061
10062     if einfo.has_section(constants.INISECT_OSP):
10063       # use the parameters, without overriding
10064       for name, value in einfo.items(constants.INISECT_OSP):
10065         if name not in self.op.osparams:
10066           self.op.osparams[name] = value
10067
10068   def _RevertToDefaults(self, cluster):
10069     """Revert the instance parameters to the default values.
10070
10071     """
10072     # hvparams
10073     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10074     for name in self.op.hvparams.keys():
10075       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10076         del self.op.hvparams[name]
10077     # beparams
10078     be_defs = cluster.SimpleFillBE({})
10079     for name in self.op.beparams.keys():
10080       if name in be_defs and be_defs[name] == self.op.beparams[name]:
10081         del self.op.beparams[name]
10082     # nic params
10083     nic_defs = cluster.SimpleFillNIC({})
10084     for nic in self.op.nics:
10085       for name in constants.NICS_PARAMETERS:
10086         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10087           del nic[name]
10088     # osparams
10089     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10090     for name in self.op.osparams.keys():
10091       if name in os_defs and os_defs[name] == self.op.osparams[name]:
10092         del self.op.osparams[name]
10093
10094   def _CalculateFileStorageDir(self):
10095     """Calculate final instance file storage dir.
10096
10097     """
10098     # file storage dir calculation/check
10099     self.instance_file_storage_dir = None
10100     if self.op.disk_template in constants.DTS_FILEBASED:
10101       # build the full file storage dir path
10102       joinargs = []
10103
10104       if self.op.disk_template == constants.DT_SHARED_FILE:
10105         get_fsd_fn = self.cfg.GetSharedFileStorageDir
10106       else:
10107         get_fsd_fn = self.cfg.GetFileStorageDir
10108
10109       cfg_storagedir = get_fsd_fn()
10110       if not cfg_storagedir:
10111         raise errors.OpPrereqError("Cluster file storage dir not defined",
10112                                    errors.ECODE_STATE)
10113       joinargs.append(cfg_storagedir)
10114
10115       if self.op.file_storage_dir is not None:
10116         joinargs.append(self.op.file_storage_dir)
10117
10118       joinargs.append(self.op.instance_name)
10119
10120       # pylint: disable=W0142
10121       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10122
10123   def CheckPrereq(self): # pylint: disable=R0914
10124     """Check prerequisites.
10125
10126     """
10127     self._CalculateFileStorageDir()
10128
10129     if self.op.mode == constants.INSTANCE_IMPORT:
10130       export_info = self._ReadExportInfo()
10131       self._ReadExportParams(export_info)
10132       self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10133     else:
10134       self._old_instance_name = None
10135
10136     if (not self.cfg.GetVGName() and
10137         self.op.disk_template not in constants.DTS_NOT_LVM):
10138       raise errors.OpPrereqError("Cluster does not support lvm-based"
10139                                  " instances", errors.ECODE_STATE)
10140
10141     if (self.op.hypervisor is None or
10142         self.op.hypervisor == constants.VALUE_AUTO):
10143       self.op.hypervisor = self.cfg.GetHypervisorType()
10144
10145     cluster = self.cfg.GetClusterInfo()
10146     enabled_hvs = cluster.enabled_hypervisors
10147     if self.op.hypervisor not in enabled_hvs:
10148       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10149                                  " cluster (%s)" %
10150                                  (self.op.hypervisor, ",".join(enabled_hvs)),
10151                                  errors.ECODE_STATE)
10152
10153     # Check tag validity
10154     for tag in self.op.tags:
10155       objects.TaggableObject.ValidateTag(tag)
10156
10157     # check hypervisor parameter syntax (locally)
10158     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10159     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10160                                       self.op.hvparams)
10161     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10162     hv_type.CheckParameterSyntax(filled_hvp)
10163     self.hv_full = filled_hvp
10164     # check that we don't specify global parameters on an instance
10165     _CheckGlobalHvParams(self.op.hvparams)
10166
10167     # fill and remember the beparams dict
10168     self.be_full = _ComputeFullBeParams(self.op, cluster)
10169
10170     # build os parameters
10171     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10172
10173     # now that hvp/bep are in final format, let's reset to defaults,
10174     # if told to do so
10175     if self.op.identify_defaults:
10176       self._RevertToDefaults(cluster)
10177
10178     # NIC buildup
10179     self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10180                              self.proc)
10181
10182     # disk checks/pre-build
10183     default_vg = self.cfg.GetVGName()
10184     self.disks = _ComputeDisks(self.op, default_vg)
10185
10186     if self.op.mode == constants.INSTANCE_IMPORT:
10187       disk_images = []
10188       for idx in range(len(self.disks)):
10189         option = "disk%d_dump" % idx
10190         if export_info.has_option(constants.INISECT_INS, option):
10191           # FIXME: are the old os-es, disk sizes, etc. useful?
10192           export_name = export_info.get(constants.INISECT_INS, option)
10193           image = utils.PathJoin(self.op.src_path, export_name)
10194           disk_images.append(image)
10195         else:
10196           disk_images.append(False)
10197
10198       self.src_images = disk_images
10199
10200       if self.op.instance_name == self._old_instance_name:
10201         for idx, nic in enumerate(self.nics):
10202           if nic.mac == constants.VALUE_AUTO:
10203             nic_mac_ini = "nic%d_mac" % idx
10204             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10205
10206     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10207
10208     # ip ping checks (we use the same ip that was resolved in ExpandNames)
10209     if self.op.ip_check:
10210       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10211         raise errors.OpPrereqError("IP %s of instance %s already in use" %
10212                                    (self.check_ip, self.op.instance_name),
10213                                    errors.ECODE_NOTUNIQUE)
10214
10215     #### mac address generation
10216     # By generating here the mac address both the allocator and the hooks get
10217     # the real final mac address rather than the 'auto' or 'generate' value.
10218     # There is a race condition between the generation and the instance object
10219     # creation, which means that we know the mac is valid now, but we're not
10220     # sure it will be when we actually add the instance. If things go bad
10221     # adding the instance will abort because of a duplicate mac, and the
10222     # creation job will fail.
10223     for nic in self.nics:
10224       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10225         nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10226
10227     #### allocator run
10228
10229     if self.op.iallocator is not None:
10230       self._RunAllocator()
10231
10232     # Release all unneeded node locks
10233     _ReleaseLocks(self, locking.LEVEL_NODE,
10234                   keep=filter(None, [self.op.pnode, self.op.snode,
10235                                      self.op.src_node]))
10236     _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10237                   keep=filter(None, [self.op.pnode, self.op.snode,
10238                                      self.op.src_node]))
10239
10240     #### node related checks
10241
10242     # check primary node
10243     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10244     assert self.pnode is not None, \
10245       "Cannot retrieve locked node %s" % self.op.pnode
10246     if pnode.offline:
10247       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10248                                  pnode.name, errors.ECODE_STATE)
10249     if pnode.drained:
10250       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10251                                  pnode.name, errors.ECODE_STATE)
10252     if not pnode.vm_capable:
10253       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10254                                  " '%s'" % pnode.name, errors.ECODE_STATE)
10255
10256     self.secondaries = []
10257
10258     # Fill in any IPs from IP pools. This must happen here, because we need to
10259     # know the nic's primary node, as specified by the iallocator
10260     for idx, nic in enumerate(self.nics):
10261       net = nic.network
10262       if net is not None:
10263         netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10264         if netparams is None:
10265           raise errors.OpPrereqError("No netparams found for network"
10266                                      " %s. Propably not connected to"
10267                                      " node's %s nodegroup" %
10268                                      (net, self.pnode.name),
10269                                      errors.ECODE_INVAL)
10270         self.LogInfo("NIC/%d inherits netparams %s" %
10271                      (idx, netparams.values()))
10272         nic.nicparams = dict(netparams)
10273         if nic.ip is not None:
10274           if nic.ip.lower() == constants.NIC_IP_POOL:
10275             try:
10276               nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10277             except errors.ReservationError:
10278               raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10279                                          " from the address pool" % idx,
10280                                          errors.ECODE_STATE)
10281             self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10282           else:
10283             try:
10284               self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10285             except errors.ReservationError:
10286               raise errors.OpPrereqError("IP address %s already in use"
10287                                          " or does not belong to network %s" %
10288                                          (nic.ip, net),
10289                                          errors.ECODE_NOTUNIQUE)
10290       else:
10291         # net is None, ip None or given
10292         if self.op.conflicts_check:
10293           _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10294
10295     # mirror node verification
10296     if self.op.disk_template in constants.DTS_INT_MIRROR:
10297       if self.op.snode == pnode.name:
10298         raise errors.OpPrereqError("The secondary node cannot be the"
10299                                    " primary node", errors.ECODE_INVAL)
10300       _CheckNodeOnline(self, self.op.snode)
10301       _CheckNodeNotDrained(self, self.op.snode)
10302       _CheckNodeVmCapable(self, self.op.snode)
10303       self.secondaries.append(self.op.snode)
10304
10305       snode = self.cfg.GetNodeInfo(self.op.snode)
10306       if pnode.group != snode.group:
10307         self.LogWarning("The primary and secondary nodes are in two"
10308                         " different node groups; the disk parameters"
10309                         " from the first disk's node group will be"
10310                         " used")
10311
10312     nodenames = [pnode.name] + self.secondaries
10313
10314     # Verify instance specs
10315     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10316     ispec = {
10317       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10318       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10319       constants.ISPEC_DISK_COUNT: len(self.disks),
10320       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10321       constants.ISPEC_NIC_COUNT: len(self.nics),
10322       constants.ISPEC_SPINDLE_USE: spindle_use,
10323       }
10324
10325     group_info = self.cfg.GetNodeGroup(pnode.group)
10326     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10327     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10328     if not self.op.ignore_ipolicy and res:
10329       msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10330              (pnode.group, group_info.name, utils.CommaJoin(res)))
10331       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10332
10333     if not self.adopt_disks:
10334       if self.op.disk_template == constants.DT_RBD:
10335         # _CheckRADOSFreeSpace() is just a placeholder.
10336         # Any function that checks prerequisites can be placed here.
10337         # Check if there is enough space on the RADOS cluster.
10338         _CheckRADOSFreeSpace()
10339       else:
10340         # Check lv size requirements, if not adopting
10341         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10342         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10343
10344     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10345       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10346                                 disk[constants.IDISK_ADOPT])
10347                      for disk in self.disks])
10348       if len(all_lvs) != len(self.disks):
10349         raise errors.OpPrereqError("Duplicate volume names given for adoption",
10350                                    errors.ECODE_INVAL)
10351       for lv_name in all_lvs:
10352         try:
10353           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10354           # to ReserveLV uses the same syntax
10355           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10356         except errors.ReservationError:
10357           raise errors.OpPrereqError("LV named %s used by another instance" %
10358                                      lv_name, errors.ECODE_NOTUNIQUE)
10359
10360       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10361       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10362
10363       node_lvs = self.rpc.call_lv_list([pnode.name],
10364                                        vg_names.payload.keys())[pnode.name]
10365       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10366       node_lvs = node_lvs.payload
10367
10368       delta = all_lvs.difference(node_lvs.keys())
10369       if delta:
10370         raise errors.OpPrereqError("Missing logical volume(s): %s" %
10371                                    utils.CommaJoin(delta),
10372                                    errors.ECODE_INVAL)
10373       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10374       if online_lvs:
10375         raise errors.OpPrereqError("Online logical volumes found, cannot"
10376                                    " adopt: %s" % utils.CommaJoin(online_lvs),
10377                                    errors.ECODE_STATE)
10378       # update the size of disk based on what is found
10379       for dsk in self.disks:
10380         dsk[constants.IDISK_SIZE] = \
10381           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10382                                         dsk[constants.IDISK_ADOPT])][0]))
10383
10384     elif self.op.disk_template == constants.DT_BLOCK:
10385       # Normalize and de-duplicate device paths
10386       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10387                        for disk in self.disks])
10388       if len(all_disks) != len(self.disks):
10389         raise errors.OpPrereqError("Duplicate disk names given for adoption",
10390                                    errors.ECODE_INVAL)
10391       baddisks = [d for d in all_disks
10392                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10393       if baddisks:
10394         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10395                                    " cannot be adopted" %
10396                                    (", ".join(baddisks),
10397                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
10398                                    errors.ECODE_INVAL)
10399
10400       node_disks = self.rpc.call_bdev_sizes([pnode.name],
10401                                             list(all_disks))[pnode.name]
10402       node_disks.Raise("Cannot get block device information from node %s" %
10403                        pnode.name)
10404       node_disks = node_disks.payload
10405       delta = all_disks.difference(node_disks.keys())
10406       if delta:
10407         raise errors.OpPrereqError("Missing block device(s): %s" %
10408                                    utils.CommaJoin(delta),
10409                                    errors.ECODE_INVAL)
10410       for dsk in self.disks:
10411         dsk[constants.IDISK_SIZE] = \
10412           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10413
10414     # Verify instance specs
10415     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10416     ispec = {
10417       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10418       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10419       constants.ISPEC_DISK_COUNT: len(self.disks),
10420       constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10421                                   for disk in self.disks],
10422       constants.ISPEC_NIC_COUNT: len(self.nics),
10423       constants.ISPEC_SPINDLE_USE: spindle_use,
10424       }
10425
10426     group_info = self.cfg.GetNodeGroup(pnode.group)
10427     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10428     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10429     if not self.op.ignore_ipolicy and res:
10430       raise errors.OpPrereqError(("Instance allocation to group %s violates"
10431                                   " policy: %s") % (pnode.group,
10432                                                     utils.CommaJoin(res)),
10433                                   errors.ECODE_INVAL)
10434
10435     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10436
10437     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10438     # check OS parameters (remotely)
10439     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10440
10441     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10442
10443     # memory check on primary node
10444     #TODO(dynmem): use MINMEM for checking
10445     if self.op.start:
10446       _CheckNodeFreeMemory(self, self.pnode.name,
10447                            "creating instance %s" % self.op.instance_name,
10448                            self.be_full[constants.BE_MAXMEM],
10449                            self.op.hypervisor)
10450
10451     self.dry_run_result = list(nodenames)
10452
10453   def Exec(self, feedback_fn):
10454     """Create and add the instance to the cluster.
10455
10456     """
10457     instance = self.op.instance_name
10458     pnode_name = self.pnode.name
10459
10460     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10461                 self.owned_locks(locking.LEVEL_NODE)), \
10462       "Node locks differ from node resource locks"
10463
10464     ht_kind = self.op.hypervisor
10465     if ht_kind in constants.HTS_REQ_PORT:
10466       network_port = self.cfg.AllocatePort()
10467     else:
10468       network_port = None
10469
10470     # This is ugly but we got a chicken-egg problem here
10471     # We can only take the group disk parameters, as the instance
10472     # has no disks yet (we are generating them right here).
10473     node = self.cfg.GetNodeInfo(pnode_name)
10474     nodegroup = self.cfg.GetNodeGroup(node.group)
10475     disks = _GenerateDiskTemplate(self,
10476                                   self.op.disk_template,
10477                                   instance, pnode_name,
10478                                   self.secondaries,
10479                                   self.disks,
10480                                   self.instance_file_storage_dir,
10481                                   self.op.file_driver,
10482                                   0,
10483                                   feedback_fn,
10484                                   self.cfg.GetGroupDiskParams(nodegroup))
10485
10486     iobj = objects.Instance(name=instance, os=self.op.os_type,
10487                             primary_node=pnode_name,
10488                             nics=self.nics, disks=disks,
10489                             disk_template=self.op.disk_template,
10490                             admin_state=constants.ADMINST_DOWN,
10491                             network_port=network_port,
10492                             beparams=self.op.beparams,
10493                             hvparams=self.op.hvparams,
10494                             hypervisor=self.op.hypervisor,
10495                             osparams=self.op.osparams,
10496                             )
10497
10498     if self.op.tags:
10499       for tag in self.op.tags:
10500         iobj.AddTag(tag)
10501
10502     if self.adopt_disks:
10503       if self.op.disk_template == constants.DT_PLAIN:
10504         # rename LVs to the newly-generated names; we need to construct
10505         # 'fake' LV disks with the old data, plus the new unique_id
10506         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10507         rename_to = []
10508         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10509           rename_to.append(t_dsk.logical_id)
10510           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10511           self.cfg.SetDiskID(t_dsk, pnode_name)
10512         result = self.rpc.call_blockdev_rename(pnode_name,
10513                                                zip(tmp_disks, rename_to))
10514         result.Raise("Failed to rename adoped LVs")
10515     else:
10516       feedback_fn("* creating instance disks...")
10517       try:
10518         _CreateDisks(self, iobj)
10519       except errors.OpExecError:
10520         self.LogWarning("Device creation failed, reverting...")
10521         try:
10522           _RemoveDisks(self, iobj)
10523         finally:
10524           self.cfg.ReleaseDRBDMinors(instance)
10525           raise
10526
10527     feedback_fn("adding instance %s to cluster config" % instance)
10528
10529     self.cfg.AddInstance(iobj, self.proc.GetECId())
10530
10531     # Declare that we don't want to remove the instance lock anymore, as we've
10532     # added the instance to the config
10533     del self.remove_locks[locking.LEVEL_INSTANCE]
10534
10535     if self.op.mode == constants.INSTANCE_IMPORT:
10536       # Release unused nodes
10537       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10538     else:
10539       # Release all nodes
10540       _ReleaseLocks(self, locking.LEVEL_NODE)
10541
10542     disk_abort = False
10543     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10544       feedback_fn("* wiping instance disks...")
10545       try:
10546         _WipeDisks(self, iobj)
10547       except errors.OpExecError, err:
10548         logging.exception("Wiping disks failed")
10549         self.LogWarning("Wiping instance disks failed (%s)", err)
10550         disk_abort = True
10551
10552     if disk_abort:
10553       # Something is already wrong with the disks, don't do anything else
10554       pass
10555     elif self.op.wait_for_sync:
10556       disk_abort = not _WaitForSync(self, iobj)
10557     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10558       # make sure the disks are not degraded (still sync-ing is ok)
10559       feedback_fn("* checking mirrors status")
10560       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10561     else:
10562       disk_abort = False
10563
10564     if disk_abort:
10565       _RemoveDisks(self, iobj)
10566       self.cfg.RemoveInstance(iobj.name)
10567       # Make sure the instance lock gets removed
10568       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10569       raise errors.OpExecError("There are some degraded disks for"
10570                                " this instance")
10571
10572     # Release all node resource locks
10573     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10574
10575     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10576       # we need to set the disks ID to the primary node, since the
10577       # preceding code might or might have not done it, depending on
10578       # disk template and other options
10579       for disk in iobj.disks:
10580         self.cfg.SetDiskID(disk, pnode_name)
10581       if self.op.mode == constants.INSTANCE_CREATE:
10582         if not self.op.no_install:
10583           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10584                         not self.op.wait_for_sync)
10585           if pause_sync:
10586             feedback_fn("* pausing disk sync to install instance OS")
10587             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10588                                                               (iobj.disks,
10589                                                                iobj), True)
10590             for idx, success in enumerate(result.payload):
10591               if not success:
10592                 logging.warn("pause-sync of instance %s for disk %d failed",
10593                              instance, idx)
10594
10595           feedback_fn("* running the instance OS create scripts...")
10596           # FIXME: pass debug option from opcode to backend
10597           os_add_result = \
10598             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10599                                           self.op.debug_level)
10600           if pause_sync:
10601             feedback_fn("* resuming disk sync")
10602             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10603                                                               (iobj.disks,
10604                                                                iobj), False)
10605             for idx, success in enumerate(result.payload):
10606               if not success:
10607                 logging.warn("resume-sync of instance %s for disk %d failed",
10608                              instance, idx)
10609
10610           os_add_result.Raise("Could not add os for instance %s"
10611                               " on node %s" % (instance, pnode_name))
10612
10613       else:
10614         if self.op.mode == constants.INSTANCE_IMPORT:
10615           feedback_fn("* running the instance OS import scripts...")
10616
10617           transfers = []
10618
10619           for idx, image in enumerate(self.src_images):
10620             if not image:
10621               continue
10622
10623             # FIXME: pass debug option from opcode to backend
10624             dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10625                                                constants.IEIO_FILE, (image, ),
10626                                                constants.IEIO_SCRIPT,
10627                                                (iobj.disks[idx], idx),
10628                                                None)
10629             transfers.append(dt)
10630
10631           import_result = \
10632             masterd.instance.TransferInstanceData(self, feedback_fn,
10633                                                   self.op.src_node, pnode_name,
10634                                                   self.pnode.secondary_ip,
10635                                                   iobj, transfers)
10636           if not compat.all(import_result):
10637             self.LogWarning("Some disks for instance %s on node %s were not"
10638                             " imported successfully" % (instance, pnode_name))
10639
10640           rename_from = self._old_instance_name
10641
10642         elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10643           feedback_fn("* preparing remote import...")
10644           # The source cluster will stop the instance before attempting to make
10645           # a connection. In some cases stopping an instance can take a long
10646           # time, hence the shutdown timeout is added to the connection
10647           # timeout.
10648           connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10649                              self.op.source_shutdown_timeout)
10650           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10651
10652           assert iobj.primary_node == self.pnode.name
10653           disk_results = \
10654             masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10655                                           self.source_x509_ca,
10656                                           self._cds, timeouts)
10657           if not compat.all(disk_results):
10658             # TODO: Should the instance still be started, even if some disks
10659             # failed to import (valid for local imports, too)?
10660             self.LogWarning("Some disks for instance %s on node %s were not"
10661                             " imported successfully" % (instance, pnode_name))
10662
10663           rename_from = self.source_instance_name
10664
10665         else:
10666           # also checked in the prereq part
10667           raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10668                                        % self.op.mode)
10669
10670         # Run rename script on newly imported instance
10671         assert iobj.name == instance
10672         feedback_fn("Running rename script for %s" % instance)
10673         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10674                                                    rename_from,
10675                                                    self.op.debug_level)
10676         if result.fail_msg:
10677           self.LogWarning("Failed to run rename script for %s on node"
10678                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10679
10680     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10681
10682     if self.op.start:
10683       iobj.admin_state = constants.ADMINST_UP
10684       self.cfg.Update(iobj, feedback_fn)
10685       logging.info("Starting instance %s on node %s", instance, pnode_name)
10686       feedback_fn("* starting instance...")
10687       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10688                                             False)
10689       result.Raise("Could not start instance")
10690
10691     return list(iobj.all_nodes)
10692
10693
10694 class LUInstanceMultiAlloc(NoHooksLU):
10695   """Allocates multiple instances at the same time.
10696
10697   """
10698   REQ_BGL = False
10699
10700   def CheckArguments(self):
10701     """Check arguments.
10702
10703     """
10704     nodes = []
10705     for inst in self.op.instances:
10706       if inst.iallocator is not None:
10707         raise errors.OpPrereqError("iallocator are not allowed to be set on"
10708                                    " instance objects", errors.ECODE_INVAL)
10709       nodes.append(bool(inst.pnode))
10710       if inst.disk_template in constants.DTS_INT_MIRROR:
10711         nodes.append(bool(inst.snode))
10712
10713     has_nodes = compat.any(nodes)
10714     if compat.all(nodes) ^ has_nodes:
10715       raise errors.OpPrereqError("There are instance objects providing"
10716                                  " pnode/snode while others do not",
10717                                  errors.ECODE_INVAL)
10718
10719     if self.op.iallocator is None:
10720       default_iallocator = self.cfg.GetDefaultIAllocator()
10721       if default_iallocator and has_nodes:
10722         self.op.iallocator = default_iallocator
10723       else:
10724         raise errors.OpPrereqError("No iallocator or nodes on the instances"
10725                                    " given and no cluster-wide default"
10726                                    " iallocator found; please specify either"
10727                                    " an iallocator or nodes on the instances"
10728                                    " or set a cluster-wide default iallocator",
10729                                    errors.ECODE_INVAL)
10730
10731     dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10732     if dups:
10733       raise errors.OpPrereqError("There are duplicate instance names: %s" %
10734                                  utils.CommaJoin(dups), errors.ECODE_INVAL)
10735
10736   def ExpandNames(self):
10737     """Calculate the locks.
10738
10739     """
10740     self.share_locks = _ShareAll()
10741     self.needed_locks = {}
10742
10743     if self.op.iallocator:
10744       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10745       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10746     else:
10747       nodeslist = []
10748       for inst in self.op.instances:
10749         inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10750         nodeslist.append(inst.pnode)
10751         if inst.snode is not None:
10752           inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10753           nodeslist.append(inst.snode)
10754
10755       self.needed_locks[locking.LEVEL_NODE] = nodeslist
10756       # Lock resources of instance's primary and secondary nodes (copy to
10757       # prevent accidential modification)
10758       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10759
10760   def CheckPrereq(self):
10761     """Check prerequisite.
10762
10763     """
10764     cluster = self.cfg.GetClusterInfo()
10765     default_vg = self.cfg.GetVGName()
10766     insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10767                                          _ComputeNics(op, cluster, None,
10768                                                       self.cfg, self.proc),
10769                                          _ComputeFullBeParams(op, cluster))
10770              for op in self.op.instances]
10771     req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10772     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10773
10774     ial.Run(self.op.iallocator)
10775
10776     if not ial.success:
10777       raise errors.OpPrereqError("Can't compute nodes using"
10778                                  " iallocator '%s': %s" %
10779                                  (self.op.iallocator, ial.info),
10780                                  errors.ECODE_NORES)
10781
10782     self.ia_result = ial.result
10783
10784     if self.op.dry_run:
10785       self.dry_run_rsult = objects.FillDict(self._ConstructPartialResult(), {
10786         constants.JOB_IDS_KEY: [],
10787         })
10788
10789   def _ConstructPartialResult(self):
10790     """Contructs the partial result.
10791
10792     """
10793     (allocatable, failed) = self.ia_result
10794     return {
10795       opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10796         map(compat.fst, allocatable),
10797       opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10798       }
10799
10800   def Exec(self, feedback_fn):
10801     """Executes the opcode.
10802
10803     """
10804     op2inst = dict((op.instance_name, op) for op in self.op.instances)
10805     (allocatable, failed) = self.ia_result
10806
10807     jobs = []
10808     for (name, nodes) in allocatable:
10809       op = op2inst.pop(name)
10810
10811       if len(nodes) > 1:
10812         (op.pnode, op.snode) = nodes
10813       else:
10814         (op.pnode,) = nodes
10815
10816       jobs.append([op])
10817
10818     missing = set(op2inst.keys()) - set(failed)
10819     assert not missing, \
10820       "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10821
10822     return ResultWithJobs(jobs, **self._ConstructPartialResult())
10823
10824
10825 def _CheckRADOSFreeSpace():
10826   """Compute disk size requirements inside the RADOS cluster.
10827
10828   """
10829   # For the RADOS cluster we assume there is always enough space.
10830   pass
10831
10832
10833 class LUInstanceConsole(NoHooksLU):
10834   """Connect to an instance's console.
10835
10836   This is somewhat special in that it returns the command line that
10837   you need to run on the master node in order to connect to the
10838   console.
10839
10840   """
10841   REQ_BGL = False
10842
10843   def ExpandNames(self):
10844     self.share_locks = _ShareAll()
10845     self._ExpandAndLockInstance()
10846
10847   def CheckPrereq(self):
10848     """Check prerequisites.
10849
10850     This checks that the instance is in the cluster.
10851
10852     """
10853     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10854     assert self.instance is not None, \
10855       "Cannot retrieve locked instance %s" % self.op.instance_name
10856     _CheckNodeOnline(self, self.instance.primary_node)
10857
10858   def Exec(self, feedback_fn):
10859     """Connect to the console of an instance
10860
10861     """
10862     instance = self.instance
10863     node = instance.primary_node
10864
10865     node_insts = self.rpc.call_instance_list([node],
10866                                              [instance.hypervisor])[node]
10867     node_insts.Raise("Can't get node information from %s" % node)
10868
10869     if instance.name not in node_insts.payload:
10870       if instance.admin_state == constants.ADMINST_UP:
10871         state = constants.INSTST_ERRORDOWN
10872       elif instance.admin_state == constants.ADMINST_DOWN:
10873         state = constants.INSTST_ADMINDOWN
10874       else:
10875         state = constants.INSTST_ADMINOFFLINE
10876       raise errors.OpExecError("Instance %s is not running (state %s)" %
10877                                (instance.name, state))
10878
10879     logging.debug("Connecting to console of %s on %s", instance.name, node)
10880
10881     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10882
10883
10884 def _GetInstanceConsole(cluster, instance):
10885   """Returns console information for an instance.
10886
10887   @type cluster: L{objects.Cluster}
10888   @type instance: L{objects.Instance}
10889   @rtype: dict
10890
10891   """
10892   hyper = hypervisor.GetHypervisor(instance.hypervisor)
10893   # beparams and hvparams are passed separately, to avoid editing the
10894   # instance and then saving the defaults in the instance itself.
10895   hvparams = cluster.FillHV(instance)
10896   beparams = cluster.FillBE(instance)
10897   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10898
10899   assert console.instance == instance.name
10900   assert console.Validate()
10901
10902   return console.ToDict()
10903
10904
10905 class LUInstanceReplaceDisks(LogicalUnit):
10906   """Replace the disks of an instance.
10907
10908   """
10909   HPATH = "mirrors-replace"
10910   HTYPE = constants.HTYPE_INSTANCE
10911   REQ_BGL = False
10912
10913   def CheckArguments(self):
10914     """Check arguments.
10915
10916     """
10917     remote_node = self.op.remote_node
10918     ialloc = self.op.iallocator
10919     if self.op.mode == constants.REPLACE_DISK_CHG:
10920       if remote_node is None and ialloc is None:
10921         raise errors.OpPrereqError("When changing the secondary either an"
10922                                    " iallocator script must be used or the"
10923                                    " new node given", errors.ECODE_INVAL)
10924       else:
10925         _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10926
10927     elif remote_node is not None or ialloc is not None:
10928       # Not replacing the secondary
10929       raise errors.OpPrereqError("The iallocator and new node options can"
10930                                  " only be used when changing the"
10931                                  " secondary node", errors.ECODE_INVAL)
10932
10933   def ExpandNames(self):
10934     self._ExpandAndLockInstance()
10935
10936     assert locking.LEVEL_NODE not in self.needed_locks
10937     assert locking.LEVEL_NODE_RES not in self.needed_locks
10938     assert locking.LEVEL_NODEGROUP not in self.needed_locks
10939
10940     assert self.op.iallocator is None or self.op.remote_node is None, \
10941       "Conflicting options"
10942
10943     if self.op.remote_node is not None:
10944       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10945
10946       # Warning: do not remove the locking of the new secondary here
10947       # unless DRBD8.AddChildren is changed to work in parallel;
10948       # currently it doesn't since parallel invocations of
10949       # FindUnusedMinor will conflict
10950       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10951       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10952     else:
10953       self.needed_locks[locking.LEVEL_NODE] = []
10954       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10955
10956       if self.op.iallocator is not None:
10957         # iallocator will select a new node in the same group
10958         self.needed_locks[locking.LEVEL_NODEGROUP] = []
10959
10960     self.needed_locks[locking.LEVEL_NODE_RES] = []
10961
10962     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10963                                    self.op.iallocator, self.op.remote_node,
10964                                    self.op.disks, False, self.op.early_release,
10965                                    self.op.ignore_ipolicy)
10966
10967     self.tasklets = [self.replacer]
10968
10969   def DeclareLocks(self, level):
10970     if level == locking.LEVEL_NODEGROUP:
10971       assert self.op.remote_node is None
10972       assert self.op.iallocator is not None
10973       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10974
10975       self.share_locks[locking.LEVEL_NODEGROUP] = 1
10976       # Lock all groups used by instance optimistically; this requires going
10977       # via the node before it's locked, requiring verification later on
10978       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10979         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10980
10981     elif level == locking.LEVEL_NODE:
10982       if self.op.iallocator is not None:
10983         assert self.op.remote_node is None
10984         assert not self.needed_locks[locking.LEVEL_NODE]
10985
10986         # Lock member nodes of all locked groups
10987         self.needed_locks[locking.LEVEL_NODE] = \
10988             [node_name
10989              for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10990              for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10991       else:
10992         self._LockInstancesNodes()
10993     elif level == locking.LEVEL_NODE_RES:
10994       # Reuse node locks
10995       self.needed_locks[locking.LEVEL_NODE_RES] = \
10996         self.needed_locks[locking.LEVEL_NODE]
10997
10998   def BuildHooksEnv(self):
10999     """Build hooks env.
11000
11001     This runs on the master, the primary and all the secondaries.
11002
11003     """
11004     instance = self.replacer.instance
11005     env = {
11006       "MODE": self.op.mode,
11007       "NEW_SECONDARY": self.op.remote_node,
11008       "OLD_SECONDARY": instance.secondary_nodes[0],
11009       }
11010     env.update(_BuildInstanceHookEnvByObject(self, instance))
11011     return env
11012
11013   def BuildHooksNodes(self):
11014     """Build hooks nodes.
11015
11016     """
11017     instance = self.replacer.instance
11018     nl = [
11019       self.cfg.GetMasterNode(),
11020       instance.primary_node,
11021       ]
11022     if self.op.remote_node is not None:
11023       nl.append(self.op.remote_node)
11024     return nl, nl
11025
11026   def CheckPrereq(self):
11027     """Check prerequisites.
11028
11029     """
11030     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11031             self.op.iallocator is None)
11032
11033     # Verify if node group locks are still correct
11034     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11035     if owned_groups:
11036       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11037
11038     return LogicalUnit.CheckPrereq(self)
11039
11040
11041 class TLReplaceDisks(Tasklet):
11042   """Replaces disks for an instance.
11043
11044   Note: Locking is not within the scope of this class.
11045
11046   """
11047   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11048                disks, delay_iallocator, early_release, ignore_ipolicy):
11049     """Initializes this class.
11050
11051     """
11052     Tasklet.__init__(self, lu)
11053
11054     # Parameters
11055     self.instance_name = instance_name
11056     self.mode = mode
11057     self.iallocator_name = iallocator_name
11058     self.remote_node = remote_node
11059     self.disks = disks
11060     self.delay_iallocator = delay_iallocator
11061     self.early_release = early_release
11062     self.ignore_ipolicy = ignore_ipolicy
11063
11064     # Runtime data
11065     self.instance = None
11066     self.new_node = None
11067     self.target_node = None
11068     self.other_node = None
11069     self.remote_node_info = None
11070     self.node_secondary_ip = None
11071
11072   @staticmethod
11073   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11074     """Compute a new secondary node using an IAllocator.
11075
11076     """
11077     req = iallocator.IAReqRelocate(name=instance_name,
11078                                    relocate_from=list(relocate_from))
11079     ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11080
11081     ial.Run(iallocator_name)
11082
11083     if not ial.success:
11084       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11085                                  " %s" % (iallocator_name, ial.info),
11086                                  errors.ECODE_NORES)
11087
11088     remote_node_name = ial.result[0]
11089
11090     lu.LogInfo("Selected new secondary for instance '%s': %s",
11091                instance_name, remote_node_name)
11092
11093     return remote_node_name
11094
11095   def _FindFaultyDisks(self, node_name):
11096     """Wrapper for L{_FindFaultyInstanceDisks}.
11097
11098     """
11099     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11100                                     node_name, True)
11101
11102   def _CheckDisksActivated(self, instance):
11103     """Checks if the instance disks are activated.
11104
11105     @param instance: The instance to check disks
11106     @return: True if they are activated, False otherwise
11107
11108     """
11109     nodes = instance.all_nodes
11110
11111     for idx, dev in enumerate(instance.disks):
11112       for node in nodes:
11113         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11114         self.cfg.SetDiskID(dev, node)
11115
11116         result = _BlockdevFind(self, node, dev, instance)
11117
11118         if result.offline:
11119           continue
11120         elif result.fail_msg or not result.payload:
11121           return False
11122
11123     return True
11124
11125   def CheckPrereq(self):
11126     """Check prerequisites.
11127
11128     This checks that the instance is in the cluster.
11129
11130     """
11131     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11132     assert instance is not None, \
11133       "Cannot retrieve locked instance %s" % self.instance_name
11134
11135     if instance.disk_template != constants.DT_DRBD8:
11136       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11137                                  " instances", errors.ECODE_INVAL)
11138
11139     if len(instance.secondary_nodes) != 1:
11140       raise errors.OpPrereqError("The instance has a strange layout,"
11141                                  " expected one secondary but found %d" %
11142                                  len(instance.secondary_nodes),
11143                                  errors.ECODE_FAULT)
11144
11145     if not self.delay_iallocator:
11146       self._CheckPrereq2()
11147
11148   def _CheckPrereq2(self):
11149     """Check prerequisites, second part.
11150
11151     This function should always be part of CheckPrereq. It was separated and is
11152     now called from Exec because during node evacuation iallocator was only
11153     called with an unmodified cluster model, not taking planned changes into
11154     account.
11155
11156     """
11157     instance = self.instance
11158     secondary_node = instance.secondary_nodes[0]
11159
11160     if self.iallocator_name is None:
11161       remote_node = self.remote_node
11162     else:
11163       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11164                                        instance.name, instance.secondary_nodes)
11165
11166     if remote_node is None:
11167       self.remote_node_info = None
11168     else:
11169       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11170              "Remote node '%s' is not locked" % remote_node
11171
11172       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11173       assert self.remote_node_info is not None, \
11174         "Cannot retrieve locked node %s" % remote_node
11175
11176     if remote_node == self.instance.primary_node:
11177       raise errors.OpPrereqError("The specified node is the primary node of"
11178                                  " the instance", errors.ECODE_INVAL)
11179
11180     if remote_node == secondary_node:
11181       raise errors.OpPrereqError("The specified node is already the"
11182                                  " secondary node of the instance",
11183                                  errors.ECODE_INVAL)
11184
11185     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11186                                     constants.REPLACE_DISK_CHG):
11187       raise errors.OpPrereqError("Cannot specify disks to be replaced",
11188                                  errors.ECODE_INVAL)
11189
11190     if self.mode == constants.REPLACE_DISK_AUTO:
11191       if not self._CheckDisksActivated(instance):
11192         raise errors.OpPrereqError("Please run activate-disks on instance %s"
11193                                    " first" % self.instance_name,
11194                                    errors.ECODE_STATE)
11195       faulty_primary = self._FindFaultyDisks(instance.primary_node)
11196       faulty_secondary = self._FindFaultyDisks(secondary_node)
11197
11198       if faulty_primary and faulty_secondary:
11199         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11200                                    " one node and can not be repaired"
11201                                    " automatically" % self.instance_name,
11202                                    errors.ECODE_STATE)
11203
11204       if faulty_primary:
11205         self.disks = faulty_primary
11206         self.target_node = instance.primary_node
11207         self.other_node = secondary_node
11208         check_nodes = [self.target_node, self.other_node]
11209       elif faulty_secondary:
11210         self.disks = faulty_secondary
11211         self.target_node = secondary_node
11212         self.other_node = instance.primary_node
11213         check_nodes = [self.target_node, self.other_node]
11214       else:
11215         self.disks = []
11216         check_nodes = []
11217
11218     else:
11219       # Non-automatic modes
11220       if self.mode == constants.REPLACE_DISK_PRI:
11221         self.target_node = instance.primary_node
11222         self.other_node = secondary_node
11223         check_nodes = [self.target_node, self.other_node]
11224
11225       elif self.mode == constants.REPLACE_DISK_SEC:
11226         self.target_node = secondary_node
11227         self.other_node = instance.primary_node
11228         check_nodes = [self.target_node, self.other_node]
11229
11230       elif self.mode == constants.REPLACE_DISK_CHG:
11231         self.new_node = remote_node
11232         self.other_node = instance.primary_node
11233         self.target_node = secondary_node
11234         check_nodes = [self.new_node, self.other_node]
11235
11236         _CheckNodeNotDrained(self.lu, remote_node)
11237         _CheckNodeVmCapable(self.lu, remote_node)
11238
11239         old_node_info = self.cfg.GetNodeInfo(secondary_node)
11240         assert old_node_info is not None
11241         if old_node_info.offline and not self.early_release:
11242           # doesn't make sense to delay the release
11243           self.early_release = True
11244           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11245                           " early-release mode", secondary_node)
11246
11247       else:
11248         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11249                                      self.mode)
11250
11251       # If not specified all disks should be replaced
11252       if not self.disks:
11253         self.disks = range(len(self.instance.disks))
11254
11255     # TODO: This is ugly, but right now we can't distinguish between internal
11256     # submitted opcode and external one. We should fix that.
11257     if self.remote_node_info:
11258       # We change the node, lets verify it still meets instance policy
11259       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11260       cluster = self.cfg.GetClusterInfo()
11261       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11262                                                               new_group_info)
11263       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11264                               ignore=self.ignore_ipolicy)
11265
11266     for node in check_nodes:
11267       _CheckNodeOnline(self.lu, node)
11268
11269     touched_nodes = frozenset(node_name for node_name in [self.new_node,
11270                                                           self.other_node,
11271                                                           self.target_node]
11272                               if node_name is not None)
11273
11274     # Release unneeded node and node resource locks
11275     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11276     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11277
11278     # Release any owned node group
11279     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11280       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11281
11282     # Check whether disks are valid
11283     for disk_idx in self.disks:
11284       instance.FindDisk(disk_idx)
11285
11286     # Get secondary node IP addresses
11287     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11288                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
11289
11290   def Exec(self, feedback_fn):
11291     """Execute disk replacement.
11292
11293     This dispatches the disk replacement to the appropriate handler.
11294
11295     """
11296     if self.delay_iallocator:
11297       self._CheckPrereq2()
11298
11299     if __debug__:
11300       # Verify owned locks before starting operation
11301       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11302       assert set(owned_nodes) == set(self.node_secondary_ip), \
11303           ("Incorrect node locks, owning %s, expected %s" %
11304            (owned_nodes, self.node_secondary_ip.keys()))
11305       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11306               self.lu.owned_locks(locking.LEVEL_NODE_RES))
11307
11308       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11309       assert list(owned_instances) == [self.instance_name], \
11310           "Instance '%s' not locked" % self.instance_name
11311
11312       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11313           "Should not own any node group lock at this point"
11314
11315     if not self.disks:
11316       feedback_fn("No disks need replacement for instance '%s'" %
11317                   self.instance.name)
11318       return
11319
11320     feedback_fn("Replacing disk(s) %s for instance '%s'" %
11321                 (utils.CommaJoin(self.disks), self.instance.name))
11322     feedback_fn("Current primary node: %s", self.instance.primary_node)
11323     feedback_fn("Current seconary node: %s",
11324                 utils.CommaJoin(self.instance.secondary_nodes))
11325
11326     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11327
11328     # Activate the instance disks if we're replacing them on a down instance
11329     if activate_disks:
11330       _StartInstanceDisks(self.lu, self.instance, True)
11331
11332     try:
11333       # Should we replace the secondary node?
11334       if self.new_node is not None:
11335         fn = self._ExecDrbd8Secondary
11336       else:
11337         fn = self._ExecDrbd8DiskOnly
11338
11339       result = fn(feedback_fn)
11340     finally:
11341       # Deactivate the instance disks if we're replacing them on a
11342       # down instance
11343       if activate_disks:
11344         _SafeShutdownInstanceDisks(self.lu, self.instance)
11345
11346     assert not self.lu.owned_locks(locking.LEVEL_NODE)
11347
11348     if __debug__:
11349       # Verify owned locks
11350       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11351       nodes = frozenset(self.node_secondary_ip)
11352       assert ((self.early_release and not owned_nodes) or
11353               (not self.early_release and not (set(owned_nodes) - nodes))), \
11354         ("Not owning the correct locks, early_release=%s, owned=%r,"
11355          " nodes=%r" % (self.early_release, owned_nodes, nodes))
11356
11357     return result
11358
11359   def _CheckVolumeGroup(self, nodes):
11360     self.lu.LogInfo("Checking volume groups")
11361
11362     vgname = self.cfg.GetVGName()
11363
11364     # Make sure volume group exists on all involved nodes
11365     results = self.rpc.call_vg_list(nodes)
11366     if not results:
11367       raise errors.OpExecError("Can't list volume groups on the nodes")
11368
11369     for node in nodes:
11370       res = results[node]
11371       res.Raise("Error checking node %s" % node)
11372       if vgname not in res.payload:
11373         raise errors.OpExecError("Volume group '%s' not found on node %s" %
11374                                  (vgname, node))
11375
11376   def _CheckDisksExistence(self, nodes):
11377     # Check disk existence
11378     for idx, dev in enumerate(self.instance.disks):
11379       if idx not in self.disks:
11380         continue
11381
11382       for node in nodes:
11383         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11384         self.cfg.SetDiskID(dev, node)
11385
11386         result = _BlockdevFind(self, node, dev, self.instance)
11387
11388         msg = result.fail_msg
11389         if msg or not result.payload:
11390           if not msg:
11391             msg = "disk not found"
11392           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11393                                    (idx, node, msg))
11394
11395   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11396     for idx, dev in enumerate(self.instance.disks):
11397       if idx not in self.disks:
11398         continue
11399
11400       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11401                       (idx, node_name))
11402
11403       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11404                                    on_primary, ldisk=ldisk):
11405         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11406                                  " replace disks for instance %s" %
11407                                  (node_name, self.instance.name))
11408
11409   def _CreateNewStorage(self, node_name):
11410     """Create new storage on the primary or secondary node.
11411
11412     This is only used for same-node replaces, not for changing the
11413     secondary node, hence we don't want to modify the existing disk.
11414
11415     """
11416     iv_names = {}
11417
11418     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11419     for idx, dev in enumerate(disks):
11420       if idx not in self.disks:
11421         continue
11422
11423       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11424
11425       self.cfg.SetDiskID(dev, node_name)
11426
11427       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11428       names = _GenerateUniqueNames(self.lu, lv_names)
11429
11430       (data_disk, meta_disk) = dev.children
11431       vg_data = data_disk.logical_id[0]
11432       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11433                              logical_id=(vg_data, names[0]),
11434                              params=data_disk.params)
11435       vg_meta = meta_disk.logical_id[0]
11436       lv_meta = objects.Disk(dev_type=constants.LD_LV,
11437                              size=constants.DRBD_META_SIZE,
11438                              logical_id=(vg_meta, names[1]),
11439                              params=meta_disk.params)
11440
11441       new_lvs = [lv_data, lv_meta]
11442       old_lvs = [child.Copy() for child in dev.children]
11443       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11444
11445       # we pass force_create=True to force the LVM creation
11446       for new_lv in new_lvs:
11447         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11448                              _GetInstanceInfoText(self.instance), False)
11449
11450     return iv_names
11451
11452   def _CheckDevices(self, node_name, iv_names):
11453     for name, (dev, _, _) in iv_names.iteritems():
11454       self.cfg.SetDiskID(dev, node_name)
11455
11456       result = _BlockdevFind(self, node_name, dev, self.instance)
11457
11458       msg = result.fail_msg
11459       if msg or not result.payload:
11460         if not msg:
11461           msg = "disk not found"
11462         raise errors.OpExecError("Can't find DRBD device %s: %s" %
11463                                  (name, msg))
11464
11465       if result.payload.is_degraded:
11466         raise errors.OpExecError("DRBD device %s is degraded!" % name)
11467
11468   def _RemoveOldStorage(self, node_name, iv_names):
11469     for name, (_, old_lvs, _) in iv_names.iteritems():
11470       self.lu.LogInfo("Remove logical volumes for %s" % name)
11471
11472       for lv in old_lvs:
11473         self.cfg.SetDiskID(lv, node_name)
11474
11475         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11476         if msg:
11477           self.lu.LogWarning("Can't remove old LV: %s" % msg,
11478                              hint="remove unused LVs manually")
11479
11480   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11481     """Replace a disk on the primary or secondary for DRBD 8.
11482
11483     The algorithm for replace is quite complicated:
11484
11485       1. for each disk to be replaced:
11486
11487         1. create new LVs on the target node with unique names
11488         1. detach old LVs from the drbd device
11489         1. rename old LVs to name_replaced.<time_t>
11490         1. rename new LVs to old LVs
11491         1. attach the new LVs (with the old names now) to the drbd device
11492
11493       1. wait for sync across all devices
11494
11495       1. for each modified disk:
11496
11497         1. remove old LVs (which have the name name_replaces.<time_t>)
11498
11499     Failures are not very well handled.
11500
11501     """
11502     steps_total = 6
11503
11504     # Step: check device activation
11505     self.lu.LogStep(1, steps_total, "Check device existence")
11506     self._CheckDisksExistence([self.other_node, self.target_node])
11507     self._CheckVolumeGroup([self.target_node, self.other_node])
11508
11509     # Step: check other node consistency
11510     self.lu.LogStep(2, steps_total, "Check peer consistency")
11511     self._CheckDisksConsistency(self.other_node,
11512                                 self.other_node == self.instance.primary_node,
11513                                 False)
11514
11515     # Step: create new storage
11516     self.lu.LogStep(3, steps_total, "Allocate new storage")
11517     iv_names = self._CreateNewStorage(self.target_node)
11518
11519     # Step: for each lv, detach+rename*2+attach
11520     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11521     for dev, old_lvs, new_lvs in iv_names.itervalues():
11522       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11523
11524       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11525                                                      old_lvs)
11526       result.Raise("Can't detach drbd from local storage on node"
11527                    " %s for device %s" % (self.target_node, dev.iv_name))
11528       #dev.children = []
11529       #cfg.Update(instance)
11530
11531       # ok, we created the new LVs, so now we know we have the needed
11532       # storage; as such, we proceed on the target node to rename
11533       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11534       # using the assumption that logical_id == physical_id (which in
11535       # turn is the unique_id on that node)
11536
11537       # FIXME(iustin): use a better name for the replaced LVs
11538       temp_suffix = int(time.time())
11539       ren_fn = lambda d, suff: (d.physical_id[0],
11540                                 d.physical_id[1] + "_replaced-%s" % suff)
11541
11542       # Build the rename list based on what LVs exist on the node
11543       rename_old_to_new = []
11544       for to_ren in old_lvs:
11545         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11546         if not result.fail_msg and result.payload:
11547           # device exists
11548           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11549
11550       self.lu.LogInfo("Renaming the old LVs on the target node")
11551       result = self.rpc.call_blockdev_rename(self.target_node,
11552                                              rename_old_to_new)
11553       result.Raise("Can't rename old LVs on node %s" % self.target_node)
11554
11555       # Now we rename the new LVs to the old LVs
11556       self.lu.LogInfo("Renaming the new LVs on the target node")
11557       rename_new_to_old = [(new, old.physical_id)
11558                            for old, new in zip(old_lvs, new_lvs)]
11559       result = self.rpc.call_blockdev_rename(self.target_node,
11560                                              rename_new_to_old)
11561       result.Raise("Can't rename new LVs on node %s" % self.target_node)
11562
11563       # Intermediate steps of in memory modifications
11564       for old, new in zip(old_lvs, new_lvs):
11565         new.logical_id = old.logical_id
11566         self.cfg.SetDiskID(new, self.target_node)
11567
11568       # We need to modify old_lvs so that removal later removes the
11569       # right LVs, not the newly added ones; note that old_lvs is a
11570       # copy here
11571       for disk in old_lvs:
11572         disk.logical_id = ren_fn(disk, temp_suffix)
11573         self.cfg.SetDiskID(disk, self.target_node)
11574
11575       # Now that the new lvs have the old name, we can add them to the device
11576       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11577       result = self.rpc.call_blockdev_addchildren(self.target_node,
11578                                                   (dev, self.instance), new_lvs)
11579       msg = result.fail_msg
11580       if msg:
11581         for new_lv in new_lvs:
11582           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11583                                                new_lv).fail_msg
11584           if msg2:
11585             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11586                                hint=("cleanup manually the unused logical"
11587                                      "volumes"))
11588         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11589
11590     cstep = itertools.count(5)
11591
11592     if self.early_release:
11593       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11594       self._RemoveOldStorage(self.target_node, iv_names)
11595       # TODO: Check if releasing locks early still makes sense
11596       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11597     else:
11598       # Release all resource locks except those used by the instance
11599       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11600                     keep=self.node_secondary_ip.keys())
11601
11602     # Release all node locks while waiting for sync
11603     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11604
11605     # TODO: Can the instance lock be downgraded here? Take the optional disk
11606     # shutdown in the caller into consideration.
11607
11608     # Wait for sync
11609     # This can fail as the old devices are degraded and _WaitForSync
11610     # does a combined result over all disks, so we don't check its return value
11611     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11612     _WaitForSync(self.lu, self.instance)
11613
11614     # Check all devices manually
11615     self._CheckDevices(self.instance.primary_node, iv_names)
11616
11617     # Step: remove old storage
11618     if not self.early_release:
11619       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11620       self._RemoveOldStorage(self.target_node, iv_names)
11621
11622   def _ExecDrbd8Secondary(self, feedback_fn):
11623     """Replace the secondary node for DRBD 8.
11624
11625     The algorithm for replace is quite complicated:
11626       - for all disks of the instance:
11627         - create new LVs on the new node with same names
11628         - shutdown the drbd device on the old secondary
11629         - disconnect the drbd network on the primary
11630         - create the drbd device on the new secondary
11631         - network attach the drbd on the primary, using an artifice:
11632           the drbd code for Attach() will connect to the network if it
11633           finds a device which is connected to the good local disks but
11634           not network enabled
11635       - wait for sync across all devices
11636       - remove all disks from the old secondary
11637
11638     Failures are not very well handled.
11639
11640     """
11641     steps_total = 6
11642
11643     pnode = self.instance.primary_node
11644
11645     # Step: check device activation
11646     self.lu.LogStep(1, steps_total, "Check device existence")
11647     self._CheckDisksExistence([self.instance.primary_node])
11648     self._CheckVolumeGroup([self.instance.primary_node])
11649
11650     # Step: check other node consistency
11651     self.lu.LogStep(2, steps_total, "Check peer consistency")
11652     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11653
11654     # Step: create new storage
11655     self.lu.LogStep(3, steps_total, "Allocate new storage")
11656     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11657     for idx, dev in enumerate(disks):
11658       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11659                       (self.new_node, idx))
11660       # we pass force_create=True to force LVM creation
11661       for new_lv in dev.children:
11662         _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11663                              True, _GetInstanceInfoText(self.instance), False)
11664
11665     # Step 4: dbrd minors and drbd setups changes
11666     # after this, we must manually remove the drbd minors on both the
11667     # error and the success paths
11668     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11669     minors = self.cfg.AllocateDRBDMinor([self.new_node
11670                                          for dev in self.instance.disks],
11671                                         self.instance.name)
11672     logging.debug("Allocated minors %r", minors)
11673
11674     iv_names = {}
11675     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11676       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11677                       (self.new_node, idx))
11678       # create new devices on new_node; note that we create two IDs:
11679       # one without port, so the drbd will be activated without
11680       # networking information on the new node at this stage, and one
11681       # with network, for the latter activation in step 4
11682       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11683       if self.instance.primary_node == o_node1:
11684         p_minor = o_minor1
11685       else:
11686         assert self.instance.primary_node == o_node2, "Three-node instance?"
11687         p_minor = o_minor2
11688
11689       new_alone_id = (self.instance.primary_node, self.new_node, None,
11690                       p_minor, new_minor, o_secret)
11691       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11692                     p_minor, new_minor, o_secret)
11693
11694       iv_names[idx] = (dev, dev.children, new_net_id)
11695       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11696                     new_net_id)
11697       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11698                               logical_id=new_alone_id,
11699                               children=dev.children,
11700                               size=dev.size,
11701                               params={})
11702       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11703                                              self.cfg)
11704       try:
11705         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11706                               anno_new_drbd,
11707                               _GetInstanceInfoText(self.instance), False)
11708       except errors.GenericError:
11709         self.cfg.ReleaseDRBDMinors(self.instance.name)
11710         raise
11711
11712     # We have new devices, shutdown the drbd on the old secondary
11713     for idx, dev in enumerate(self.instance.disks):
11714       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11715       self.cfg.SetDiskID(dev, self.target_node)
11716       msg = self.rpc.call_blockdev_shutdown(self.target_node,
11717                                             (dev, self.instance)).fail_msg
11718       if msg:
11719         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11720                            "node: %s" % (idx, msg),
11721                            hint=("Please cleanup this device manually as"
11722                                  " soon as possible"))
11723
11724     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11725     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11726                                                self.instance.disks)[pnode]
11727
11728     msg = result.fail_msg
11729     if msg:
11730       # detaches didn't succeed (unlikely)
11731       self.cfg.ReleaseDRBDMinors(self.instance.name)
11732       raise errors.OpExecError("Can't detach the disks from the network on"
11733                                " old node: %s" % (msg,))
11734
11735     # if we managed to detach at least one, we update all the disks of
11736     # the instance to point to the new secondary
11737     self.lu.LogInfo("Updating instance configuration")
11738     for dev, _, new_logical_id in iv_names.itervalues():
11739       dev.logical_id = new_logical_id
11740       self.cfg.SetDiskID(dev, self.instance.primary_node)
11741
11742     self.cfg.Update(self.instance, feedback_fn)
11743
11744     # Release all node locks (the configuration has been updated)
11745     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11746
11747     # and now perform the drbd attach
11748     self.lu.LogInfo("Attaching primary drbds to new secondary"
11749                     " (standalone => connected)")
11750     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11751                                             self.new_node],
11752                                            self.node_secondary_ip,
11753                                            (self.instance.disks, self.instance),
11754                                            self.instance.name,
11755                                            False)
11756     for to_node, to_result in result.items():
11757       msg = to_result.fail_msg
11758       if msg:
11759         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11760                            to_node, msg,
11761                            hint=("please do a gnt-instance info to see the"
11762                                  " status of disks"))
11763
11764     cstep = itertools.count(5)
11765
11766     if self.early_release:
11767       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11768       self._RemoveOldStorage(self.target_node, iv_names)
11769       # TODO: Check if releasing locks early still makes sense
11770       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11771     else:
11772       # Release all resource locks except those used by the instance
11773       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11774                     keep=self.node_secondary_ip.keys())
11775
11776     # TODO: Can the instance lock be downgraded here? Take the optional disk
11777     # shutdown in the caller into consideration.
11778
11779     # Wait for sync
11780     # This can fail as the old devices are degraded and _WaitForSync
11781     # does a combined result over all disks, so we don't check its return value
11782     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11783     _WaitForSync(self.lu, self.instance)
11784
11785     # Check all devices manually
11786     self._CheckDevices(self.instance.primary_node, iv_names)
11787
11788     # Step: remove old storage
11789     if not self.early_release:
11790       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11791       self._RemoveOldStorage(self.target_node, iv_names)
11792
11793
11794 class LURepairNodeStorage(NoHooksLU):
11795   """Repairs the volume group on a node.
11796
11797   """
11798   REQ_BGL = False
11799
11800   def CheckArguments(self):
11801     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11802
11803     storage_type = self.op.storage_type
11804
11805     if (constants.SO_FIX_CONSISTENCY not in
11806         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11807       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11808                                  " repaired" % storage_type,
11809                                  errors.ECODE_INVAL)
11810
11811   def ExpandNames(self):
11812     self.needed_locks = {
11813       locking.LEVEL_NODE: [self.op.node_name],
11814       }
11815
11816   def _CheckFaultyDisks(self, instance, node_name):
11817     """Ensure faulty disks abort the opcode or at least warn."""
11818     try:
11819       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11820                                   node_name, True):
11821         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11822                                    " node '%s'" % (instance.name, node_name),
11823                                    errors.ECODE_STATE)
11824     except errors.OpPrereqError, err:
11825       if self.op.ignore_consistency:
11826         self.proc.LogWarning(str(err.args[0]))
11827       else:
11828         raise
11829
11830   def CheckPrereq(self):
11831     """Check prerequisites.
11832
11833     """
11834     # Check whether any instance on this node has faulty disks
11835     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11836       if inst.admin_state != constants.ADMINST_UP:
11837         continue
11838       check_nodes = set(inst.all_nodes)
11839       check_nodes.discard(self.op.node_name)
11840       for inst_node_name in check_nodes:
11841         self._CheckFaultyDisks(inst, inst_node_name)
11842
11843   def Exec(self, feedback_fn):
11844     feedback_fn("Repairing storage unit '%s' on %s ..." %
11845                 (self.op.name, self.op.node_name))
11846
11847     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11848     result = self.rpc.call_storage_execute(self.op.node_name,
11849                                            self.op.storage_type, st_args,
11850                                            self.op.name,
11851                                            constants.SO_FIX_CONSISTENCY)
11852     result.Raise("Failed to repair storage unit '%s' on %s" %
11853                  (self.op.name, self.op.node_name))
11854
11855
11856 class LUNodeEvacuate(NoHooksLU):
11857   """Evacuates instances off a list of nodes.
11858
11859   """
11860   REQ_BGL = False
11861
11862   _MODE2IALLOCATOR = {
11863     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11864     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11865     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11866     }
11867   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11868   assert (frozenset(_MODE2IALLOCATOR.values()) ==
11869           constants.IALLOCATOR_NEVAC_MODES)
11870
11871   def CheckArguments(self):
11872     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11873
11874   def ExpandNames(self):
11875     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11876
11877     if self.op.remote_node is not None:
11878       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11879       assert self.op.remote_node
11880
11881       if self.op.remote_node == self.op.node_name:
11882         raise errors.OpPrereqError("Can not use evacuated node as a new"
11883                                    " secondary node", errors.ECODE_INVAL)
11884
11885       if self.op.mode != constants.NODE_EVAC_SEC:
11886         raise errors.OpPrereqError("Without the use of an iallocator only"
11887                                    " secondary instances can be evacuated",
11888                                    errors.ECODE_INVAL)
11889
11890     # Declare locks
11891     self.share_locks = _ShareAll()
11892     self.needed_locks = {
11893       locking.LEVEL_INSTANCE: [],
11894       locking.LEVEL_NODEGROUP: [],
11895       locking.LEVEL_NODE: [],
11896       }
11897
11898     # Determine nodes (via group) optimistically, needs verification once locks
11899     # have been acquired
11900     self.lock_nodes = self._DetermineNodes()
11901
11902   def _DetermineNodes(self):
11903     """Gets the list of nodes to operate on.
11904
11905     """
11906     if self.op.remote_node is None:
11907       # Iallocator will choose any node(s) in the same group
11908       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11909     else:
11910       group_nodes = frozenset([self.op.remote_node])
11911
11912     # Determine nodes to be locked
11913     return set([self.op.node_name]) | group_nodes
11914
11915   def _DetermineInstances(self):
11916     """Builds list of instances to operate on.
11917
11918     """
11919     assert self.op.mode in constants.NODE_EVAC_MODES
11920
11921     if self.op.mode == constants.NODE_EVAC_PRI:
11922       # Primary instances only
11923       inst_fn = _GetNodePrimaryInstances
11924       assert self.op.remote_node is None, \
11925         "Evacuating primary instances requires iallocator"
11926     elif self.op.mode == constants.NODE_EVAC_SEC:
11927       # Secondary instances only
11928       inst_fn = _GetNodeSecondaryInstances
11929     else:
11930       # All instances
11931       assert self.op.mode == constants.NODE_EVAC_ALL
11932       inst_fn = _GetNodeInstances
11933       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11934       # per instance
11935       raise errors.OpPrereqError("Due to an issue with the iallocator"
11936                                  " interface it is not possible to evacuate"
11937                                  " all instances at once; specify explicitly"
11938                                  " whether to evacuate primary or secondary"
11939                                  " instances",
11940                                  errors.ECODE_INVAL)
11941
11942     return inst_fn(self.cfg, self.op.node_name)
11943
11944   def DeclareLocks(self, level):
11945     if level == locking.LEVEL_INSTANCE:
11946       # Lock instances optimistically, needs verification once node and group
11947       # locks have been acquired
11948       self.needed_locks[locking.LEVEL_INSTANCE] = \
11949         set(i.name for i in self._DetermineInstances())
11950
11951     elif level == locking.LEVEL_NODEGROUP:
11952       # Lock node groups for all potential target nodes optimistically, needs
11953       # verification once nodes have been acquired
11954       self.needed_locks[locking.LEVEL_NODEGROUP] = \
11955         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11956
11957     elif level == locking.LEVEL_NODE:
11958       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11959
11960   def CheckPrereq(self):
11961     # Verify locks
11962     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11963     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11964     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11965
11966     need_nodes = self._DetermineNodes()
11967
11968     if not owned_nodes.issuperset(need_nodes):
11969       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11970                                  " locks were acquired, current nodes are"
11971                                  " are '%s', used to be '%s'; retry the"
11972                                  " operation" %
11973                                  (self.op.node_name,
11974                                   utils.CommaJoin(need_nodes),
11975                                   utils.CommaJoin(owned_nodes)),
11976                                  errors.ECODE_STATE)
11977
11978     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11979     if owned_groups != wanted_groups:
11980       raise errors.OpExecError("Node groups changed since locks were acquired,"
11981                                " current groups are '%s', used to be '%s';"
11982                                " retry the operation" %
11983                                (utils.CommaJoin(wanted_groups),
11984                                 utils.CommaJoin(owned_groups)))
11985
11986     # Determine affected instances
11987     self.instances = self._DetermineInstances()
11988     self.instance_names = [i.name for i in self.instances]
11989
11990     if set(self.instance_names) != owned_instances:
11991       raise errors.OpExecError("Instances on node '%s' changed since locks"
11992                                " were acquired, current instances are '%s',"
11993                                " used to be '%s'; retry the operation" %
11994                                (self.op.node_name,
11995                                 utils.CommaJoin(self.instance_names),
11996                                 utils.CommaJoin(owned_instances)))
11997
11998     if self.instance_names:
11999       self.LogInfo("Evacuating instances from node '%s': %s",
12000                    self.op.node_name,
12001                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
12002     else:
12003       self.LogInfo("No instances to evacuate from node '%s'",
12004                    self.op.node_name)
12005
12006     if self.op.remote_node is not None:
12007       for i in self.instances:
12008         if i.primary_node == self.op.remote_node:
12009           raise errors.OpPrereqError("Node %s is the primary node of"
12010                                      " instance %s, cannot use it as"
12011                                      " secondary" %
12012                                      (self.op.remote_node, i.name),
12013                                      errors.ECODE_INVAL)
12014
12015   def Exec(self, feedback_fn):
12016     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12017
12018     if not self.instance_names:
12019       # No instances to evacuate
12020       jobs = []
12021
12022     elif self.op.iallocator is not None:
12023       # TODO: Implement relocation to other group
12024       evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12025       req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12026                                      instances=list(self.instance_names))
12027       ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12028
12029       ial.Run(self.op.iallocator)
12030
12031       if not ial.success:
12032         raise errors.OpPrereqError("Can't compute node evacuation using"
12033                                    " iallocator '%s': %s" %
12034                                    (self.op.iallocator, ial.info),
12035                                    errors.ECODE_NORES)
12036
12037       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12038
12039     elif self.op.remote_node is not None:
12040       assert self.op.mode == constants.NODE_EVAC_SEC
12041       jobs = [
12042         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12043                                         remote_node=self.op.remote_node,
12044                                         disks=[],
12045                                         mode=constants.REPLACE_DISK_CHG,
12046                                         early_release=self.op.early_release)]
12047         for instance_name in self.instance_names]
12048
12049     else:
12050       raise errors.ProgrammerError("No iallocator or remote node")
12051
12052     return ResultWithJobs(jobs)
12053
12054
12055 def _SetOpEarlyRelease(early_release, op):
12056   """Sets C{early_release} flag on opcodes if available.
12057
12058   """
12059   try:
12060     op.early_release = early_release
12061   except AttributeError:
12062     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12063
12064   return op
12065
12066
12067 def _NodeEvacDest(use_nodes, group, nodes):
12068   """Returns group or nodes depending on caller's choice.
12069
12070   """
12071   if use_nodes:
12072     return utils.CommaJoin(nodes)
12073   else:
12074     return group
12075
12076
12077 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12078   """Unpacks the result of change-group and node-evacuate iallocator requests.
12079
12080   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12081   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12082
12083   @type lu: L{LogicalUnit}
12084   @param lu: Logical unit instance
12085   @type alloc_result: tuple/list
12086   @param alloc_result: Result from iallocator
12087   @type early_release: bool
12088   @param early_release: Whether to release locks early if possible
12089   @type use_nodes: bool
12090   @param use_nodes: Whether to display node names instead of groups
12091
12092   """
12093   (moved, failed, jobs) = alloc_result
12094
12095   if failed:
12096     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12097                                  for (name, reason) in failed)
12098     lu.LogWarning("Unable to evacuate instances %s", failreason)
12099     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12100
12101   if moved:
12102     lu.LogInfo("Instances to be moved: %s",
12103                utils.CommaJoin("%s (to %s)" %
12104                                (name, _NodeEvacDest(use_nodes, group, nodes))
12105                                for (name, group, nodes) in moved))
12106
12107   return [map(compat.partial(_SetOpEarlyRelease, early_release),
12108               map(opcodes.OpCode.LoadOpCode, ops))
12109           for ops in jobs]
12110
12111
12112 def _DiskSizeInBytesToMebibytes(lu, size):
12113   """Converts a disk size in bytes to mebibytes.
12114
12115   Warns and rounds up if the size isn't an even multiple of 1 MiB.
12116
12117   """
12118   (mib, remainder) = divmod(size, 1024 * 1024)
12119
12120   if remainder != 0:
12121     lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12122                   " to not overwrite existing data (%s bytes will not be"
12123                   " wiped)", (1024 * 1024) - remainder)
12124     mib += 1
12125
12126   return mib
12127
12128
12129 class LUInstanceGrowDisk(LogicalUnit):
12130   """Grow a disk of an instance.
12131
12132   """
12133   HPATH = "disk-grow"
12134   HTYPE = constants.HTYPE_INSTANCE
12135   REQ_BGL = False
12136
12137   def ExpandNames(self):
12138     self._ExpandAndLockInstance()
12139     self.needed_locks[locking.LEVEL_NODE] = []
12140     self.needed_locks[locking.LEVEL_NODE_RES] = []
12141     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12142     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12143
12144   def DeclareLocks(self, level):
12145     if level == locking.LEVEL_NODE:
12146       self._LockInstancesNodes()
12147     elif level == locking.LEVEL_NODE_RES:
12148       # Copy node locks
12149       self.needed_locks[locking.LEVEL_NODE_RES] = \
12150         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12151
12152   def BuildHooksEnv(self):
12153     """Build hooks env.
12154
12155     This runs on the master, the primary and all the secondaries.
12156
12157     """
12158     env = {
12159       "DISK": self.op.disk,
12160       "AMOUNT": self.op.amount,
12161       "ABSOLUTE": self.op.absolute,
12162       }
12163     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12164     return env
12165
12166   def BuildHooksNodes(self):
12167     """Build hooks nodes.
12168
12169     """
12170     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12171     return (nl, nl)
12172
12173   def CheckPrereq(self):
12174     """Check prerequisites.
12175
12176     This checks that the instance is in the cluster.
12177
12178     """
12179     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12180     assert instance is not None, \
12181       "Cannot retrieve locked instance %s" % self.op.instance_name
12182     nodenames = list(instance.all_nodes)
12183     for node in nodenames:
12184       _CheckNodeOnline(self, node)
12185
12186     self.instance = instance
12187
12188     if instance.disk_template not in constants.DTS_GROWABLE:
12189       raise errors.OpPrereqError("Instance's disk layout does not support"
12190                                  " growing", errors.ECODE_INVAL)
12191
12192     self.disk = instance.FindDisk(self.op.disk)
12193
12194     if self.op.absolute:
12195       self.target = self.op.amount
12196       self.delta = self.target - self.disk.size
12197       if self.delta < 0:
12198         raise errors.OpPrereqError("Requested size (%s) is smaller than "
12199                                    "current disk size (%s)" %
12200                                    (utils.FormatUnit(self.target, "h"),
12201                                     utils.FormatUnit(self.disk.size, "h")),
12202                                    errors.ECODE_STATE)
12203     else:
12204       self.delta = self.op.amount
12205       self.target = self.disk.size + self.delta
12206       if self.delta < 0:
12207         raise errors.OpPrereqError("Requested increment (%s) is negative" %
12208                                    utils.FormatUnit(self.delta, "h"),
12209                                    errors.ECODE_INVAL)
12210
12211     if instance.disk_template not in (constants.DT_FILE,
12212                                       constants.DT_SHARED_FILE,
12213                                       constants.DT_RBD):
12214       # TODO: check the free disk space for file, when that feature will be
12215       # supported
12216       _CheckNodesFreeDiskPerVG(self, nodenames,
12217                                self.disk.ComputeGrowth(self.delta))
12218
12219   def Exec(self, feedback_fn):
12220     """Execute disk grow.
12221
12222     """
12223     instance = self.instance
12224     disk = self.disk
12225
12226     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12227     assert (self.owned_locks(locking.LEVEL_NODE) ==
12228             self.owned_locks(locking.LEVEL_NODE_RES))
12229
12230     wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12231
12232     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12233     if not disks_ok:
12234       raise errors.OpExecError("Cannot activate block device to grow")
12235
12236     feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12237                 (self.op.disk, instance.name,
12238                  utils.FormatUnit(self.delta, "h"),
12239                  utils.FormatUnit(self.target, "h")))
12240
12241     # First run all grow ops in dry-run mode
12242     for node in instance.all_nodes:
12243       self.cfg.SetDiskID(disk, node)
12244       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12245                                            True, True)
12246       result.Raise("Dry-run grow request failed to node %s" % node)
12247
12248     if wipe_disks:
12249       # Get disk size from primary node for wiping
12250       result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12251       result.Raise("Failed to retrieve disk size from node '%s'" %
12252                    instance.primary_node)
12253
12254       (disk_size_in_bytes, ) = result.payload
12255
12256       if disk_size_in_bytes is None:
12257         raise errors.OpExecError("Failed to retrieve disk size from primary"
12258                                  " node '%s'" % instance.primary_node)
12259
12260       old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12261
12262       assert old_disk_size >= disk.size, \
12263         ("Retrieved disk size too small (got %s, should be at least %s)" %
12264          (old_disk_size, disk.size))
12265     else:
12266       old_disk_size = None
12267
12268     # We know that (as far as we can test) operations across different
12269     # nodes will succeed, time to run it for real on the backing storage
12270     for node in instance.all_nodes:
12271       self.cfg.SetDiskID(disk, node)
12272       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12273                                            False, True)
12274       result.Raise("Grow request failed to node %s" % node)
12275
12276     # And now execute it for logical storage, on the primary node
12277     node = instance.primary_node
12278     self.cfg.SetDiskID(disk, node)
12279     result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12280                                          False, False)
12281     result.Raise("Grow request failed to node %s" % node)
12282
12283     disk.RecordGrow(self.delta)
12284     self.cfg.Update(instance, feedback_fn)
12285
12286     # Changes have been recorded, release node lock
12287     _ReleaseLocks(self, locking.LEVEL_NODE)
12288
12289     # Downgrade lock while waiting for sync
12290     self.glm.downgrade(locking.LEVEL_INSTANCE)
12291
12292     assert wipe_disks ^ (old_disk_size is None)
12293
12294     if wipe_disks:
12295       assert instance.disks[self.op.disk] == disk
12296
12297       # Wipe newly added disk space
12298       _WipeDisks(self, instance,
12299                  disks=[(self.op.disk, disk, old_disk_size)])
12300
12301     if self.op.wait_for_sync:
12302       disk_abort = not _WaitForSync(self, instance, disks=[disk])
12303       if disk_abort:
12304         self.proc.LogWarning("Disk sync-ing has not returned a good"
12305                              " status; please check the instance")
12306       if instance.admin_state != constants.ADMINST_UP:
12307         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12308     elif instance.admin_state != constants.ADMINST_UP:
12309       self.proc.LogWarning("Not shutting down the disk even if the instance is"
12310                            " not supposed to be running because no wait for"
12311                            " sync mode was requested")
12312
12313     assert self.owned_locks(locking.LEVEL_NODE_RES)
12314     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12315
12316
12317 class LUInstanceQueryData(NoHooksLU):
12318   """Query runtime instance data.
12319
12320   """
12321   REQ_BGL = False
12322
12323   def ExpandNames(self):
12324     self.needed_locks = {}
12325
12326     # Use locking if requested or when non-static information is wanted
12327     if not (self.op.static or self.op.use_locking):
12328       self.LogWarning("Non-static data requested, locks need to be acquired")
12329       self.op.use_locking = True
12330
12331     if self.op.instances or not self.op.use_locking:
12332       # Expand instance names right here
12333       self.wanted_names = _GetWantedInstances(self, self.op.instances)
12334     else:
12335       # Will use acquired locks
12336       self.wanted_names = None
12337
12338     if self.op.use_locking:
12339       self.share_locks = _ShareAll()
12340
12341       if self.wanted_names is None:
12342         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12343       else:
12344         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12345
12346       self.needed_locks[locking.LEVEL_NODEGROUP] = []
12347       self.needed_locks[locking.LEVEL_NODE] = []
12348       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12349
12350   def DeclareLocks(self, level):
12351     if self.op.use_locking:
12352       if level == locking.LEVEL_NODEGROUP:
12353         owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12354
12355         # Lock all groups used by instances optimistically; this requires going
12356         # via the node before it's locked, requiring verification later on
12357         self.needed_locks[locking.LEVEL_NODEGROUP] = \
12358           frozenset(group_uuid
12359                     for instance_name in owned_instances
12360                     for group_uuid in
12361                       self.cfg.GetInstanceNodeGroups(instance_name))
12362
12363       elif level == locking.LEVEL_NODE:
12364         self._LockInstancesNodes()
12365
12366   def CheckPrereq(self):
12367     """Check prerequisites.
12368
12369     This only checks the optional instance list against the existing names.
12370
12371     """
12372     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12373     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12374     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12375
12376     if self.wanted_names is None:
12377       assert self.op.use_locking, "Locking was not used"
12378       self.wanted_names = owned_instances
12379
12380     instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12381
12382     if self.op.use_locking:
12383       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12384                                 None)
12385     else:
12386       assert not (owned_instances or owned_groups or owned_nodes)
12387
12388     self.wanted_instances = instances.values()
12389
12390   def _ComputeBlockdevStatus(self, node, instance, dev):
12391     """Returns the status of a block device
12392
12393     """
12394     if self.op.static or not node:
12395       return None
12396
12397     self.cfg.SetDiskID(dev, node)
12398
12399     result = self.rpc.call_blockdev_find(node, dev)
12400     if result.offline:
12401       return None
12402
12403     result.Raise("Can't compute disk status for %s" % instance.name)
12404
12405     status = result.payload
12406     if status is None:
12407       return None
12408
12409     return (status.dev_path, status.major, status.minor,
12410             status.sync_percent, status.estimated_time,
12411             status.is_degraded, status.ldisk_status)
12412
12413   def _ComputeDiskStatus(self, instance, snode, dev):
12414     """Compute block device status.
12415
12416     """
12417     (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12418
12419     return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12420
12421   def _ComputeDiskStatusInner(self, instance, snode, dev):
12422     """Compute block device status.
12423
12424     @attention: The device has to be annotated already.
12425
12426     """
12427     if dev.dev_type in constants.LDS_DRBD:
12428       # we change the snode then (otherwise we use the one passed in)
12429       if dev.logical_id[0] == instance.primary_node:
12430         snode = dev.logical_id[1]
12431       else:
12432         snode = dev.logical_id[0]
12433
12434     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12435                                               instance, dev)
12436     dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12437
12438     if dev.children:
12439       dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12440                                         instance, snode),
12441                          dev.children)
12442     else:
12443       dev_children = []
12444
12445     return {
12446       "iv_name": dev.iv_name,
12447       "dev_type": dev.dev_type,
12448       "logical_id": dev.logical_id,
12449       "physical_id": dev.physical_id,
12450       "pstatus": dev_pstatus,
12451       "sstatus": dev_sstatus,
12452       "children": dev_children,
12453       "mode": dev.mode,
12454       "size": dev.size,
12455       }
12456
12457   def Exec(self, feedback_fn):
12458     """Gather and return data"""
12459     result = {}
12460
12461     cluster = self.cfg.GetClusterInfo()
12462
12463     node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12464     nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12465
12466     groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12467                                                  for node in nodes.values()))
12468
12469     group2name_fn = lambda uuid: groups[uuid].name
12470
12471     for instance in self.wanted_instances:
12472       pnode = nodes[instance.primary_node]
12473
12474       if self.op.static or pnode.offline:
12475         remote_state = None
12476         if pnode.offline:
12477           self.LogWarning("Primary node %s is marked offline, returning static"
12478                           " information only for instance %s" %
12479                           (pnode.name, instance.name))
12480       else:
12481         remote_info = self.rpc.call_instance_info(instance.primary_node,
12482                                                   instance.name,
12483                                                   instance.hypervisor)
12484         remote_info.Raise("Error checking node %s" % instance.primary_node)
12485         remote_info = remote_info.payload
12486         if remote_info and "state" in remote_info:
12487           remote_state = "up"
12488         else:
12489           if instance.admin_state == constants.ADMINST_UP:
12490             remote_state = "down"
12491           else:
12492             remote_state = instance.admin_state
12493
12494       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12495                   instance.disks)
12496
12497       snodes_group_uuids = [nodes[snode_name].group
12498                             for snode_name in instance.secondary_nodes]
12499
12500       result[instance.name] = {
12501         "name": instance.name,
12502         "config_state": instance.admin_state,
12503         "run_state": remote_state,
12504         "pnode": instance.primary_node,
12505         "pnode_group_uuid": pnode.group,
12506         "pnode_group_name": group2name_fn(pnode.group),
12507         "snodes": instance.secondary_nodes,
12508         "snodes_group_uuids": snodes_group_uuids,
12509         "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12510         "os": instance.os,
12511         # this happens to be the same format used for hooks
12512         "nics": _NICListToTuple(self, instance.nics),
12513         "disk_template": instance.disk_template,
12514         "disks": disks,
12515         "hypervisor": instance.hypervisor,
12516         "network_port": instance.network_port,
12517         "hv_instance": instance.hvparams,
12518         "hv_actual": cluster.FillHV(instance, skip_globals=True),
12519         "be_instance": instance.beparams,
12520         "be_actual": cluster.FillBE(instance),
12521         "os_instance": instance.osparams,
12522         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12523         "serial_no": instance.serial_no,
12524         "mtime": instance.mtime,
12525         "ctime": instance.ctime,
12526         "uuid": instance.uuid,
12527         }
12528
12529     return result
12530
12531
12532 def PrepareContainerMods(mods, private_fn):
12533   """Prepares a list of container modifications by adding a private data field.
12534
12535   @type mods: list of tuples; (operation, index, parameters)
12536   @param mods: List of modifications
12537   @type private_fn: callable or None
12538   @param private_fn: Callable for constructing a private data field for a
12539     modification
12540   @rtype: list
12541
12542   """
12543   if private_fn is None:
12544     fn = lambda: None
12545   else:
12546     fn = private_fn
12547
12548   return [(op, idx, params, fn()) for (op, idx, params) in mods]
12549
12550
12551 #: Type description for changes as returned by L{ApplyContainerMods}'s
12552 #: callbacks
12553 _TApplyContModsCbChanges = \
12554   ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12555     ht.TNonEmptyString,
12556     ht.TAny,
12557     ])))
12558
12559
12560 def ApplyContainerMods(kind, container, chgdesc, mods,
12561                        create_fn, modify_fn, remove_fn):
12562   """Applies descriptions in C{mods} to C{container}.
12563
12564   @type kind: string
12565   @param kind: One-word item description
12566   @type container: list
12567   @param container: Container to modify
12568   @type chgdesc: None or list
12569   @param chgdesc: List of applied changes
12570   @type mods: list
12571   @param mods: Modifications as returned by L{PrepareContainerMods}
12572   @type create_fn: callable
12573   @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12574     receives absolute item index, parameters and private data object as added
12575     by L{PrepareContainerMods}, returns tuple containing new item and changes
12576     as list
12577   @type modify_fn: callable
12578   @param modify_fn: Callback for modifying an existing item
12579     (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12580     and private data object as added by L{PrepareContainerMods}, returns
12581     changes as list
12582   @type remove_fn: callable
12583   @param remove_fn: Callback on removing item; receives absolute item index,
12584     item and private data object as added by L{PrepareContainerMods}
12585
12586   """
12587   for (op, idx, params, private) in mods:
12588     if idx == -1:
12589       # Append
12590       absidx = len(container) - 1
12591     elif idx < 0:
12592       raise IndexError("Not accepting negative indices other than -1")
12593     elif idx > len(container):
12594       raise IndexError("Got %s index %s, but there are only %s" %
12595                        (kind, idx, len(container)))
12596     else:
12597       absidx = idx
12598
12599     changes = None
12600
12601     if op == constants.DDM_ADD:
12602       # Calculate where item will be added
12603       if idx == -1:
12604         addidx = len(container)
12605       else:
12606         addidx = idx
12607
12608       if create_fn is None:
12609         item = params
12610       else:
12611         (item, changes) = create_fn(addidx, params, private)
12612
12613       if idx == -1:
12614         container.append(item)
12615       else:
12616         assert idx >= 0
12617         assert idx <= len(container)
12618         # list.insert does so before the specified index
12619         container.insert(idx, item)
12620     else:
12621       # Retrieve existing item
12622       try:
12623         item = container[absidx]
12624       except IndexError:
12625         raise IndexError("Invalid %s index %s" % (kind, idx))
12626
12627       if op == constants.DDM_REMOVE:
12628         assert not params
12629
12630         if remove_fn is not None:
12631           remove_fn(absidx, item, private)
12632
12633         changes = [("%s/%s" % (kind, absidx), "remove")]
12634
12635         assert container[absidx] == item
12636         del container[absidx]
12637       elif op == constants.DDM_MODIFY:
12638         if modify_fn is not None:
12639           changes = modify_fn(absidx, item, params, private)
12640       else:
12641         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12642
12643     assert _TApplyContModsCbChanges(changes)
12644
12645     if not (chgdesc is None or changes is None):
12646       chgdesc.extend(changes)
12647
12648
12649 def _UpdateIvNames(base_index, disks):
12650   """Updates the C{iv_name} attribute of disks.
12651
12652   @type disks: list of L{objects.Disk}
12653
12654   """
12655   for (idx, disk) in enumerate(disks):
12656     disk.iv_name = "disk/%s" % (base_index + idx, )
12657
12658
12659 class _InstNicModPrivate:
12660   """Data structure for network interface modifications.
12661
12662   Used by L{LUInstanceSetParams}.
12663
12664   """
12665   def __init__(self):
12666     self.params = None
12667     self.filled = None
12668
12669
12670 class LUInstanceSetParams(LogicalUnit):
12671   """Modifies an instances's parameters.
12672
12673   """
12674   HPATH = "instance-modify"
12675   HTYPE = constants.HTYPE_INSTANCE
12676   REQ_BGL = False
12677
12678   @staticmethod
12679   def _UpgradeDiskNicMods(kind, mods, verify_fn):
12680     assert ht.TList(mods)
12681     assert not mods or len(mods[0]) in (2, 3)
12682
12683     if mods and len(mods[0]) == 2:
12684       result = []
12685
12686       addremove = 0
12687       for op, params in mods:
12688         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12689           result.append((op, -1, params))
12690           addremove += 1
12691
12692           if addremove > 1:
12693             raise errors.OpPrereqError("Only one %s add or remove operation is"
12694                                        " supported at a time" % kind,
12695                                        errors.ECODE_INVAL)
12696         else:
12697           result.append((constants.DDM_MODIFY, op, params))
12698
12699       assert verify_fn(result)
12700     else:
12701       result = mods
12702
12703     return result
12704
12705   @staticmethod
12706   def _CheckMods(kind, mods, key_types, item_fn):
12707     """Ensures requested disk/NIC modifications are valid.
12708
12709     """
12710     for (op, _, params) in mods:
12711       assert ht.TDict(params)
12712
12713       utils.ForceDictType(params, key_types)
12714
12715       if op == constants.DDM_REMOVE:
12716         if params:
12717           raise errors.OpPrereqError("No settings should be passed when"
12718                                      " removing a %s" % kind,
12719                                      errors.ECODE_INVAL)
12720       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12721         item_fn(op, params)
12722       else:
12723         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12724
12725   @staticmethod
12726   def _VerifyDiskModification(op, params):
12727     """Verifies a disk modification.
12728
12729     """
12730     if op == constants.DDM_ADD:
12731       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12732       if mode not in constants.DISK_ACCESS_SET:
12733         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12734                                    errors.ECODE_INVAL)
12735
12736       size = params.get(constants.IDISK_SIZE, None)
12737       if size is None:
12738         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12739                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12740
12741       try:
12742         size = int(size)
12743       except (TypeError, ValueError), err:
12744         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12745                                    errors.ECODE_INVAL)
12746
12747       params[constants.IDISK_SIZE] = size
12748
12749     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12750       raise errors.OpPrereqError("Disk size change not possible, use"
12751                                  " grow-disk", errors.ECODE_INVAL)
12752
12753   @staticmethod
12754   def _VerifyNicModification(op, params):
12755     """Verifies a network interface modification.
12756
12757     """
12758     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12759       ip = params.get(constants.INIC_IP, None)
12760       req_net = params.get(constants.INIC_NETWORK, None)
12761       link = params.get(constants.NIC_LINK, None)
12762       mode = params.get(constants.NIC_MODE, None)
12763       if req_net is not None:
12764         if req_net.lower() == constants.VALUE_NONE:
12765           params[constants.INIC_NETWORK] = None
12766           req_net = None
12767         elif link is not None or mode is not None:
12768           raise errors.OpPrereqError("If network is given"
12769                                      " mode or link should not",
12770                                      errors.ECODE_INVAL)
12771
12772       if op == constants.DDM_ADD:
12773         macaddr = params.get(constants.INIC_MAC, None)
12774         if macaddr is None:
12775           params[constants.INIC_MAC] = constants.VALUE_AUTO
12776
12777       if ip is not None:
12778         if ip.lower() == constants.VALUE_NONE:
12779           params[constants.INIC_IP] = None
12780         else:
12781           if ip.lower() == constants.NIC_IP_POOL:
12782             if op == constants.DDM_ADD and req_net is None:
12783               raise errors.OpPrereqError("If ip=pool, parameter network"
12784                                          " cannot be none",
12785                                          errors.ECODE_INVAL)
12786           else:
12787             if not netutils.IPAddress.IsValid(ip):
12788               raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12789                                          errors.ECODE_INVAL)
12790
12791       if constants.INIC_MAC in params:
12792         macaddr = params[constants.INIC_MAC]
12793         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12794           macaddr = utils.NormalizeAndValidateMac(macaddr)
12795
12796         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12797           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12798                                      " modifying an existing NIC",
12799                                      errors.ECODE_INVAL)
12800
12801   def CheckArguments(self):
12802     if not (self.op.nics or self.op.disks or self.op.disk_template or
12803             self.op.hvparams or self.op.beparams or self.op.os_name or
12804             self.op.offline is not None or self.op.runtime_mem):
12805       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12806
12807     if self.op.hvparams:
12808       _CheckGlobalHvParams(self.op.hvparams)
12809
12810     self.op.disks = self._UpgradeDiskNicMods(
12811       "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12812     self.op.nics = self._UpgradeDiskNicMods(
12813       "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12814
12815     # Check disk modifications
12816     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12817                     self._VerifyDiskModification)
12818
12819     if self.op.disks and self.op.disk_template is not None:
12820       raise errors.OpPrereqError("Disk template conversion and other disk"
12821                                  " changes not supported at the same time",
12822                                  errors.ECODE_INVAL)
12823
12824     if (self.op.disk_template and
12825         self.op.disk_template in constants.DTS_INT_MIRROR and
12826         self.op.remote_node is None):
12827       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12828                                  " one requires specifying a secondary node",
12829                                  errors.ECODE_INVAL)
12830
12831     # Check NIC modifications
12832     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12833                     self._VerifyNicModification)
12834
12835   def ExpandNames(self):
12836     self._ExpandAndLockInstance()
12837     self.needed_locks[locking.LEVEL_NODEGROUP] = []
12838     # Can't even acquire node locks in shared mode as upcoming changes in
12839     # Ganeti 2.6 will start to modify the node object on disk conversion
12840     self.needed_locks[locking.LEVEL_NODE] = []
12841     self.needed_locks[locking.LEVEL_NODE_RES] = []
12842     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12843     # Look node group to look up the ipolicy
12844     self.share_locks[locking.LEVEL_NODEGROUP] = 1
12845
12846   def DeclareLocks(self, level):
12847     if level == locking.LEVEL_NODEGROUP:
12848       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12849       # Acquire locks for the instance's nodegroups optimistically. Needs
12850       # to be verified in CheckPrereq
12851       self.needed_locks[locking.LEVEL_NODEGROUP] = \
12852         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12853     elif level == locking.LEVEL_NODE:
12854       self._LockInstancesNodes()
12855       if self.op.disk_template and self.op.remote_node:
12856         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12857         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12858     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12859       # Copy node locks
12860       self.needed_locks[locking.LEVEL_NODE_RES] = \
12861         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12862
12863   def BuildHooksEnv(self):
12864     """Build hooks env.
12865
12866     This runs on the master, primary and secondaries.
12867
12868     """
12869     args = dict()
12870     if constants.BE_MINMEM in self.be_new:
12871       args["minmem"] = self.be_new[constants.BE_MINMEM]
12872     if constants.BE_MAXMEM in self.be_new:
12873       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12874     if constants.BE_VCPUS in self.be_new:
12875       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12876     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12877     # information at all.
12878
12879     if self._new_nics is not None:
12880       nics = []
12881
12882       for nic in self._new_nics:
12883         n = copy.deepcopy(nic)
12884         nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12885         n.nicparams = nicparams
12886         nics.append(_NICToTuple(self, n))
12887
12888       args["nics"] = nics
12889
12890     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12891     if self.op.disk_template:
12892       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12893     if self.op.runtime_mem:
12894       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12895
12896     return env
12897
12898   def BuildHooksNodes(self):
12899     """Build hooks nodes.
12900
12901     """
12902     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12903     return (nl, nl)
12904
12905   def _PrepareNicModification(self, params, private, old_ip, old_net,
12906                               old_params, cluster, pnode):
12907
12908     update_params_dict = dict([(key, params[key])
12909                                for key in constants.NICS_PARAMETERS
12910                                if key in params])
12911
12912     req_link = update_params_dict.get(constants.NIC_LINK, None)
12913     req_mode = update_params_dict.get(constants.NIC_MODE, None)
12914
12915     new_net = params.get(constants.INIC_NETWORK, old_net)
12916     if new_net is not None:
12917       netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12918       if netparams is None:
12919         raise errors.OpPrereqError("No netparams found for the network"
12920                                    " %s, propably not connected." % new_net,
12921                                    errors.ECODE_INVAL)
12922       new_params = dict(netparams)
12923     else:
12924       new_params = _GetUpdatedParams(old_params, update_params_dict)
12925
12926     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12927
12928     new_filled_params = cluster.SimpleFillNIC(new_params)
12929     objects.NIC.CheckParameterSyntax(new_filled_params)
12930
12931     new_mode = new_filled_params[constants.NIC_MODE]
12932     if new_mode == constants.NIC_MODE_BRIDGED:
12933       bridge = new_filled_params[constants.NIC_LINK]
12934       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12935       if msg:
12936         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12937         if self.op.force:
12938           self.warn.append(msg)
12939         else:
12940           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12941
12942     elif new_mode == constants.NIC_MODE_ROUTED:
12943       ip = params.get(constants.INIC_IP, old_ip)
12944       if ip is None:
12945         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12946                                    " on a routed NIC", errors.ECODE_INVAL)
12947
12948     if constants.INIC_MAC in params:
12949       mac = params[constants.INIC_MAC]
12950       if mac is None:
12951         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12952                                    errors.ECODE_INVAL)
12953       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12954         # otherwise generate the MAC address
12955         params[constants.INIC_MAC] = \
12956           self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12957       else:
12958         # or validate/reserve the current one
12959         try:
12960           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12961         except errors.ReservationError:
12962           raise errors.OpPrereqError("MAC address '%s' already in use"
12963                                      " in cluster" % mac,
12964                                      errors.ECODE_NOTUNIQUE)
12965     elif new_net != old_net:
12966
12967       def get_net_prefix(net):
12968         if net:
12969           uuid = self.cfg.LookupNetwork(net)
12970           if uuid:
12971             nobj = self.cfg.GetNetwork(uuid)
12972             return nobj.mac_prefix
12973         return None
12974
12975       new_prefix = get_net_prefix(new_net)
12976       old_prefix = get_net_prefix(old_net)
12977       if old_prefix != new_prefix:
12978         params[constants.INIC_MAC] = \
12979           self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12980
12981     #if there is a change in nic-network configuration
12982     new_ip = params.get(constants.INIC_IP, old_ip)
12983     if (new_ip, new_net) != (old_ip, old_net):
12984       if new_ip:
12985         if new_net:
12986           if new_ip.lower() == constants.NIC_IP_POOL:
12987             try:
12988               new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
12989             except errors.ReservationError:
12990               raise errors.OpPrereqError("Unable to get a free IP"
12991                                          " from the address pool",
12992                                          errors.ECODE_STATE)
12993             self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
12994             params[constants.INIC_IP] = new_ip
12995           elif new_ip != old_ip or new_net != old_net:
12996             try:
12997               self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
12998               self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
12999             except errors.ReservationError:
13000               raise errors.OpPrereqError("IP %s not available in network %s" %
13001                                          (new_ip, new_net),
13002                                          errors.ECODE_NOTUNIQUE)
13003         elif new_ip.lower() == constants.NIC_IP_POOL:
13004           raise errors.OpPrereqError("ip=pool, but no network found",
13005                                      errors.ECODE_INVAL)
13006         else:
13007           # new net is None
13008           if self.op.conflicts_check:
13009             _CheckForConflictingIp(self, new_ip, pnode)
13010
13011       if old_ip:
13012         if old_net:
13013           try:
13014             self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13015           except errors.AddressPoolError:
13016             logging.warning("Release IP %s not contained in network %s",
13017                             old_ip, old_net)
13018
13019     # there are no changes in (net, ip) tuple
13020     elif (old_net is not None and
13021           (req_link is not None or req_mode is not None)):
13022       raise errors.OpPrereqError("Not allowed to change link or mode of"
13023                                  " a NIC that is connected to a network.",
13024                                  errors.ECODE_INVAL)
13025
13026     private.params = new_params
13027     private.filled = new_filled_params
13028
13029   def CheckPrereq(self):
13030     """Check prerequisites.
13031
13032     This only checks the instance list against the existing names.
13033
13034     """
13035     assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13036     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13037
13038     cluster = self.cluster = self.cfg.GetClusterInfo()
13039     assert self.instance is not None, \
13040       "Cannot retrieve locked instance %s" % self.op.instance_name
13041
13042     pnode = instance.primary_node
13043     assert pnode in self.owned_locks(locking.LEVEL_NODE)
13044     nodelist = list(instance.all_nodes)
13045     pnode_info = self.cfg.GetNodeInfo(pnode)
13046     self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13047
13048     #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13049     assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13050     group_info = self.cfg.GetNodeGroup(pnode_info.group)
13051
13052     # dictionary with instance information after the modification
13053     ispec = {}
13054
13055     # Prepare disk/NIC modifications
13056     self.diskmod = PrepareContainerMods(self.op.disks, None)
13057     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13058
13059     # OS change
13060     if self.op.os_name and not self.op.force:
13061       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13062                       self.op.force_variant)
13063       instance_os = self.op.os_name
13064     else:
13065       instance_os = instance.os
13066
13067     assert not (self.op.disk_template and self.op.disks), \
13068       "Can't modify disk template and apply disk changes at the same time"
13069
13070     if self.op.disk_template:
13071       if instance.disk_template == self.op.disk_template:
13072         raise errors.OpPrereqError("Instance already has disk template %s" %
13073                                    instance.disk_template, errors.ECODE_INVAL)
13074
13075       if (instance.disk_template,
13076           self.op.disk_template) not in self._DISK_CONVERSIONS:
13077         raise errors.OpPrereqError("Unsupported disk template conversion from"
13078                                    " %s to %s" % (instance.disk_template,
13079                                                   self.op.disk_template),
13080                                    errors.ECODE_INVAL)
13081       _CheckInstanceState(self, instance, INSTANCE_DOWN,
13082                           msg="cannot change disk template")
13083       if self.op.disk_template in constants.DTS_INT_MIRROR:
13084         if self.op.remote_node == pnode:
13085           raise errors.OpPrereqError("Given new secondary node %s is the same"
13086                                      " as the primary node of the instance" %
13087                                      self.op.remote_node, errors.ECODE_STATE)
13088         _CheckNodeOnline(self, self.op.remote_node)
13089         _CheckNodeNotDrained(self, self.op.remote_node)
13090         # FIXME: here we assume that the old instance type is DT_PLAIN
13091         assert instance.disk_template == constants.DT_PLAIN
13092         disks = [{constants.IDISK_SIZE: d.size,
13093                   constants.IDISK_VG: d.logical_id[0]}
13094                  for d in instance.disks]
13095         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13096         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13097
13098         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13099         snode_group = self.cfg.GetNodeGroup(snode_info.group)
13100         ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13101                                                                 snode_group)
13102         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13103                                 ignore=self.op.ignore_ipolicy)
13104         if pnode_info.group != snode_info.group:
13105           self.LogWarning("The primary and secondary nodes are in two"
13106                           " different node groups; the disk parameters"
13107                           " from the first disk's node group will be"
13108                           " used")
13109
13110     # hvparams processing
13111     if self.op.hvparams:
13112       hv_type = instance.hypervisor
13113       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13114       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13115       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13116
13117       # local check
13118       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
13119       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13120       self.hv_proposed = self.hv_new = hv_new # the new actual values
13121       self.hv_inst = i_hvdict # the new dict (without defaults)
13122     else:
13123       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13124                                               instance.hvparams)
13125       self.hv_new = self.hv_inst = {}
13126
13127     # beparams processing
13128     if self.op.beparams:
13129       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13130                                    use_none=True)
13131       objects.UpgradeBeParams(i_bedict)
13132       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13133       be_new = cluster.SimpleFillBE(i_bedict)
13134       self.be_proposed = self.be_new = be_new # the new actual values
13135       self.be_inst = i_bedict # the new dict (without defaults)
13136     else:
13137       self.be_new = self.be_inst = {}
13138       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13139     be_old = cluster.FillBE(instance)
13140
13141     # CPU param validation -- checking every time a parameter is
13142     # changed to cover all cases where either CPU mask or vcpus have
13143     # changed
13144     if (constants.BE_VCPUS in self.be_proposed and
13145         constants.HV_CPU_MASK in self.hv_proposed):
13146       cpu_list = \
13147         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13148       # Verify mask is consistent with number of vCPUs. Can skip this
13149       # test if only 1 entry in the CPU mask, which means same mask
13150       # is applied to all vCPUs.
13151       if (len(cpu_list) > 1 and
13152           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13153         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13154                                    " CPU mask [%s]" %
13155                                    (self.be_proposed[constants.BE_VCPUS],
13156                                     self.hv_proposed[constants.HV_CPU_MASK]),
13157                                    errors.ECODE_INVAL)
13158
13159       # Only perform this test if a new CPU mask is given
13160       if constants.HV_CPU_MASK in self.hv_new:
13161         # Calculate the largest CPU number requested
13162         max_requested_cpu = max(map(max, cpu_list))
13163         # Check that all of the instance's nodes have enough physical CPUs to
13164         # satisfy the requested CPU mask
13165         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13166                                 max_requested_cpu + 1, instance.hypervisor)
13167
13168     # osparams processing
13169     if self.op.osparams:
13170       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13171       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13172       self.os_inst = i_osdict # the new dict (without defaults)
13173     else:
13174       self.os_inst = {}
13175
13176     self.warn = []
13177
13178     #TODO(dynmem): do the appropriate check involving MINMEM
13179     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13180         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13181       mem_check_list = [pnode]
13182       if be_new[constants.BE_AUTO_BALANCE]:
13183         # either we changed auto_balance to yes or it was from before
13184         mem_check_list.extend(instance.secondary_nodes)
13185       instance_info = self.rpc.call_instance_info(pnode, instance.name,
13186                                                   instance.hypervisor)
13187       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13188                                          [instance.hypervisor])
13189       pninfo = nodeinfo[pnode]
13190       msg = pninfo.fail_msg
13191       if msg:
13192         # Assume the primary node is unreachable and go ahead
13193         self.warn.append("Can't get info from primary node %s: %s" %
13194                          (pnode, msg))
13195       else:
13196         (_, _, (pnhvinfo, )) = pninfo.payload
13197         if not isinstance(pnhvinfo.get("memory_free", None), int):
13198           self.warn.append("Node data from primary node %s doesn't contain"
13199                            " free memory information" % pnode)
13200         elif instance_info.fail_msg:
13201           self.warn.append("Can't get instance runtime information: %s" %
13202                            instance_info.fail_msg)
13203         else:
13204           if instance_info.payload:
13205             current_mem = int(instance_info.payload["memory"])
13206           else:
13207             # Assume instance not running
13208             # (there is a slight race condition here, but it's not very
13209             # probable, and we have no other way to check)
13210             # TODO: Describe race condition
13211             current_mem = 0
13212           #TODO(dynmem): do the appropriate check involving MINMEM
13213           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13214                       pnhvinfo["memory_free"])
13215           if miss_mem > 0:
13216             raise errors.OpPrereqError("This change will prevent the instance"
13217                                        " from starting, due to %d MB of memory"
13218                                        " missing on its primary node" %
13219                                        miss_mem, errors.ECODE_NORES)
13220
13221       if be_new[constants.BE_AUTO_BALANCE]:
13222         for node, nres in nodeinfo.items():
13223           if node not in instance.secondary_nodes:
13224             continue
13225           nres.Raise("Can't get info from secondary node %s" % node,
13226                      prereq=True, ecode=errors.ECODE_STATE)
13227           (_, _, (nhvinfo, )) = nres.payload
13228           if not isinstance(nhvinfo.get("memory_free", None), int):
13229             raise errors.OpPrereqError("Secondary node %s didn't return free"
13230                                        " memory information" % node,
13231                                        errors.ECODE_STATE)
13232           #TODO(dynmem): do the appropriate check involving MINMEM
13233           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13234             raise errors.OpPrereqError("This change will prevent the instance"
13235                                        " from failover to its secondary node"
13236                                        " %s, due to not enough memory" % node,
13237                                        errors.ECODE_STATE)
13238
13239     if self.op.runtime_mem:
13240       remote_info = self.rpc.call_instance_info(instance.primary_node,
13241                                                 instance.name,
13242                                                 instance.hypervisor)
13243       remote_info.Raise("Error checking node %s" % instance.primary_node)
13244       if not remote_info.payload: # not running already
13245         raise errors.OpPrereqError("Instance %s is not running" %
13246                                    instance.name, errors.ECODE_STATE)
13247
13248       current_memory = remote_info.payload["memory"]
13249       if (not self.op.force and
13250            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13251             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13252         raise errors.OpPrereqError("Instance %s must have memory between %d"
13253                                    " and %d MB of memory unless --force is"
13254                                    " given" %
13255                                    (instance.name,
13256                                     self.be_proposed[constants.BE_MINMEM],
13257                                     self.be_proposed[constants.BE_MAXMEM]),
13258                                    errors.ECODE_INVAL)
13259
13260       delta = self.op.runtime_mem - current_memory
13261       if delta > 0:
13262         _CheckNodeFreeMemory(self, instance.primary_node,
13263                              "ballooning memory for instance %s" %
13264                              instance.name, delta, instance.hypervisor)
13265
13266     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13267       raise errors.OpPrereqError("Disk operations not supported for"
13268                                  " diskless instances", errors.ECODE_INVAL)
13269
13270     def _PrepareNicCreate(_, params, private):
13271       self._PrepareNicModification(params, private, None, None,
13272                                    {}, cluster, pnode)
13273       return (None, None)
13274
13275     def _PrepareNicMod(_, nic, params, private):
13276       self._PrepareNicModification(params, private, nic.ip, nic.network,
13277                                    nic.nicparams, cluster, pnode)
13278       return None
13279
13280     def _PrepareNicRemove(_, params, __):
13281       ip = params.ip
13282       net = params.network
13283       if net is not None and ip is not None:
13284         self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13285
13286     # Verify NIC changes (operating on copy)
13287     nics = instance.nics[:]
13288     ApplyContainerMods("NIC", nics, None, self.nicmod,
13289                        _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13290     if len(nics) > constants.MAX_NICS:
13291       raise errors.OpPrereqError("Instance has too many network interfaces"
13292                                  " (%d), cannot add more" % constants.MAX_NICS,
13293                                  errors.ECODE_STATE)
13294
13295     # Verify disk changes (operating on a copy)
13296     disks = instance.disks[:]
13297     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13298     if len(disks) > constants.MAX_DISKS:
13299       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13300                                  " more" % constants.MAX_DISKS,
13301                                  errors.ECODE_STATE)
13302     disk_sizes = [disk.size for disk in instance.disks]
13303     disk_sizes.extend(params["size"] for (op, idx, params, private) in
13304                       self.diskmod)
13305     ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13306     ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13307
13308     if self.op.offline is not None:
13309       if self.op.offline:
13310         msg = "can't change to offline"
13311       else:
13312         msg = "can't change to online"
13313       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13314
13315     # Pre-compute NIC changes (necessary to use result in hooks)
13316     self._nic_chgdesc = []
13317     if self.nicmod:
13318       # Operate on copies as this is still in prereq
13319       nics = [nic.Copy() for nic in instance.nics]
13320       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13321                          self._CreateNewNic, self._ApplyNicMods, None)
13322       self._new_nics = nics
13323       ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13324     else:
13325       self._new_nics = None
13326       ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13327
13328     if not self.op.ignore_ipolicy:
13329       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13330                                                               group_info)
13331
13332       # Fill ispec with backend parameters
13333       ispec[constants.ISPEC_SPINDLE_USE] = \
13334         self.be_new.get(constants.BE_SPINDLE_USE, None)
13335       ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13336                                                          None)
13337
13338       # Copy ispec to verify parameters with min/max values separately
13339       ispec_max = ispec.copy()
13340       ispec_max[constants.ISPEC_MEM_SIZE] = \
13341         self.be_new.get(constants.BE_MAXMEM, None)
13342       res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13343       ispec_min = ispec.copy()
13344       ispec_min[constants.ISPEC_MEM_SIZE] = \
13345         self.be_new.get(constants.BE_MINMEM, None)
13346       res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13347
13348       if (res_max or res_min):
13349         res = set(res_max + res_min)
13350         # FIXME: Improve error message by including information about whether
13351         # the upper or lower limit of the parameter fails the ipolicy.
13352         msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13353                (group_info, group_info.name, utils.CommaJoin(res)))
13354         raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13355
13356   def _ConvertPlainToDrbd(self, feedback_fn):
13357     """Converts an instance from plain to drbd.
13358
13359     """
13360     feedback_fn("Converting template to drbd")
13361     instance = self.instance
13362     pnode = instance.primary_node
13363     snode = self.op.remote_node
13364
13365     assert instance.disk_template == constants.DT_PLAIN
13366
13367     # create a fake disk info for _GenerateDiskTemplate
13368     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13369                   constants.IDISK_VG: d.logical_id[0]}
13370                  for d in instance.disks]
13371     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13372                                       instance.name, pnode, [snode],
13373                                       disk_info, None, None, 0, feedback_fn,
13374                                       self.diskparams)
13375     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13376                                         self.diskparams)
13377     info = _GetInstanceInfoText(instance)
13378     feedback_fn("Creating additional volumes...")
13379     # first, create the missing data and meta devices
13380     for disk in anno_disks:
13381       # unfortunately this is... not too nice
13382       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13383                             info, True)
13384       for child in disk.children:
13385         _CreateSingleBlockDev(self, snode, instance, child, info, True)
13386     # at this stage, all new LVs have been created, we can rename the
13387     # old ones
13388     feedback_fn("Renaming original volumes...")
13389     rename_list = [(o, n.children[0].logical_id)
13390                    for (o, n) in zip(instance.disks, new_disks)]
13391     result = self.rpc.call_blockdev_rename(pnode, rename_list)
13392     result.Raise("Failed to rename original LVs")
13393
13394     feedback_fn("Initializing DRBD devices...")
13395     # all child devices are in place, we can now create the DRBD devices
13396     for disk in anno_disks:
13397       for node in [pnode, snode]:
13398         f_create = node == pnode
13399         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13400
13401     # at this point, the instance has been modified
13402     instance.disk_template = constants.DT_DRBD8
13403     instance.disks = new_disks
13404     self.cfg.Update(instance, feedback_fn)
13405
13406     # Release node locks while waiting for sync
13407     _ReleaseLocks(self, locking.LEVEL_NODE)
13408
13409     # disks are created, waiting for sync
13410     disk_abort = not _WaitForSync(self, instance,
13411                                   oneshot=not self.op.wait_for_sync)
13412     if disk_abort:
13413       raise errors.OpExecError("There are some degraded disks for"
13414                                " this instance, please cleanup manually")
13415
13416     # Node resource locks will be released by caller
13417
13418   def _ConvertDrbdToPlain(self, feedback_fn):
13419     """Converts an instance from drbd to plain.
13420
13421     """
13422     instance = self.instance
13423
13424     assert len(instance.secondary_nodes) == 1
13425     assert instance.disk_template == constants.DT_DRBD8
13426
13427     pnode = instance.primary_node
13428     snode = instance.secondary_nodes[0]
13429     feedback_fn("Converting template to plain")
13430
13431     old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13432     new_disks = [d.children[0] for d in instance.disks]
13433
13434     # copy over size and mode
13435     for parent, child in zip(old_disks, new_disks):
13436       child.size = parent.size
13437       child.mode = parent.mode
13438
13439     # this is a DRBD disk, return its port to the pool
13440     # NOTE: this must be done right before the call to cfg.Update!
13441     for disk in old_disks:
13442       tcp_port = disk.logical_id[2]
13443       self.cfg.AddTcpUdpPort(tcp_port)
13444
13445     # update instance structure
13446     instance.disks = new_disks
13447     instance.disk_template = constants.DT_PLAIN
13448     self.cfg.Update(instance, feedback_fn)
13449
13450     # Release locks in case removing disks takes a while
13451     _ReleaseLocks(self, locking.LEVEL_NODE)
13452
13453     feedback_fn("Removing volumes on the secondary node...")
13454     for disk in old_disks:
13455       self.cfg.SetDiskID(disk, snode)
13456       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13457       if msg:
13458         self.LogWarning("Could not remove block device %s on node %s,"
13459                         " continuing anyway: %s", disk.iv_name, snode, msg)
13460
13461     feedback_fn("Removing unneeded volumes on the primary node...")
13462     for idx, disk in enumerate(old_disks):
13463       meta = disk.children[1]
13464       self.cfg.SetDiskID(meta, pnode)
13465       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13466       if msg:
13467         self.LogWarning("Could not remove metadata for disk %d on node %s,"
13468                         " continuing anyway: %s", idx, pnode, msg)
13469
13470   def _CreateNewDisk(self, idx, params, _):
13471     """Creates a new disk.
13472
13473     """
13474     instance = self.instance
13475
13476     # add a new disk
13477     if instance.disk_template in constants.DTS_FILEBASED:
13478       (file_driver, file_path) = instance.disks[0].logical_id
13479       file_path = os.path.dirname(file_path)
13480     else:
13481       file_driver = file_path = None
13482
13483     disk = \
13484       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13485                             instance.primary_node, instance.secondary_nodes,
13486                             [params], file_path, file_driver, idx,
13487                             self.Log, self.diskparams)[0]
13488
13489     info = _GetInstanceInfoText(instance)
13490
13491     logging.info("Creating volume %s for instance %s",
13492                  disk.iv_name, instance.name)
13493     # Note: this needs to be kept in sync with _CreateDisks
13494     #HARDCODE
13495     for node in instance.all_nodes:
13496       f_create = (node == instance.primary_node)
13497       try:
13498         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13499       except errors.OpExecError, err:
13500         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13501                         disk.iv_name, disk, node, err)
13502
13503     return (disk, [
13504       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13505       ])
13506
13507   @staticmethod
13508   def _ModifyDisk(idx, disk, params, _):
13509     """Modifies a disk.
13510
13511     """
13512     disk.mode = params[constants.IDISK_MODE]
13513
13514     return [
13515       ("disk.mode/%d" % idx, disk.mode),
13516       ]
13517
13518   def _RemoveDisk(self, idx, root, _):
13519     """Removes a disk.
13520
13521     """
13522     (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13523     for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13524       self.cfg.SetDiskID(disk, node)
13525       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13526       if msg:
13527         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13528                         " continuing anyway", idx, node, msg)
13529
13530     # if this is a DRBD disk, return its port to the pool
13531     if root.dev_type in constants.LDS_DRBD:
13532       self.cfg.AddTcpUdpPort(root.logical_id[2])
13533
13534   @staticmethod
13535   def _CreateNewNic(idx, params, private):
13536     """Creates data structure for a new network interface.
13537
13538     """
13539     mac = params[constants.INIC_MAC]
13540     ip = params.get(constants.INIC_IP, None)
13541     net = params.get(constants.INIC_NETWORK, None)
13542     #TODO: not private.filled?? can a nic have no nicparams??
13543     nicparams = private.filled
13544
13545     return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13546       ("nic.%d" % idx,
13547        "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13548        (mac, ip, private.filled[constants.NIC_MODE],
13549        private.filled[constants.NIC_LINK],
13550        net)),
13551       ])
13552
13553   @staticmethod
13554   def _ApplyNicMods(idx, nic, params, private):
13555     """Modifies a network interface.
13556
13557     """
13558     changes = []
13559
13560     for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13561       if key in params:
13562         changes.append(("nic.%s/%d" % (key, idx), params[key]))
13563         setattr(nic, key, params[key])
13564
13565     if private.filled:
13566       nic.nicparams = private.filled
13567
13568       for (key, val) in nic.nicparams.items():
13569         changes.append(("nic.%s/%d" % (key, idx), val))
13570
13571     return changes
13572
13573   def Exec(self, feedback_fn):
13574     """Modifies an instance.
13575
13576     All parameters take effect only at the next restart of the instance.
13577
13578     """
13579     # Process here the warnings from CheckPrereq, as we don't have a
13580     # feedback_fn there.
13581     # TODO: Replace with self.LogWarning
13582     for warn in self.warn:
13583       feedback_fn("WARNING: %s" % warn)
13584
13585     assert ((self.op.disk_template is None) ^
13586             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13587       "Not owning any node resource locks"
13588
13589     result = []
13590     instance = self.instance
13591
13592     # runtime memory
13593     if self.op.runtime_mem:
13594       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13595                                                      instance,
13596                                                      self.op.runtime_mem)
13597       rpcres.Raise("Cannot modify instance runtime memory")
13598       result.append(("runtime_memory", self.op.runtime_mem))
13599
13600     # Apply disk changes
13601     ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13602                        self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13603     _UpdateIvNames(0, instance.disks)
13604
13605     if self.op.disk_template:
13606       if __debug__:
13607         check_nodes = set(instance.all_nodes)
13608         if self.op.remote_node:
13609           check_nodes.add(self.op.remote_node)
13610         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13611           owned = self.owned_locks(level)
13612           assert not (check_nodes - owned), \
13613             ("Not owning the correct locks, owning %r, expected at least %r" %
13614              (owned, check_nodes))
13615
13616       r_shut = _ShutdownInstanceDisks(self, instance)
13617       if not r_shut:
13618         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13619                                  " proceed with disk template conversion")
13620       mode = (instance.disk_template, self.op.disk_template)
13621       try:
13622         self._DISK_CONVERSIONS[mode](self, feedback_fn)
13623       except:
13624         self.cfg.ReleaseDRBDMinors(instance.name)
13625         raise
13626       result.append(("disk_template", self.op.disk_template))
13627
13628       assert instance.disk_template == self.op.disk_template, \
13629         ("Expected disk template '%s', found '%s'" %
13630          (self.op.disk_template, instance.disk_template))
13631
13632     # Release node and resource locks if there are any (they might already have
13633     # been released during disk conversion)
13634     _ReleaseLocks(self, locking.LEVEL_NODE)
13635     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13636
13637     # Apply NIC changes
13638     if self._new_nics is not None:
13639       instance.nics = self._new_nics
13640       result.extend(self._nic_chgdesc)
13641
13642     # hvparams changes
13643     if self.op.hvparams:
13644       instance.hvparams = self.hv_inst
13645       for key, val in self.op.hvparams.iteritems():
13646         result.append(("hv/%s" % key, val))
13647
13648     # beparams changes
13649     if self.op.beparams:
13650       instance.beparams = self.be_inst
13651       for key, val in self.op.beparams.iteritems():
13652         result.append(("be/%s" % key, val))
13653
13654     # OS change
13655     if self.op.os_name:
13656       instance.os = self.op.os_name
13657
13658     # osparams changes
13659     if self.op.osparams:
13660       instance.osparams = self.os_inst
13661       for key, val in self.op.osparams.iteritems():
13662         result.append(("os/%s" % key, val))
13663
13664     if self.op.offline is None:
13665       # Ignore
13666       pass
13667     elif self.op.offline:
13668       # Mark instance as offline
13669       self.cfg.MarkInstanceOffline(instance.name)
13670       result.append(("admin_state", constants.ADMINST_OFFLINE))
13671     else:
13672       # Mark instance as online, but stopped
13673       self.cfg.MarkInstanceDown(instance.name)
13674       result.append(("admin_state", constants.ADMINST_DOWN))
13675
13676     self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13677
13678     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13679                 self.owned_locks(locking.LEVEL_NODE)), \
13680       "All node locks should have been released by now"
13681
13682     return result
13683
13684   _DISK_CONVERSIONS = {
13685     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13686     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13687     }
13688
13689
13690 class LUInstanceChangeGroup(LogicalUnit):
13691   HPATH = "instance-change-group"
13692   HTYPE = constants.HTYPE_INSTANCE
13693   REQ_BGL = False
13694
13695   def ExpandNames(self):
13696     self.share_locks = _ShareAll()
13697     self.needed_locks = {
13698       locking.LEVEL_NODEGROUP: [],
13699       locking.LEVEL_NODE: [],
13700       }
13701
13702     self._ExpandAndLockInstance()
13703
13704     if self.op.target_groups:
13705       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13706                                   self.op.target_groups)
13707     else:
13708       self.req_target_uuids = None
13709
13710     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13711
13712   def DeclareLocks(self, level):
13713     if level == locking.LEVEL_NODEGROUP:
13714       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13715
13716       if self.req_target_uuids:
13717         lock_groups = set(self.req_target_uuids)
13718
13719         # Lock all groups used by instance optimistically; this requires going
13720         # via the node before it's locked, requiring verification later on
13721         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13722         lock_groups.update(instance_groups)
13723       else:
13724         # No target groups, need to lock all of them
13725         lock_groups = locking.ALL_SET
13726
13727       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13728
13729     elif level == locking.LEVEL_NODE:
13730       if self.req_target_uuids:
13731         # Lock all nodes used by instances
13732         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13733         self._LockInstancesNodes()
13734
13735         # Lock all nodes in all potential target groups
13736         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13737                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13738         member_nodes = [node_name
13739                         for group in lock_groups
13740                         for node_name in self.cfg.GetNodeGroup(group).members]
13741         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13742       else:
13743         # Lock all nodes as all groups are potential targets
13744         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13745
13746   def CheckPrereq(self):
13747     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13748     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13749     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13750
13751     assert (self.req_target_uuids is None or
13752             owned_groups.issuperset(self.req_target_uuids))
13753     assert owned_instances == set([self.op.instance_name])
13754
13755     # Get instance information
13756     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13757
13758     # Check if node groups for locked instance are still correct
13759     assert owned_nodes.issuperset(self.instance.all_nodes), \
13760       ("Instance %s's nodes changed while we kept the lock" %
13761        self.op.instance_name)
13762
13763     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13764                                            owned_groups)
13765
13766     if self.req_target_uuids:
13767       # User requested specific target groups
13768       self.target_uuids = frozenset(self.req_target_uuids)
13769     else:
13770       # All groups except those used by the instance are potential targets
13771       self.target_uuids = owned_groups - inst_groups
13772
13773     conflicting_groups = self.target_uuids & inst_groups
13774     if conflicting_groups:
13775       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13776                                  " used by the instance '%s'" %
13777                                  (utils.CommaJoin(conflicting_groups),
13778                                   self.op.instance_name),
13779                                  errors.ECODE_INVAL)
13780
13781     if not self.target_uuids:
13782       raise errors.OpPrereqError("There are no possible target groups",
13783                                  errors.ECODE_INVAL)
13784
13785   def BuildHooksEnv(self):
13786     """Build hooks env.
13787
13788     """
13789     assert self.target_uuids
13790
13791     env = {
13792       "TARGET_GROUPS": " ".join(self.target_uuids),
13793       }
13794
13795     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13796
13797     return env
13798
13799   def BuildHooksNodes(self):
13800     """Build hooks nodes.
13801
13802     """
13803     mn = self.cfg.GetMasterNode()
13804     return ([mn], [mn])
13805
13806   def Exec(self, feedback_fn):
13807     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13808
13809     assert instances == [self.op.instance_name], "Instance not locked"
13810
13811     req = iallocator.IAReqGroupChange(instances=instances,
13812                                       target_groups=list(self.target_uuids))
13813     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13814
13815     ial.Run(self.op.iallocator)
13816
13817     if not ial.success:
13818       raise errors.OpPrereqError("Can't compute solution for changing group of"
13819                                  " instance '%s' using iallocator '%s': %s" %
13820                                  (self.op.instance_name, self.op.iallocator,
13821                                   ial.info), errors.ECODE_NORES)
13822
13823     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13824
13825     self.LogInfo("Iallocator returned %s job(s) for changing group of"
13826                  " instance '%s'", len(jobs), self.op.instance_name)
13827
13828     return ResultWithJobs(jobs)
13829
13830
13831 class LUBackupQuery(NoHooksLU):
13832   """Query the exports list
13833
13834   """
13835   REQ_BGL = False
13836
13837   def CheckArguments(self):
13838     self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13839                              ["node", "export"], self.op.use_locking)
13840
13841   def ExpandNames(self):
13842     self.expq.ExpandNames(self)
13843
13844   def DeclareLocks(self, level):
13845     self.expq.DeclareLocks(self, level)
13846
13847   def Exec(self, feedback_fn):
13848     result = {}
13849
13850     for (node, expname) in self.expq.OldStyleQuery(self):
13851       if expname is None:
13852         result[node] = False
13853       else:
13854         result.setdefault(node, []).append(expname)
13855
13856     return result
13857
13858
13859 class _ExportQuery(_QueryBase):
13860   FIELDS = query.EXPORT_FIELDS
13861
13862   #: The node name is not a unique key for this query
13863   SORT_FIELD = "node"
13864
13865   def ExpandNames(self, lu):
13866     lu.needed_locks = {}
13867
13868     # The following variables interact with _QueryBase._GetNames
13869     if self.names:
13870       self.wanted = _GetWantedNodes(lu, self.names)
13871     else:
13872       self.wanted = locking.ALL_SET
13873
13874     self.do_locking = self.use_locking
13875
13876     if self.do_locking:
13877       lu.share_locks = _ShareAll()
13878       lu.needed_locks = {
13879         locking.LEVEL_NODE: self.wanted,
13880         }
13881
13882   def DeclareLocks(self, lu, level):
13883     pass
13884
13885   def _GetQueryData(self, lu):
13886     """Computes the list of nodes and their attributes.
13887
13888     """
13889     # Locking is not used
13890     # TODO
13891     assert not (compat.any(lu.glm.is_owned(level)
13892                            for level in locking.LEVELS
13893                            if level != locking.LEVEL_CLUSTER) or
13894                 self.do_locking or self.use_locking)
13895
13896     nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13897
13898     result = []
13899
13900     for (node, nres) in lu.rpc.call_export_list(nodes).items():
13901       if nres.fail_msg:
13902         result.append((node, None))
13903       else:
13904         result.extend((node, expname) for expname in nres.payload)
13905
13906     return result
13907
13908
13909 class LUBackupPrepare(NoHooksLU):
13910   """Prepares an instance for an export and returns useful information.
13911
13912   """
13913   REQ_BGL = False
13914
13915   def ExpandNames(self):
13916     self._ExpandAndLockInstance()
13917
13918   def CheckPrereq(self):
13919     """Check prerequisites.
13920
13921     """
13922     instance_name = self.op.instance_name
13923
13924     self.instance = self.cfg.GetInstanceInfo(instance_name)
13925     assert self.instance is not None, \
13926           "Cannot retrieve locked instance %s" % self.op.instance_name
13927     _CheckNodeOnline(self, self.instance.primary_node)
13928
13929     self._cds = _GetClusterDomainSecret()
13930
13931   def Exec(self, feedback_fn):
13932     """Prepares an instance for an export.
13933
13934     """
13935     instance = self.instance
13936
13937     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13938       salt = utils.GenerateSecret(8)
13939
13940       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13941       result = self.rpc.call_x509_cert_create(instance.primary_node,
13942                                               constants.RIE_CERT_VALIDITY)
13943       result.Raise("Can't create X509 key and certificate on %s" % result.node)
13944
13945       (name, cert_pem) = result.payload
13946
13947       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13948                                              cert_pem)
13949
13950       return {
13951         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13952         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13953                           salt),
13954         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13955         }
13956
13957     return None
13958
13959
13960 class LUBackupExport(LogicalUnit):
13961   """Export an instance to an image in the cluster.
13962
13963   """
13964   HPATH = "instance-export"
13965   HTYPE = constants.HTYPE_INSTANCE
13966   REQ_BGL = False
13967
13968   def CheckArguments(self):
13969     """Check the arguments.
13970
13971     """
13972     self.x509_key_name = self.op.x509_key_name
13973     self.dest_x509_ca_pem = self.op.destination_x509_ca
13974
13975     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13976       if not self.x509_key_name:
13977         raise errors.OpPrereqError("Missing X509 key name for encryption",
13978                                    errors.ECODE_INVAL)
13979
13980       if not self.dest_x509_ca_pem:
13981         raise errors.OpPrereqError("Missing destination X509 CA",
13982                                    errors.ECODE_INVAL)
13983
13984   def ExpandNames(self):
13985     self._ExpandAndLockInstance()
13986
13987     # Lock all nodes for local exports
13988     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13989       # FIXME: lock only instance primary and destination node
13990       #
13991       # Sad but true, for now we have do lock all nodes, as we don't know where
13992       # the previous export might be, and in this LU we search for it and
13993       # remove it from its current node. In the future we could fix this by:
13994       #  - making a tasklet to search (share-lock all), then create the
13995       #    new one, then one to remove, after
13996       #  - removing the removal operation altogether
13997       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13998
13999   def DeclareLocks(self, level):
14000     """Last minute lock declaration."""
14001     # All nodes are locked anyway, so nothing to do here.
14002
14003   def BuildHooksEnv(self):
14004     """Build hooks env.
14005
14006     This will run on the master, primary node and target node.
14007
14008     """
14009     env = {
14010       "EXPORT_MODE": self.op.mode,
14011       "EXPORT_NODE": self.op.target_node,
14012       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14013       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14014       # TODO: Generic function for boolean env variables
14015       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14016       }
14017
14018     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14019
14020     return env
14021
14022   def BuildHooksNodes(self):
14023     """Build hooks nodes.
14024
14025     """
14026     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14027
14028     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14029       nl.append(self.op.target_node)
14030
14031     return (nl, nl)
14032
14033   def CheckPrereq(self):
14034     """Check prerequisites.
14035
14036     This checks that the instance and node names are valid.
14037
14038     """
14039     instance_name = self.op.instance_name
14040
14041     self.instance = self.cfg.GetInstanceInfo(instance_name)
14042     assert self.instance is not None, \
14043           "Cannot retrieve locked instance %s" % self.op.instance_name
14044     _CheckNodeOnline(self, self.instance.primary_node)
14045
14046     if (self.op.remove_instance and
14047         self.instance.admin_state == constants.ADMINST_UP and
14048         not self.op.shutdown):
14049       raise errors.OpPrereqError("Can not remove instance without shutting it"
14050                                  " down before", errors.ECODE_STATE)
14051
14052     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14053       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14054       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14055       assert self.dst_node is not None
14056
14057       _CheckNodeOnline(self, self.dst_node.name)
14058       _CheckNodeNotDrained(self, self.dst_node.name)
14059
14060       self._cds = None
14061       self.dest_disk_info = None
14062       self.dest_x509_ca = None
14063
14064     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14065       self.dst_node = None
14066
14067       if len(self.op.target_node) != len(self.instance.disks):
14068         raise errors.OpPrereqError(("Received destination information for %s"
14069                                     " disks, but instance %s has %s disks") %
14070                                    (len(self.op.target_node), instance_name,
14071                                     len(self.instance.disks)),
14072                                    errors.ECODE_INVAL)
14073
14074       cds = _GetClusterDomainSecret()
14075
14076       # Check X509 key name
14077       try:
14078         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14079       except (TypeError, ValueError), err:
14080         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14081                                    errors.ECODE_INVAL)
14082
14083       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14084         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14085                                    errors.ECODE_INVAL)
14086
14087       # Load and verify CA
14088       try:
14089         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14090       except OpenSSL.crypto.Error, err:
14091         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14092                                    (err, ), errors.ECODE_INVAL)
14093
14094       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14095       if errcode is not None:
14096         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14097                                    (msg, ), errors.ECODE_INVAL)
14098
14099       self.dest_x509_ca = cert
14100
14101       # Verify target information
14102       disk_info = []
14103       for idx, disk_data in enumerate(self.op.target_node):
14104         try:
14105           (host, port, magic) = \
14106             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14107         except errors.GenericError, err:
14108           raise errors.OpPrereqError("Target info for disk %s: %s" %
14109                                      (idx, err), errors.ECODE_INVAL)
14110
14111         disk_info.append((host, port, magic))
14112
14113       assert len(disk_info) == len(self.op.target_node)
14114       self.dest_disk_info = disk_info
14115
14116     else:
14117       raise errors.ProgrammerError("Unhandled export mode %r" %
14118                                    self.op.mode)
14119
14120     # instance disk type verification
14121     # TODO: Implement export support for file-based disks
14122     for disk in self.instance.disks:
14123       if disk.dev_type == constants.LD_FILE:
14124         raise errors.OpPrereqError("Export not supported for instances with"
14125                                    " file-based disks", errors.ECODE_INVAL)
14126
14127   def _CleanupExports(self, feedback_fn):
14128     """Removes exports of current instance from all other nodes.
14129
14130     If an instance in a cluster with nodes A..D was exported to node C, its
14131     exports will be removed from the nodes A, B and D.
14132
14133     """
14134     assert self.op.mode != constants.EXPORT_MODE_REMOTE
14135
14136     nodelist = self.cfg.GetNodeList()
14137     nodelist.remove(self.dst_node.name)
14138
14139     # on one-node clusters nodelist will be empty after the removal
14140     # if we proceed the backup would be removed because OpBackupQuery
14141     # substitutes an empty list with the full cluster node list.
14142     iname = self.instance.name
14143     if nodelist:
14144       feedback_fn("Removing old exports for instance %s" % iname)
14145       exportlist = self.rpc.call_export_list(nodelist)
14146       for node in exportlist:
14147         if exportlist[node].fail_msg:
14148           continue
14149         if iname in exportlist[node].payload:
14150           msg = self.rpc.call_export_remove(node, iname).fail_msg
14151           if msg:
14152             self.LogWarning("Could not remove older export for instance %s"
14153                             " on node %s: %s", iname, node, msg)
14154
14155   def Exec(self, feedback_fn):
14156     """Export an instance to an image in the cluster.
14157
14158     """
14159     assert self.op.mode in constants.EXPORT_MODES
14160
14161     instance = self.instance
14162     src_node = instance.primary_node
14163
14164     if self.op.shutdown:
14165       # shutdown the instance, but not the disks
14166       feedback_fn("Shutting down instance %s" % instance.name)
14167       result = self.rpc.call_instance_shutdown(src_node, instance,
14168                                                self.op.shutdown_timeout)
14169       # TODO: Maybe ignore failures if ignore_remove_failures is set
14170       result.Raise("Could not shutdown instance %s on"
14171                    " node %s" % (instance.name, src_node))
14172
14173     # set the disks ID correctly since call_instance_start needs the
14174     # correct drbd minor to create the symlinks
14175     for disk in instance.disks:
14176       self.cfg.SetDiskID(disk, src_node)
14177
14178     activate_disks = (instance.admin_state != constants.ADMINST_UP)
14179
14180     if activate_disks:
14181       # Activate the instance disks if we'exporting a stopped instance
14182       feedback_fn("Activating disks for %s" % instance.name)
14183       _StartInstanceDisks(self, instance, None)
14184
14185     try:
14186       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14187                                                      instance)
14188
14189       helper.CreateSnapshots()
14190       try:
14191         if (self.op.shutdown and
14192             instance.admin_state == constants.ADMINST_UP and
14193             not self.op.remove_instance):
14194           assert not activate_disks
14195           feedback_fn("Starting instance %s" % instance.name)
14196           result = self.rpc.call_instance_start(src_node,
14197                                                 (instance, None, None), False)
14198           msg = result.fail_msg
14199           if msg:
14200             feedback_fn("Failed to start instance: %s" % msg)
14201             _ShutdownInstanceDisks(self, instance)
14202             raise errors.OpExecError("Could not start instance: %s" % msg)
14203
14204         if self.op.mode == constants.EXPORT_MODE_LOCAL:
14205           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14206         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14207           connect_timeout = constants.RIE_CONNECT_TIMEOUT
14208           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14209
14210           (key_name, _, _) = self.x509_key_name
14211
14212           dest_ca_pem = \
14213             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14214                                             self.dest_x509_ca)
14215
14216           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14217                                                      key_name, dest_ca_pem,
14218                                                      timeouts)
14219       finally:
14220         helper.Cleanup()
14221
14222       # Check for backwards compatibility
14223       assert len(dresults) == len(instance.disks)
14224       assert compat.all(isinstance(i, bool) for i in dresults), \
14225              "Not all results are boolean: %r" % dresults
14226
14227     finally:
14228       if activate_disks:
14229         feedback_fn("Deactivating disks for %s" % instance.name)
14230         _ShutdownInstanceDisks(self, instance)
14231
14232     if not (compat.all(dresults) and fin_resu):
14233       failures = []
14234       if not fin_resu:
14235         failures.append("export finalization")
14236       if not compat.all(dresults):
14237         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14238                                if not dsk)
14239         failures.append("disk export: disk(s) %s" % fdsk)
14240
14241       raise errors.OpExecError("Export failed, errors in %s" %
14242                                utils.CommaJoin(failures))
14243
14244     # At this point, the export was successful, we can cleanup/finish
14245
14246     # Remove instance if requested
14247     if self.op.remove_instance:
14248       feedback_fn("Removing instance %s" % instance.name)
14249       _RemoveInstance(self, feedback_fn, instance,
14250                       self.op.ignore_remove_failures)
14251
14252     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14253       self._CleanupExports(feedback_fn)
14254
14255     return fin_resu, dresults
14256
14257
14258 class LUBackupRemove(NoHooksLU):
14259   """Remove exports related to the named instance.
14260
14261   """
14262   REQ_BGL = False
14263
14264   def ExpandNames(self):
14265     self.needed_locks = {}
14266     # We need all nodes to be locked in order for RemoveExport to work, but we
14267     # don't need to lock the instance itself, as nothing will happen to it (and
14268     # we can remove exports also for a removed instance)
14269     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14270
14271   def Exec(self, feedback_fn):
14272     """Remove any export.
14273
14274     """
14275     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14276     # If the instance was not found we'll try with the name that was passed in.
14277     # This will only work if it was an FQDN, though.
14278     fqdn_warn = False
14279     if not instance_name:
14280       fqdn_warn = True
14281       instance_name = self.op.instance_name
14282
14283     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14284     exportlist = self.rpc.call_export_list(locked_nodes)
14285     found = False
14286     for node in exportlist:
14287       msg = exportlist[node].fail_msg
14288       if msg:
14289         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14290         continue
14291       if instance_name in exportlist[node].payload:
14292         found = True
14293         result = self.rpc.call_export_remove(node, instance_name)
14294         msg = result.fail_msg
14295         if msg:
14296           logging.error("Could not remove export for instance %s"
14297                         " on node %s: %s", instance_name, node, msg)
14298
14299     if fqdn_warn and not found:
14300       feedback_fn("Export not found. If trying to remove an export belonging"
14301                   " to a deleted instance please use its Fully Qualified"
14302                   " Domain Name.")
14303
14304
14305 class LUGroupAdd(LogicalUnit):
14306   """Logical unit for creating node groups.
14307
14308   """
14309   HPATH = "group-add"
14310   HTYPE = constants.HTYPE_GROUP
14311   REQ_BGL = False
14312
14313   def ExpandNames(self):
14314     # We need the new group's UUID here so that we can create and acquire the
14315     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14316     # that it should not check whether the UUID exists in the configuration.
14317     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14318     self.needed_locks = {}
14319     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14320
14321   def CheckPrereq(self):
14322     """Check prerequisites.
14323
14324     This checks that the given group name is not an existing node group
14325     already.
14326
14327     """
14328     try:
14329       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14330     except errors.OpPrereqError:
14331       pass
14332     else:
14333       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14334                                  " node group (UUID: %s)" %
14335                                  (self.op.group_name, existing_uuid),
14336                                  errors.ECODE_EXISTS)
14337
14338     if self.op.ndparams:
14339       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14340
14341     if self.op.hv_state:
14342       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14343     else:
14344       self.new_hv_state = None
14345
14346     if self.op.disk_state:
14347       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14348     else:
14349       self.new_disk_state = None
14350
14351     if self.op.diskparams:
14352       for templ in constants.DISK_TEMPLATES:
14353         if templ in self.op.diskparams:
14354           utils.ForceDictType(self.op.diskparams[templ],
14355                               constants.DISK_DT_TYPES)
14356       self.new_diskparams = self.op.diskparams
14357       try:
14358         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14359       except errors.OpPrereqError, err:
14360         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14361                                    errors.ECODE_INVAL)
14362     else:
14363       self.new_diskparams = {}
14364
14365     if self.op.ipolicy:
14366       cluster = self.cfg.GetClusterInfo()
14367       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14368       try:
14369         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14370       except errors.ConfigurationError, err:
14371         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14372                                    errors.ECODE_INVAL)
14373
14374   def BuildHooksEnv(self):
14375     """Build hooks env.
14376
14377     """
14378     return {
14379       "GROUP_NAME": self.op.group_name,
14380       }
14381
14382   def BuildHooksNodes(self):
14383     """Build hooks nodes.
14384
14385     """
14386     mn = self.cfg.GetMasterNode()
14387     return ([mn], [mn])
14388
14389   def Exec(self, feedback_fn):
14390     """Add the node group to the cluster.
14391
14392     """
14393     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14394                                   uuid=self.group_uuid,
14395                                   alloc_policy=self.op.alloc_policy,
14396                                   ndparams=self.op.ndparams,
14397                                   diskparams=self.new_diskparams,
14398                                   ipolicy=self.op.ipolicy,
14399                                   hv_state_static=self.new_hv_state,
14400                                   disk_state_static=self.new_disk_state)
14401
14402     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14403     del self.remove_locks[locking.LEVEL_NODEGROUP]
14404
14405
14406 class LUGroupAssignNodes(NoHooksLU):
14407   """Logical unit for assigning nodes to groups.
14408
14409   """
14410   REQ_BGL = False
14411
14412   def ExpandNames(self):
14413     # These raise errors.OpPrereqError on their own:
14414     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14415     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14416
14417     # We want to lock all the affected nodes and groups. We have readily
14418     # available the list of nodes, and the *destination* group. To gather the
14419     # list of "source" groups, we need to fetch node information later on.
14420     self.needed_locks = {
14421       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14422       locking.LEVEL_NODE: self.op.nodes,
14423       }
14424
14425   def DeclareLocks(self, level):
14426     if level == locking.LEVEL_NODEGROUP:
14427       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14428
14429       # Try to get all affected nodes' groups without having the group or node
14430       # lock yet. Needs verification later in the code flow.
14431       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14432
14433       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14434
14435   def CheckPrereq(self):
14436     """Check prerequisites.
14437
14438     """
14439     assert self.needed_locks[locking.LEVEL_NODEGROUP]
14440     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14441             frozenset(self.op.nodes))
14442
14443     expected_locks = (set([self.group_uuid]) |
14444                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14445     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14446     if actual_locks != expected_locks:
14447       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14448                                " current groups are '%s', used to be '%s'" %
14449                                (utils.CommaJoin(expected_locks),
14450                                 utils.CommaJoin(actual_locks)))
14451
14452     self.node_data = self.cfg.GetAllNodesInfo()
14453     self.group = self.cfg.GetNodeGroup(self.group_uuid)
14454     instance_data = self.cfg.GetAllInstancesInfo()
14455
14456     if self.group is None:
14457       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14458                                (self.op.group_name, self.group_uuid))
14459
14460     (new_splits, previous_splits) = \
14461       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14462                                              for node in self.op.nodes],
14463                                             self.node_data, instance_data)
14464
14465     if new_splits:
14466       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14467
14468       if not self.op.force:
14469         raise errors.OpExecError("The following instances get split by this"
14470                                  " change and --force was not given: %s" %
14471                                  fmt_new_splits)
14472       else:
14473         self.LogWarning("This operation will split the following instances: %s",
14474                         fmt_new_splits)
14475
14476         if previous_splits:
14477           self.LogWarning("In addition, these already-split instances continue"
14478                           " to be split across groups: %s",
14479                           utils.CommaJoin(utils.NiceSort(previous_splits)))
14480
14481   def Exec(self, feedback_fn):
14482     """Assign nodes to a new group.
14483
14484     """
14485     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14486
14487     self.cfg.AssignGroupNodes(mods)
14488
14489   @staticmethod
14490   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14491     """Check for split instances after a node assignment.
14492
14493     This method considers a series of node assignments as an atomic operation,
14494     and returns information about split instances after applying the set of
14495     changes.
14496
14497     In particular, it returns information about newly split instances, and
14498     instances that were already split, and remain so after the change.
14499
14500     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14501     considered.
14502
14503     @type changes: list of (node_name, new_group_uuid) pairs.
14504     @param changes: list of node assignments to consider.
14505     @param node_data: a dict with data for all nodes
14506     @param instance_data: a dict with all instances to consider
14507     @rtype: a two-tuple
14508     @return: a list of instances that were previously okay and result split as a
14509       consequence of this change, and a list of instances that were previously
14510       split and this change does not fix.
14511
14512     """
14513     changed_nodes = dict((node, group) for node, group in changes
14514                          if node_data[node].group != group)
14515
14516     all_split_instances = set()
14517     previously_split_instances = set()
14518
14519     def InstanceNodes(instance):
14520       return [instance.primary_node] + list(instance.secondary_nodes)
14521
14522     for inst in instance_data.values():
14523       if inst.disk_template not in constants.DTS_INT_MIRROR:
14524         continue
14525
14526       instance_nodes = InstanceNodes(inst)
14527
14528       if len(set(node_data[node].group for node in instance_nodes)) > 1:
14529         previously_split_instances.add(inst.name)
14530
14531       if len(set(changed_nodes.get(node, node_data[node].group)
14532                  for node in instance_nodes)) > 1:
14533         all_split_instances.add(inst.name)
14534
14535     return (list(all_split_instances - previously_split_instances),
14536             list(previously_split_instances & all_split_instances))
14537
14538
14539 class _GroupQuery(_QueryBase):
14540   FIELDS = query.GROUP_FIELDS
14541
14542   def ExpandNames(self, lu):
14543     lu.needed_locks = {}
14544
14545     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14546     self._cluster = lu.cfg.GetClusterInfo()
14547     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14548
14549     if not self.names:
14550       self.wanted = [name_to_uuid[name]
14551                      for name in utils.NiceSort(name_to_uuid.keys())]
14552     else:
14553       # Accept names to be either names or UUIDs.
14554       missing = []
14555       self.wanted = []
14556       all_uuid = frozenset(self._all_groups.keys())
14557
14558       for name in self.names:
14559         if name in all_uuid:
14560           self.wanted.append(name)
14561         elif name in name_to_uuid:
14562           self.wanted.append(name_to_uuid[name])
14563         else:
14564           missing.append(name)
14565
14566       if missing:
14567         raise errors.OpPrereqError("Some groups do not exist: %s" %
14568                                    utils.CommaJoin(missing),
14569                                    errors.ECODE_NOENT)
14570
14571   def DeclareLocks(self, lu, level):
14572     pass
14573
14574   def _GetQueryData(self, lu):
14575     """Computes the list of node groups and their attributes.
14576
14577     """
14578     do_nodes = query.GQ_NODE in self.requested_data
14579     do_instances = query.GQ_INST in self.requested_data
14580
14581     group_to_nodes = None
14582     group_to_instances = None
14583
14584     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14585     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14586     # latter GetAllInstancesInfo() is not enough, for we have to go through
14587     # instance->node. Hence, we will need to process nodes even if we only need
14588     # instance information.
14589     if do_nodes or do_instances:
14590       all_nodes = lu.cfg.GetAllNodesInfo()
14591       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14592       node_to_group = {}
14593
14594       for node in all_nodes.values():
14595         if node.group in group_to_nodes:
14596           group_to_nodes[node.group].append(node.name)
14597           node_to_group[node.name] = node.group
14598
14599       if do_instances:
14600         all_instances = lu.cfg.GetAllInstancesInfo()
14601         group_to_instances = dict((uuid, []) for uuid in self.wanted)
14602
14603         for instance in all_instances.values():
14604           node = instance.primary_node
14605           if node in node_to_group:
14606             group_to_instances[node_to_group[node]].append(instance.name)
14607
14608         if not do_nodes:
14609           # Do not pass on node information if it was not requested.
14610           group_to_nodes = None
14611
14612     return query.GroupQueryData(self._cluster,
14613                                 [self._all_groups[uuid]
14614                                  for uuid in self.wanted],
14615                                 group_to_nodes, group_to_instances,
14616                                 query.GQ_DISKPARAMS in self.requested_data)
14617
14618
14619 class LUGroupQuery(NoHooksLU):
14620   """Logical unit for querying node groups.
14621
14622   """
14623   REQ_BGL = False
14624
14625   def CheckArguments(self):
14626     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14627                           self.op.output_fields, False)
14628
14629   def ExpandNames(self):
14630     self.gq.ExpandNames(self)
14631
14632   def DeclareLocks(self, level):
14633     self.gq.DeclareLocks(self, level)
14634
14635   def Exec(self, feedback_fn):
14636     return self.gq.OldStyleQuery(self)
14637
14638
14639 class LUGroupSetParams(LogicalUnit):
14640   """Modifies the parameters of a node group.
14641
14642   """
14643   HPATH = "group-modify"
14644   HTYPE = constants.HTYPE_GROUP
14645   REQ_BGL = False
14646
14647   def CheckArguments(self):
14648     all_changes = [
14649       self.op.ndparams,
14650       self.op.diskparams,
14651       self.op.alloc_policy,
14652       self.op.hv_state,
14653       self.op.disk_state,
14654       self.op.ipolicy,
14655       ]
14656
14657     if all_changes.count(None) == len(all_changes):
14658       raise errors.OpPrereqError("Please pass at least one modification",
14659                                  errors.ECODE_INVAL)
14660
14661   def ExpandNames(self):
14662     # This raises errors.OpPrereqError on its own:
14663     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14664
14665     self.needed_locks = {
14666       locking.LEVEL_INSTANCE: [],
14667       locking.LEVEL_NODEGROUP: [self.group_uuid],
14668       }
14669
14670     self.share_locks[locking.LEVEL_INSTANCE] = 1
14671
14672   def DeclareLocks(self, level):
14673     if level == locking.LEVEL_INSTANCE:
14674       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14675
14676       # Lock instances optimistically, needs verification once group lock has
14677       # been acquired
14678       self.needed_locks[locking.LEVEL_INSTANCE] = \
14679           self.cfg.GetNodeGroupInstances(self.group_uuid)
14680
14681   @staticmethod
14682   def _UpdateAndVerifyDiskParams(old, new):
14683     """Updates and verifies disk parameters.
14684
14685     """
14686     new_params = _GetUpdatedParams(old, new)
14687     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14688     return new_params
14689
14690   def CheckPrereq(self):
14691     """Check prerequisites.
14692
14693     """
14694     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14695
14696     # Check if locked instances are still correct
14697     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14698
14699     self.group = self.cfg.GetNodeGroup(self.group_uuid)
14700     cluster = self.cfg.GetClusterInfo()
14701
14702     if self.group is None:
14703       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14704                                (self.op.group_name, self.group_uuid))
14705
14706     if self.op.ndparams:
14707       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14708       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14709       self.new_ndparams = new_ndparams
14710
14711     if self.op.diskparams:
14712       diskparams = self.group.diskparams
14713       uavdp = self._UpdateAndVerifyDiskParams
14714       # For each disktemplate subdict update and verify the values
14715       new_diskparams = dict((dt,
14716                              uavdp(diskparams.get(dt, {}),
14717                                    self.op.diskparams[dt]))
14718                             for dt in constants.DISK_TEMPLATES
14719                             if dt in self.op.diskparams)
14720       # As we've all subdicts of diskparams ready, lets merge the actual
14721       # dict with all updated subdicts
14722       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14723       try:
14724         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14725       except errors.OpPrereqError, err:
14726         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14727                                    errors.ECODE_INVAL)
14728
14729     if self.op.hv_state:
14730       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14731                                                  self.group.hv_state_static)
14732
14733     if self.op.disk_state:
14734       self.new_disk_state = \
14735         _MergeAndVerifyDiskState(self.op.disk_state,
14736                                  self.group.disk_state_static)
14737
14738     if self.op.ipolicy:
14739       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14740                                             self.op.ipolicy,
14741                                             group_policy=True)
14742
14743       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14744       inst_filter = lambda inst: inst.name in owned_instances
14745       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14746       gmi = ganeti.masterd.instance
14747       violations = \
14748           _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14749                                                                   self.group),
14750                                         new_ipolicy, instances)
14751
14752       if violations:
14753         self.LogWarning("After the ipolicy change the following instances"
14754                         " violate them: %s",
14755                         utils.CommaJoin(violations))
14756
14757   def BuildHooksEnv(self):
14758     """Build hooks env.
14759
14760     """
14761     return {
14762       "GROUP_NAME": self.op.group_name,
14763       "NEW_ALLOC_POLICY": self.op.alloc_policy,
14764       }
14765
14766   def BuildHooksNodes(self):
14767     """Build hooks nodes.
14768
14769     """
14770     mn = self.cfg.GetMasterNode()
14771     return ([mn], [mn])
14772
14773   def Exec(self, feedback_fn):
14774     """Modifies the node group.
14775
14776     """
14777     result = []
14778
14779     if self.op.ndparams:
14780       self.group.ndparams = self.new_ndparams
14781       result.append(("ndparams", str(self.group.ndparams)))
14782
14783     if self.op.diskparams:
14784       self.group.diskparams = self.new_diskparams
14785       result.append(("diskparams", str(self.group.diskparams)))
14786
14787     if self.op.alloc_policy:
14788       self.group.alloc_policy = self.op.alloc_policy
14789
14790     if self.op.hv_state:
14791       self.group.hv_state_static = self.new_hv_state
14792
14793     if self.op.disk_state:
14794       self.group.disk_state_static = self.new_disk_state
14795
14796     if self.op.ipolicy:
14797       self.group.ipolicy = self.new_ipolicy
14798
14799     self.cfg.Update(self.group, feedback_fn)
14800     return result
14801
14802
14803 class LUGroupRemove(LogicalUnit):
14804   HPATH = "group-remove"
14805   HTYPE = constants.HTYPE_GROUP
14806   REQ_BGL = False
14807
14808   def ExpandNames(self):
14809     # This will raises errors.OpPrereqError on its own:
14810     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14811     self.needed_locks = {
14812       locking.LEVEL_NODEGROUP: [self.group_uuid],
14813       }
14814
14815   def CheckPrereq(self):
14816     """Check prerequisites.
14817
14818     This checks that the given group name exists as a node group, that is
14819     empty (i.e., contains no nodes), and that is not the last group of the
14820     cluster.
14821
14822     """
14823     # Verify that the group is empty.
14824     group_nodes = [node.name
14825                    for node in self.cfg.GetAllNodesInfo().values()
14826                    if node.group == self.group_uuid]
14827
14828     if group_nodes:
14829       raise errors.OpPrereqError("Group '%s' not empty, has the following"
14830                                  " nodes: %s" %
14831                                  (self.op.group_name,
14832                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
14833                                  errors.ECODE_STATE)
14834
14835     # Verify the cluster would not be left group-less.
14836     if len(self.cfg.GetNodeGroupList()) == 1:
14837       raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14838                                  " removed" % self.op.group_name,
14839                                  errors.ECODE_STATE)
14840
14841   def BuildHooksEnv(self):
14842     """Build hooks env.
14843
14844     """
14845     return {
14846       "GROUP_NAME": self.op.group_name,
14847       }
14848
14849   def BuildHooksNodes(self):
14850     """Build hooks nodes.
14851
14852     """
14853     mn = self.cfg.GetMasterNode()
14854     return ([mn], [mn])
14855
14856   def Exec(self, feedback_fn):
14857     """Remove the node group.
14858
14859     """
14860     try:
14861       self.cfg.RemoveNodeGroup(self.group_uuid)
14862     except errors.ConfigurationError:
14863       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14864                                (self.op.group_name, self.group_uuid))
14865
14866     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14867
14868
14869 class LUGroupRename(LogicalUnit):
14870   HPATH = "group-rename"
14871   HTYPE = constants.HTYPE_GROUP
14872   REQ_BGL = False
14873
14874   def ExpandNames(self):
14875     # This raises errors.OpPrereqError on its own:
14876     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14877
14878     self.needed_locks = {
14879       locking.LEVEL_NODEGROUP: [self.group_uuid],
14880       }
14881
14882   def CheckPrereq(self):
14883     """Check prerequisites.
14884
14885     Ensures requested new name is not yet used.
14886
14887     """
14888     try:
14889       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14890     except errors.OpPrereqError:
14891       pass
14892     else:
14893       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14894                                  " node group (UUID: %s)" %
14895                                  (self.op.new_name, new_name_uuid),
14896                                  errors.ECODE_EXISTS)
14897
14898   def BuildHooksEnv(self):
14899     """Build hooks env.
14900
14901     """
14902     return {
14903       "OLD_NAME": self.op.group_name,
14904       "NEW_NAME": self.op.new_name,
14905       }
14906
14907   def BuildHooksNodes(self):
14908     """Build hooks nodes.
14909
14910     """
14911     mn = self.cfg.GetMasterNode()
14912
14913     all_nodes = self.cfg.GetAllNodesInfo()
14914     all_nodes.pop(mn, None)
14915
14916     run_nodes = [mn]
14917     run_nodes.extend(node.name for node in all_nodes.values()
14918                      if node.group == self.group_uuid)
14919
14920     return (run_nodes, run_nodes)
14921
14922   def Exec(self, feedback_fn):
14923     """Rename the node group.
14924
14925     """
14926     group = self.cfg.GetNodeGroup(self.group_uuid)
14927
14928     if group is None:
14929       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14930                                (self.op.group_name, self.group_uuid))
14931
14932     group.name = self.op.new_name
14933     self.cfg.Update(group, feedback_fn)
14934
14935     return self.op.new_name
14936
14937
14938 class LUGroupEvacuate(LogicalUnit):
14939   HPATH = "group-evacuate"
14940   HTYPE = constants.HTYPE_GROUP
14941   REQ_BGL = False
14942
14943   def ExpandNames(self):
14944     # This raises errors.OpPrereqError on its own:
14945     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14946
14947     if self.op.target_groups:
14948       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14949                                   self.op.target_groups)
14950     else:
14951       self.req_target_uuids = []
14952
14953     if self.group_uuid in self.req_target_uuids:
14954       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14955                                  " as a target group (targets are %s)" %
14956                                  (self.group_uuid,
14957                                   utils.CommaJoin(self.req_target_uuids)),
14958                                  errors.ECODE_INVAL)
14959
14960     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14961
14962     self.share_locks = _ShareAll()
14963     self.needed_locks = {
14964       locking.LEVEL_INSTANCE: [],
14965       locking.LEVEL_NODEGROUP: [],
14966       locking.LEVEL_NODE: [],
14967       }
14968
14969   def DeclareLocks(self, level):
14970     if level == locking.LEVEL_INSTANCE:
14971       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14972
14973       # Lock instances optimistically, needs verification once node and group
14974       # locks have been acquired
14975       self.needed_locks[locking.LEVEL_INSTANCE] = \
14976         self.cfg.GetNodeGroupInstances(self.group_uuid)
14977
14978     elif level == locking.LEVEL_NODEGROUP:
14979       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14980
14981       if self.req_target_uuids:
14982         lock_groups = set([self.group_uuid] + self.req_target_uuids)
14983
14984         # Lock all groups used by instances optimistically; this requires going
14985         # via the node before it's locked, requiring verification later on
14986         lock_groups.update(group_uuid
14987                            for instance_name in
14988                              self.owned_locks(locking.LEVEL_INSTANCE)
14989                            for group_uuid in
14990                              self.cfg.GetInstanceNodeGroups(instance_name))
14991       else:
14992         # No target groups, need to lock all of them
14993         lock_groups = locking.ALL_SET
14994
14995       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14996
14997     elif level == locking.LEVEL_NODE:
14998       # This will only lock the nodes in the group to be evacuated which
14999       # contain actual instances
15000       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15001       self._LockInstancesNodes()
15002
15003       # Lock all nodes in group to be evacuated and target groups
15004       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15005       assert self.group_uuid in owned_groups
15006       member_nodes = [node_name
15007                       for group in owned_groups
15008                       for node_name in self.cfg.GetNodeGroup(group).members]
15009       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15010
15011   def CheckPrereq(self):
15012     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15013     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15014     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15015
15016     assert owned_groups.issuperset(self.req_target_uuids)
15017     assert self.group_uuid in owned_groups
15018
15019     # Check if locked instances are still correct
15020     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15021
15022     # Get instance information
15023     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15024
15025     # Check if node groups for locked instances are still correct
15026     _CheckInstancesNodeGroups(self.cfg, self.instances,
15027                               owned_groups, owned_nodes, self.group_uuid)
15028
15029     if self.req_target_uuids:
15030       # User requested specific target groups
15031       self.target_uuids = self.req_target_uuids
15032     else:
15033       # All groups except the one to be evacuated are potential targets
15034       self.target_uuids = [group_uuid for group_uuid in owned_groups
15035                            if group_uuid != self.group_uuid]
15036
15037       if not self.target_uuids:
15038         raise errors.OpPrereqError("There are no possible target groups",
15039                                    errors.ECODE_INVAL)
15040
15041   def BuildHooksEnv(self):
15042     """Build hooks env.
15043
15044     """
15045     return {
15046       "GROUP_NAME": self.op.group_name,
15047       "TARGET_GROUPS": " ".join(self.target_uuids),
15048       }
15049
15050   def BuildHooksNodes(self):
15051     """Build hooks nodes.
15052
15053     """
15054     mn = self.cfg.GetMasterNode()
15055
15056     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15057
15058     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15059
15060     return (run_nodes, run_nodes)
15061
15062   def Exec(self, feedback_fn):
15063     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15064
15065     assert self.group_uuid not in self.target_uuids
15066
15067     req = iallocator.IAReqGroupChange(instances=instances,
15068                                       target_groups=self.target_uuids)
15069     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15070
15071     ial.Run(self.op.iallocator)
15072
15073     if not ial.success:
15074       raise errors.OpPrereqError("Can't compute group evacuation using"
15075                                  " iallocator '%s': %s" %
15076                                  (self.op.iallocator, ial.info),
15077                                  errors.ECODE_NORES)
15078
15079     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15080
15081     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15082                  len(jobs), self.op.group_name)
15083
15084     return ResultWithJobs(jobs)
15085
15086
15087 class TagsLU(NoHooksLU): # pylint: disable=W0223
15088   """Generic tags LU.
15089
15090   This is an abstract class which is the parent of all the other tags LUs.
15091
15092   """
15093   def ExpandNames(self):
15094     self.group_uuid = None
15095     self.needed_locks = {}
15096
15097     if self.op.kind == constants.TAG_NODE:
15098       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15099       lock_level = locking.LEVEL_NODE
15100       lock_name = self.op.name
15101     elif self.op.kind == constants.TAG_INSTANCE:
15102       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15103       lock_level = locking.LEVEL_INSTANCE
15104       lock_name = self.op.name
15105     elif self.op.kind == constants.TAG_NODEGROUP:
15106       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15107       lock_level = locking.LEVEL_NODEGROUP
15108       lock_name = self.group_uuid
15109     elif self.op.kind == constants.TAG_NETWORK:
15110       self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15111       lock_level = locking.LEVEL_NETWORK
15112       lock_name = self.network_uuid
15113     else:
15114       lock_level = None
15115       lock_name = None
15116
15117     if lock_level and getattr(self.op, "use_locking", True):
15118       self.needed_locks[lock_level] = lock_name
15119
15120     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15121     # not possible to acquire the BGL based on opcode parameters)
15122
15123   def CheckPrereq(self):
15124     """Check prerequisites.
15125
15126     """
15127     if self.op.kind == constants.TAG_CLUSTER:
15128       self.target = self.cfg.GetClusterInfo()
15129     elif self.op.kind == constants.TAG_NODE:
15130       self.target = self.cfg.GetNodeInfo(self.op.name)
15131     elif self.op.kind == constants.TAG_INSTANCE:
15132       self.target = self.cfg.GetInstanceInfo(self.op.name)
15133     elif self.op.kind == constants.TAG_NODEGROUP:
15134       self.target = self.cfg.GetNodeGroup(self.group_uuid)
15135     elif self.op.kind == constants.TAG_NETWORK:
15136       self.target = self.cfg.GetNetwork(self.network_uuid)
15137     else:
15138       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15139                                  str(self.op.kind), errors.ECODE_INVAL)
15140
15141
15142 class LUTagsGet(TagsLU):
15143   """Returns the tags of a given object.
15144
15145   """
15146   REQ_BGL = False
15147
15148   def ExpandNames(self):
15149     TagsLU.ExpandNames(self)
15150
15151     # Share locks as this is only a read operation
15152     self.share_locks = _ShareAll()
15153
15154   def Exec(self, feedback_fn):
15155     """Returns the tag list.
15156
15157     """
15158     return list(self.target.GetTags())
15159
15160
15161 class LUTagsSearch(NoHooksLU):
15162   """Searches the tags for a given pattern.
15163
15164   """
15165   REQ_BGL = False
15166
15167   def ExpandNames(self):
15168     self.needed_locks = {}
15169
15170   def CheckPrereq(self):
15171     """Check prerequisites.
15172
15173     This checks the pattern passed for validity by compiling it.
15174
15175     """
15176     try:
15177       self.re = re.compile(self.op.pattern)
15178     except re.error, err:
15179       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15180                                  (self.op.pattern, err), errors.ECODE_INVAL)
15181
15182   def Exec(self, feedback_fn):
15183     """Returns the tag list.
15184
15185     """
15186     cfg = self.cfg
15187     tgts = [("/cluster", cfg.GetClusterInfo())]
15188     ilist = cfg.GetAllInstancesInfo().values()
15189     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15190     nlist = cfg.GetAllNodesInfo().values()
15191     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15192     tgts.extend(("/nodegroup/%s" % n.name, n)
15193                 for n in cfg.GetAllNodeGroupsInfo().values())
15194     results = []
15195     for path, target in tgts:
15196       for tag in target.GetTags():
15197         if self.re.search(tag):
15198           results.append((path, tag))
15199     return results
15200
15201
15202 class LUTagsSet(TagsLU):
15203   """Sets a tag on a given object.
15204
15205   """
15206   REQ_BGL = False
15207
15208   def CheckPrereq(self):
15209     """Check prerequisites.
15210
15211     This checks the type and length of the tag name and value.
15212
15213     """
15214     TagsLU.CheckPrereq(self)
15215     for tag in self.op.tags:
15216       objects.TaggableObject.ValidateTag(tag)
15217
15218   def Exec(self, feedback_fn):
15219     """Sets the tag.
15220
15221     """
15222     try:
15223       for tag in self.op.tags:
15224         self.target.AddTag(tag)
15225     except errors.TagError, err:
15226       raise errors.OpExecError("Error while setting tag: %s" % str(err))
15227     self.cfg.Update(self.target, feedback_fn)
15228
15229
15230 class LUTagsDel(TagsLU):
15231   """Delete a list of tags from a given object.
15232
15233   """
15234   REQ_BGL = False
15235
15236   def CheckPrereq(self):
15237     """Check prerequisites.
15238
15239     This checks that we have the given tag.
15240
15241     """
15242     TagsLU.CheckPrereq(self)
15243     for tag in self.op.tags:
15244       objects.TaggableObject.ValidateTag(tag)
15245     del_tags = frozenset(self.op.tags)
15246     cur_tags = self.target.GetTags()
15247
15248     diff_tags = del_tags - cur_tags
15249     if diff_tags:
15250       diff_names = ("'%s'" % i for i in sorted(diff_tags))
15251       raise errors.OpPrereqError("Tag(s) %s not found" %
15252                                  (utils.CommaJoin(diff_names), ),
15253                                  errors.ECODE_NOENT)
15254
15255   def Exec(self, feedback_fn):
15256     """Remove the tag from the object.
15257
15258     """
15259     for tag in self.op.tags:
15260       self.target.RemoveTag(tag)
15261     self.cfg.Update(self.target, feedback_fn)
15262
15263
15264 class LUTestDelay(NoHooksLU):
15265   """Sleep for a specified amount of time.
15266
15267   This LU sleeps on the master and/or nodes for a specified amount of
15268   time.
15269
15270   """
15271   REQ_BGL = False
15272
15273   def ExpandNames(self):
15274     """Expand names and set required locks.
15275
15276     This expands the node list, if any.
15277
15278     """
15279     self.needed_locks = {}
15280     if self.op.on_nodes:
15281       # _GetWantedNodes can be used here, but is not always appropriate to use
15282       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15283       # more information.
15284       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15285       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15286
15287   def _TestDelay(self):
15288     """Do the actual sleep.
15289
15290     """
15291     if self.op.on_master:
15292       if not utils.TestDelay(self.op.duration):
15293         raise errors.OpExecError("Error during master delay test")
15294     if self.op.on_nodes:
15295       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15296       for node, node_result in result.items():
15297         node_result.Raise("Failure during rpc call to node %s" % node)
15298
15299   def Exec(self, feedback_fn):
15300     """Execute the test delay opcode, with the wanted repetitions.
15301
15302     """
15303     if self.op.repeat == 0:
15304       self._TestDelay()
15305     else:
15306       top_value = self.op.repeat - 1
15307       for i in range(self.op.repeat):
15308         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
15309         self._TestDelay()
15310
15311
15312 class LURestrictedCommand(NoHooksLU):
15313   """Logical unit for executing restricted commands.
15314
15315   """
15316   REQ_BGL = False
15317
15318   def ExpandNames(self):
15319     if self.op.nodes:
15320       self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15321
15322     self.needed_locks = {
15323       locking.LEVEL_NODE: self.op.nodes,
15324       }
15325     self.share_locks = {
15326       locking.LEVEL_NODE: not self.op.use_locking,
15327       }
15328
15329   def CheckPrereq(self):
15330     """Check prerequisites.
15331
15332     """
15333
15334   def Exec(self, feedback_fn):
15335     """Execute restricted command and return output.
15336
15337     """
15338     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15339
15340     # Check if correct locks are held
15341     assert set(self.op.nodes).issubset(owned_nodes)
15342
15343     rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15344
15345     result = []
15346
15347     for node_name in self.op.nodes:
15348       nres = rpcres[node_name]
15349       if nres.fail_msg:
15350         msg = ("Command '%s' on node '%s' failed: %s" %
15351                (self.op.command, node_name, nres.fail_msg))
15352         result.append((False, msg))
15353       else:
15354         result.append((True, nres.payload))
15355
15356     return result
15357
15358
15359 class LUTestJqueue(NoHooksLU):
15360   """Utility LU to test some aspects of the job queue.
15361
15362   """
15363   REQ_BGL = False
15364
15365   # Must be lower than default timeout for WaitForJobChange to see whether it
15366   # notices changed jobs
15367   _CLIENT_CONNECT_TIMEOUT = 20.0
15368   _CLIENT_CONFIRM_TIMEOUT = 60.0
15369
15370   @classmethod
15371   def _NotifyUsingSocket(cls, cb, errcls):
15372     """Opens a Unix socket and waits for another program to connect.
15373
15374     @type cb: callable
15375     @param cb: Callback to send socket name to client
15376     @type errcls: class
15377     @param errcls: Exception class to use for errors
15378
15379     """
15380     # Using a temporary directory as there's no easy way to create temporary
15381     # sockets without writing a custom loop around tempfile.mktemp and
15382     # socket.bind
15383     tmpdir = tempfile.mkdtemp()
15384     try:
15385       tmpsock = utils.PathJoin(tmpdir, "sock")
15386
15387       logging.debug("Creating temporary socket at %s", tmpsock)
15388       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15389       try:
15390         sock.bind(tmpsock)
15391         sock.listen(1)
15392
15393         # Send details to client
15394         cb(tmpsock)
15395
15396         # Wait for client to connect before continuing
15397         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15398         try:
15399           (conn, _) = sock.accept()
15400         except socket.error, err:
15401           raise errcls("Client didn't connect in time (%s)" % err)
15402       finally:
15403         sock.close()
15404     finally:
15405       # Remove as soon as client is connected
15406       shutil.rmtree(tmpdir)
15407
15408     # Wait for client to close
15409     try:
15410       try:
15411         # pylint: disable=E1101
15412         # Instance of '_socketobject' has no ... member
15413         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15414         conn.recv(1)
15415       except socket.error, err:
15416         raise errcls("Client failed to confirm notification (%s)" % err)
15417     finally:
15418       conn.close()
15419
15420   def _SendNotification(self, test, arg, sockname):
15421     """Sends a notification to the client.
15422
15423     @type test: string
15424     @param test: Test name
15425     @param arg: Test argument (depends on test)
15426     @type sockname: string
15427     @param sockname: Socket path
15428
15429     """
15430     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15431
15432   def _Notify(self, prereq, test, arg):
15433     """Notifies the client of a test.
15434
15435     @type prereq: bool
15436     @param prereq: Whether this is a prereq-phase test
15437     @type test: string
15438     @param test: Test name
15439     @param arg: Test argument (depends on test)
15440
15441     """
15442     if prereq:
15443       errcls = errors.OpPrereqError
15444     else:
15445       errcls = errors.OpExecError
15446
15447     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15448                                                   test, arg),
15449                                    errcls)
15450
15451   def CheckArguments(self):
15452     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15453     self.expandnames_calls = 0
15454
15455   def ExpandNames(self):
15456     checkargs_calls = getattr(self, "checkargs_calls", 0)
15457     if checkargs_calls < 1:
15458       raise errors.ProgrammerError("CheckArguments was not called")
15459
15460     self.expandnames_calls += 1
15461
15462     if self.op.notify_waitlock:
15463       self._Notify(True, constants.JQT_EXPANDNAMES, None)
15464
15465     self.LogInfo("Expanding names")
15466
15467     # Get lock on master node (just to get a lock, not for a particular reason)
15468     self.needed_locks = {
15469       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15470       }
15471
15472   def Exec(self, feedback_fn):
15473     if self.expandnames_calls < 1:
15474       raise errors.ProgrammerError("ExpandNames was not called")
15475
15476     if self.op.notify_exec:
15477       self._Notify(False, constants.JQT_EXEC, None)
15478
15479     self.LogInfo("Executing")
15480
15481     if self.op.log_messages:
15482       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15483       for idx, msg in enumerate(self.op.log_messages):
15484         self.LogInfo("Sending log message %s", idx + 1)
15485         feedback_fn(constants.JQT_MSGPREFIX + msg)
15486         # Report how many test messages have been sent
15487         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15488
15489     if self.op.fail:
15490       raise errors.OpExecError("Opcode failure was requested")
15491
15492     return True
15493
15494
15495 class LUTestAllocator(NoHooksLU):
15496   """Run allocator tests.
15497
15498   This LU runs the allocator tests
15499
15500   """
15501   def CheckPrereq(self):
15502     """Check prerequisites.
15503
15504     This checks the opcode parameters depending on the director and mode test.
15505
15506     """
15507     if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15508                         constants.IALLOCATOR_MODE_MULTI_ALLOC):
15509       for attr in ["memory", "disks", "disk_template",
15510                    "os", "tags", "nics", "vcpus"]:
15511         if not hasattr(self.op, attr):
15512           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15513                                      attr, errors.ECODE_INVAL)
15514       iname = self.cfg.ExpandInstanceName(self.op.name)
15515       if iname is not None:
15516         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15517                                    iname, errors.ECODE_EXISTS)
15518       if not isinstance(self.op.nics, list):
15519         raise errors.OpPrereqError("Invalid parameter 'nics'",
15520                                    errors.ECODE_INVAL)
15521       if not isinstance(self.op.disks, list):
15522         raise errors.OpPrereqError("Invalid parameter 'disks'",
15523                                    errors.ECODE_INVAL)
15524       for row in self.op.disks:
15525         if (not isinstance(row, dict) or
15526             constants.IDISK_SIZE not in row or
15527             not isinstance(row[constants.IDISK_SIZE], int) or
15528             constants.IDISK_MODE not in row or
15529             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15530           raise errors.OpPrereqError("Invalid contents of the 'disks'"
15531                                      " parameter", errors.ECODE_INVAL)
15532       if self.op.hypervisor is None:
15533         self.op.hypervisor = self.cfg.GetHypervisorType()
15534     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15535       fname = _ExpandInstanceName(self.cfg, self.op.name)
15536       self.op.name = fname
15537       self.relocate_from = \
15538           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15539     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15540                           constants.IALLOCATOR_MODE_NODE_EVAC):
15541       if not self.op.instances:
15542         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15543       self.op.instances = _GetWantedInstances(self, self.op.instances)
15544     else:
15545       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15546                                  self.op.mode, errors.ECODE_INVAL)
15547
15548     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15549       if self.op.allocator is None:
15550         raise errors.OpPrereqError("Missing allocator name",
15551                                    errors.ECODE_INVAL)
15552     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15553       raise errors.OpPrereqError("Wrong allocator test '%s'" %
15554                                  self.op.direction, errors.ECODE_INVAL)
15555
15556   def Exec(self, feedback_fn):
15557     """Run the allocator test.
15558
15559     """
15560     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15561       req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15562                                           memory=self.op.memory,
15563                                           disks=self.op.disks,
15564                                           disk_template=self.op.disk_template,
15565                                           os=self.op.os,
15566                                           tags=self.op.tags,
15567                                           nics=self.op.nics,
15568                                           vcpus=self.op.vcpus,
15569                                           spindle_use=self.op.spindle_use,
15570                                           hypervisor=self.op.hypervisor)
15571     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15572       req = iallocator.IAReqRelocate(name=self.op.name,
15573                                      relocate_from=list(self.relocate_from))
15574     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15575       req = iallocator.IAReqGroupChange(instances=self.op.instances,
15576                                         target_groups=self.op.target_groups)
15577     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15578       req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15579                                      evac_mode=self.op.evac_mode)
15580     elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15581       disk_template = self.op.disk_template
15582       insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15583                                              memory=self.op.memory,
15584                                              disks=self.op.disks,
15585                                              disk_template=disk_template,
15586                                              os=self.op.os,
15587                                              tags=self.op.tags,
15588                                              nics=self.op.nics,
15589                                              vcpus=self.op.vcpus,
15590                                              spindle_use=self.op.spindle_use,
15591                                              hypervisor=self.op.hypervisor)
15592                for idx in range(self.op.count)]
15593       req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15594     else:
15595       raise errors.ProgrammerError("Uncatched mode %s in"
15596                                    " LUTestAllocator.Exec", self.op.mode)
15597
15598     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15599     if self.op.direction == constants.IALLOCATOR_DIR_IN:
15600       result = ial.in_text
15601     else:
15602       ial.Run(self.op.allocator, validate=False)
15603       result = ial.out_text
15604     return result
15605
15606
15607 # Network LUs
15608 class LUNetworkAdd(LogicalUnit):
15609   """Logical unit for creating networks.
15610
15611   """
15612   HPATH = "network-add"
15613   HTYPE = constants.HTYPE_NETWORK
15614   REQ_BGL = False
15615
15616   def BuildHooksNodes(self):
15617     """Build hooks nodes.
15618
15619     """
15620     mn = self.cfg.GetMasterNode()
15621     return ([mn], [mn])
15622
15623   def ExpandNames(self):
15624     self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15625     self.needed_locks = {}
15626     self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15627
15628   def CheckPrereq(self):
15629     """Check prerequisites.
15630
15631     This checks that the given group name is not an existing node group
15632     already.
15633
15634     """
15635     if self.op.network is None:
15636       raise errors.OpPrereqError("Network must be given",
15637                                  errors.ECODE_INVAL)
15638
15639     uuid = self.cfg.LookupNetwork(self.op.network_name)
15640
15641     if uuid:
15642       raise errors.OpPrereqError("Network '%s' already defined" %
15643                                  self.op.network, errors.ECODE_EXISTS)
15644
15645     if self.op.mac_prefix:
15646       utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15647
15648     # Check tag validity
15649     for tag in self.op.tags:
15650       objects.TaggableObject.ValidateTag(tag)
15651
15652   def BuildHooksEnv(self):
15653     """Build hooks env.
15654
15655     """
15656     args = {
15657       "name": self.op.network_name,
15658       "subnet": self.op.network,
15659       "gateway": self.op.gateway,
15660       "network6": self.op.network6,
15661       "gateway6": self.op.gateway6,
15662       "mac_prefix": self.op.mac_prefix,
15663       "network_type": self.op.network_type,
15664       "tags": self.op.tags,
15665       }
15666     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15667
15668   def Exec(self, feedback_fn):
15669     """Add the ip pool to the cluster.
15670
15671     """
15672     nobj = objects.Network(name=self.op.network_name,
15673                            network=self.op.network,
15674                            gateway=self.op.gateway,
15675                            network6=self.op.network6,
15676                            gateway6=self.op.gateway6,
15677                            mac_prefix=self.op.mac_prefix,
15678                            network_type=self.op.network_type,
15679                            uuid=self.network_uuid,
15680                            family=4)
15681     # Initialize the associated address pool
15682     try:
15683       pool = network.AddressPool.InitializeNetwork(nobj)
15684     except errors.AddressPoolError, e:
15685       raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15686
15687     # Check if we need to reserve the nodes and the cluster master IP
15688     # These may not be allocated to any instances in routed mode, as
15689     # they wouldn't function anyway.
15690     for node in self.cfg.GetAllNodesInfo().values():
15691       for ip in [node.primary_ip, node.secondary_ip]:
15692         try:
15693           pool.Reserve(ip)
15694           self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
15695
15696         except errors.AddressPoolError:
15697           pass
15698
15699     master_ip = self.cfg.GetClusterInfo().master_ip
15700     try:
15701       pool.Reserve(master_ip)
15702       self.LogInfo("Reserved cluster master IP (%s)", master_ip)
15703     except errors.AddressPoolError:
15704       pass
15705
15706     if self.op.add_reserved_ips:
15707       for ip in self.op.add_reserved_ips:
15708         try:
15709           pool.Reserve(ip, external=True)
15710         except errors.AddressPoolError, e:
15711           raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15712
15713     if self.op.tags:
15714       for tag in self.op.tags:
15715         nobj.AddTag(tag)
15716
15717     self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15718     del self.remove_locks[locking.LEVEL_NETWORK]
15719
15720
15721 class LUNetworkRemove(LogicalUnit):
15722   HPATH = "network-remove"
15723   HTYPE = constants.HTYPE_NETWORK
15724   REQ_BGL = False
15725
15726   def ExpandNames(self):
15727     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15728
15729     if not self.network_uuid:
15730       raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15731                                  errors.ECODE_INVAL)
15732     self.needed_locks = {
15733       locking.LEVEL_NETWORK: [self.network_uuid],
15734       }
15735
15736   def CheckPrereq(self):
15737     """Check prerequisites.
15738
15739     This checks that the given network name exists as a network, that is
15740     empty (i.e., contains no nodes), and that is not the last group of the
15741     cluster.
15742
15743     """
15744
15745     # Verify that the network is not conncted.
15746     node_groups = [group.name
15747                    for group in self.cfg.GetAllNodeGroupsInfo().values()
15748                    for net in group.networks.keys()
15749                    if net == self.network_uuid]
15750
15751     if node_groups:
15752       self.LogWarning("Nework '%s' is connected to the following"
15753                       " node groups: %s" % (self.op.network_name,
15754                       utils.CommaJoin(utils.NiceSort(node_groups))))
15755       raise errors.OpPrereqError("Network still connected",
15756                                  errors.ECODE_STATE)
15757
15758   def BuildHooksEnv(self):
15759     """Build hooks env.
15760
15761     """
15762     return {
15763       "NETWORK_NAME": self.op.network_name,
15764       }
15765
15766   def BuildHooksNodes(self):
15767     """Build hooks nodes.
15768
15769     """
15770     mn = self.cfg.GetMasterNode()
15771     return ([mn], [mn])
15772
15773   def Exec(self, feedback_fn):
15774     """Remove the network.
15775
15776     """
15777     try:
15778       self.cfg.RemoveNetwork(self.network_uuid)
15779     except errors.ConfigurationError:
15780       raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15781                                (self.op.network_name, self.network_uuid))
15782
15783
15784 class LUNetworkSetParams(LogicalUnit):
15785   """Modifies the parameters of a network.
15786
15787   """
15788   HPATH = "network-modify"
15789   HTYPE = constants.HTYPE_NETWORK
15790   REQ_BGL = False
15791
15792   def CheckArguments(self):
15793     if (self.op.gateway and
15794         (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15795       raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15796                                  " at once", errors.ECODE_INVAL)
15797
15798   def ExpandNames(self):
15799     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15800     self.network = self.cfg.GetNetwork(self.network_uuid)
15801     if self.network is None:
15802       raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15803                                  (self.op.network_name, self.network_uuid),
15804                                  errors.ECODE_INVAL)
15805     self.needed_locks = {
15806       locking.LEVEL_NETWORK: [self.network_uuid],
15807       }
15808
15809   def CheckPrereq(self):
15810     """Check prerequisites.
15811
15812     """
15813     self.gateway = self.network.gateway
15814     self.network_type = self.network.network_type
15815     self.mac_prefix = self.network.mac_prefix
15816     self.network6 = self.network.network6
15817     self.gateway6 = self.network.gateway6
15818     self.tags = self.network.tags
15819
15820     self.pool = network.AddressPool(self.network)
15821
15822     if self.op.gateway:
15823       if self.op.gateway == constants.VALUE_NONE:
15824         self.gateway = None
15825       else:
15826         self.gateway = self.op.gateway
15827         if self.pool.IsReserved(self.gateway):
15828           raise errors.OpPrereqError("%s is already reserved" %
15829                                      self.gateway, errors.ECODE_INVAL)
15830
15831     if self.op.network_type:
15832       if self.op.network_type == constants.VALUE_NONE:
15833         self.network_type = None
15834       else:
15835         self.network_type = self.op.network_type
15836
15837     if self.op.mac_prefix:
15838       if self.op.mac_prefix == constants.VALUE_NONE:
15839         self.mac_prefix = None
15840       else:
15841         utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15842         self.mac_prefix = self.op.mac_prefix
15843
15844     if self.op.gateway6:
15845       if self.op.gateway6 == constants.VALUE_NONE:
15846         self.gateway6 = None
15847       else:
15848         self.gateway6 = self.op.gateway6
15849
15850     if self.op.network6:
15851       if self.op.network6 == constants.VALUE_NONE:
15852         self.network6 = None
15853       else:
15854         self.network6 = self.op.network6
15855
15856   def BuildHooksEnv(self):
15857     """Build hooks env.
15858
15859     """
15860     args = {
15861       "name": self.op.network_name,
15862       "subnet": self.network.network,
15863       "gateway": self.gateway,
15864       "network6": self.network6,
15865       "gateway6": self.gateway6,
15866       "mac_prefix": self.mac_prefix,
15867       "network_type": self.network_type,
15868       "tags": self.tags,
15869       }
15870     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15871
15872   def BuildHooksNodes(self):
15873     """Build hooks nodes.
15874
15875     """
15876     mn = self.cfg.GetMasterNode()
15877     return ([mn], [mn])
15878
15879   def Exec(self, feedback_fn):
15880     """Modifies the network.
15881
15882     """
15883     #TODO: reserve/release via temporary reservation manager
15884     #      extend cfg.ReserveIp/ReleaseIp with the external flag
15885     if self.op.gateway:
15886       if self.gateway == self.network.gateway:
15887         self.LogWarning("Gateway is already %s" % self.gateway)
15888       else:
15889         if self.gateway:
15890           self.pool.Reserve(self.gateway, external=True)
15891         if self.network.gateway:
15892           self.pool.Release(self.network.gateway, external=True)
15893         self.network.gateway = self.gateway
15894
15895     if self.op.add_reserved_ips:
15896       for ip in self.op.add_reserved_ips:
15897         try:
15898           if self.pool.IsReserved(ip):
15899             self.LogWarning("IP %s is already reserved" % ip)
15900           else:
15901             self.pool.Reserve(ip, external=True)
15902         except errors.AddressPoolError, e:
15903           self.LogWarning("Cannot reserve ip %s. %s" % (ip, e))
15904
15905     if self.op.remove_reserved_ips:
15906       for ip in self.op.remove_reserved_ips:
15907         if ip == self.network.gateway:
15908           self.LogWarning("Cannot unreserve Gateway's IP")
15909           continue
15910         try:
15911           if not self.pool.IsReserved(ip):
15912             self.LogWarning("IP %s is already unreserved" % ip)
15913           else:
15914             self.pool.Release(ip, external=True)
15915         except errors.AddressPoolError, e:
15916           self.LogWarning("Cannot release ip %s. %s" % (ip, e))
15917
15918     if self.op.mac_prefix:
15919       self.network.mac_prefix = self.mac_prefix
15920
15921     if self.op.network6:
15922       self.network.network6 = self.network6
15923
15924     if self.op.gateway6:
15925       self.network.gateway6 = self.gateway6
15926
15927     if self.op.network_type:
15928       self.network.network_type = self.network_type
15929
15930     self.pool.Validate()
15931
15932     self.cfg.Update(self.network, feedback_fn)
15933
15934
15935 class _NetworkQuery(_QueryBase):
15936   FIELDS = query.NETWORK_FIELDS
15937
15938   def ExpandNames(self, lu):
15939     lu.needed_locks = {}
15940
15941     self._all_networks = lu.cfg.GetAllNetworksInfo()
15942     name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
15943
15944     if not self.names:
15945       self.wanted = [name_to_uuid[name]
15946                      for name in utils.NiceSort(name_to_uuid.keys())]
15947     else:
15948       # Accept names to be either names or UUIDs.
15949       missing = []
15950       self.wanted = []
15951       all_uuid = frozenset(self._all_networks.keys())
15952
15953       for name in self.names:
15954         if name in all_uuid:
15955           self.wanted.append(name)
15956         elif name in name_to_uuid:
15957           self.wanted.append(name_to_uuid[name])
15958         else:
15959           missing.append(name)
15960
15961       if missing:
15962         raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
15963                                    errors.ECODE_NOENT)
15964
15965   def DeclareLocks(self, lu, level):
15966     pass
15967
15968   def _GetQueryData(self, lu):
15969     """Computes the list of networks and their attributes.
15970
15971     """
15972     do_instances = query.NETQ_INST in self.requested_data
15973     do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
15974     do_stats = query.NETQ_STATS in self.requested_data
15975
15976     network_to_groups = None
15977     network_to_instances = None
15978     stats = None
15979
15980     # For NETQ_GROUP, we need to map network->[groups]
15981     if do_groups:
15982       all_groups = lu.cfg.GetAllNodeGroupsInfo()
15983       network_to_groups = dict((uuid, []) for uuid in self.wanted)
15984
15985       if do_instances:
15986         all_instances = lu.cfg.GetAllInstancesInfo()
15987         all_nodes = lu.cfg.GetAllNodesInfo()
15988         network_to_instances = dict((uuid, []) for uuid in self.wanted)
15989
15990       for group in all_groups.values():
15991         if do_instances:
15992           group_nodes = [node.name for node in all_nodes.values() if
15993                          node.group == group.uuid]
15994           group_instances = [instance for instance in all_instances.values()
15995                              if instance.primary_node in group_nodes]
15996
15997         for net_uuid in group.networks.keys():
15998           if net_uuid in network_to_groups:
15999             netparams = group.networks[net_uuid]
16000             mode = netparams[constants.NIC_MODE]
16001             link = netparams[constants.NIC_LINK]
16002             info = group.name + '(' + mode + ', ' + link + ')'
16003             network_to_groups[net_uuid].append(info)
16004
16005             if do_instances:
16006               for instance in group_instances:
16007                 for nic in instance.nics:
16008                   if nic.network == self._all_networks[net_uuid].name:
16009                     network_to_instances[net_uuid].append(instance.name)
16010                     break
16011
16012     if do_stats:
16013       stats = {}
16014       for uuid, net in self._all_networks.items():
16015         if uuid in self.wanted:
16016           pool = network.AddressPool(net)
16017           stats[uuid] = {
16018             "free_count": pool.GetFreeCount(),
16019             "reserved_count": pool.GetReservedCount(),
16020             "map": pool.GetMap(),
16021             "external_reservations": ", ".join(pool.GetExternalReservations()),
16022             }
16023
16024     return query.NetworkQueryData([self._all_networks[uuid]
16025                                    for uuid in self.wanted],
16026                                    network_to_groups,
16027                                    network_to_instances,
16028                                    stats)
16029
16030
16031 class LUNetworkQuery(NoHooksLU):
16032   """Logical unit for querying networks.
16033
16034   """
16035   REQ_BGL = False
16036
16037   def CheckArguments(self):
16038     self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16039                             self.op.output_fields, False)
16040
16041   def ExpandNames(self):
16042     self.nq.ExpandNames(self)
16043
16044   def Exec(self, feedback_fn):
16045     return self.nq.OldStyleQuery(self)
16046
16047
16048 class LUNetworkConnect(LogicalUnit):
16049   """Connect a network to a nodegroup
16050
16051   """
16052   HPATH = "network-connect"
16053   HTYPE = constants.HTYPE_NETWORK
16054   REQ_BGL = False
16055
16056   def ExpandNames(self):
16057     self.network_name = self.op.network_name
16058     self.group_name = self.op.group_name
16059     self.network_mode = self.op.network_mode
16060     self.network_link = self.op.network_link
16061
16062     self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16063     self.network = self.cfg.GetNetwork(self.network_uuid)
16064     if self.network is None:
16065       raise errors.OpPrereqError("Network %s does not exist" %
16066                                  self.network_name, errors.ECODE_INVAL)
16067
16068     self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16069     self.group = self.cfg.GetNodeGroup(self.group_uuid)
16070     if self.group is None:
16071       raise errors.OpPrereqError("Group %s does not exist" %
16072                                  self.group_name, errors.ECODE_INVAL)
16073
16074     self.needed_locks = {
16075       locking.LEVEL_INSTANCE: [],
16076       locking.LEVEL_NODEGROUP: [self.group_uuid],
16077       }
16078     self.share_locks[locking.LEVEL_INSTANCE] = 1
16079
16080   def DeclareLocks(self, level):
16081     if level == locking.LEVEL_INSTANCE:
16082       assert not self.needed_locks[locking.LEVEL_INSTANCE]
16083
16084       # Lock instances optimistically, needs verification once group lock has
16085       # been acquired
16086       self.needed_locks[locking.LEVEL_INSTANCE] = \
16087           self.cfg.GetNodeGroupInstances(self.group_uuid)
16088
16089   def BuildHooksEnv(self):
16090     ret = dict()
16091     ret["GROUP_NAME"] = self.group_name
16092     ret["GROUP_NETWORK_MODE"] = self.network_mode
16093     ret["GROUP_NETWORK_LINK"] = self.network_link
16094     ret.update(_BuildNetworkHookEnvByObject(self.network))
16095     return ret
16096
16097   def BuildHooksNodes(self):
16098     nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16099     return (nodes, nodes)
16100
16101   def CheckPrereq(self):
16102     l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16103                                    for i in value)
16104
16105     self.netparams = dict()
16106     self.netparams[constants.NIC_MODE] = self.network_mode
16107     self.netparams[constants.NIC_LINK] = self.network_link
16108     objects.NIC.CheckParameterSyntax(self.netparams)
16109
16110     #if self.network_mode == constants.NIC_MODE_BRIDGED:
16111     #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16112     self.connected = False
16113     if self.network_uuid in self.group.networks:
16114       self.LogWarning("Network '%s' is already mapped to group '%s'" %
16115                       (self.network_name, self.group.name))
16116       self.connected = True
16117       return
16118
16119     pool = network.AddressPool(self.network)
16120     if self.op.conflicts_check:
16121       groupinstances = []
16122       for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16123         groupinstances.append(self.cfg.GetInstanceInfo(n))
16124       instances = [(instance.name, idx, nic.ip)
16125                    for instance in groupinstances
16126                    for idx, nic in enumerate(instance.nics)
16127                    if (not nic.network and pool.Contains(nic.ip))]
16128       if instances:
16129         self.LogWarning("Following occurences use IPs from network %s"
16130                         " that is about to connect to nodegroup %s: %s" %
16131                         (self.network_name, self.group.name,
16132                         l(instances)))
16133         raise errors.OpPrereqError("Conflicting IPs found."
16134                                    " Please remove/modify"
16135                                    " corresponding NICs",
16136                                    errors.ECODE_INVAL)
16137
16138   def Exec(self, feedback_fn):
16139     if self.connected:
16140       return
16141
16142     self.group.networks[self.network_uuid] = self.netparams
16143     self.cfg.Update(self.group, feedback_fn)
16144
16145
16146 class LUNetworkDisconnect(LogicalUnit):
16147   """Disconnect a network to a nodegroup
16148
16149   """
16150   HPATH = "network-disconnect"
16151   HTYPE = constants.HTYPE_NETWORK
16152   REQ_BGL = False
16153
16154   def ExpandNames(self):
16155     self.network_name = self.op.network_name
16156     self.group_name = self.op.group_name
16157
16158     self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16159     self.network = self.cfg.GetNetwork(self.network_uuid)
16160     if self.network is None:
16161       raise errors.OpPrereqError("Network %s does not exist" %
16162                                  self.network_name, errors.ECODE_INVAL)
16163
16164     self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16165     self.group = self.cfg.GetNodeGroup(self.group_uuid)
16166     if self.group is None:
16167       raise errors.OpPrereqError("Group %s does not exist" %
16168                                  self.group_name, errors.ECODE_INVAL)
16169
16170     self.needed_locks = {
16171       locking.LEVEL_INSTANCE: [],
16172       locking.LEVEL_NODEGROUP: [self.group_uuid],
16173       }
16174     self.share_locks[locking.LEVEL_INSTANCE] = 1
16175
16176   def DeclareLocks(self, level):
16177     if level == locking.LEVEL_INSTANCE:
16178       assert not self.needed_locks[locking.LEVEL_INSTANCE]
16179
16180       # Lock instances optimistically, needs verification once group lock has
16181       # been acquired
16182       self.needed_locks[locking.LEVEL_INSTANCE] = \
16183           self.cfg.GetNodeGroupInstances(self.group_uuid)
16184
16185   def BuildHooksEnv(self):
16186     ret = dict()
16187     ret["GROUP_NAME"] = self.group_name
16188     ret.update(_BuildNetworkHookEnvByObject(self.network))
16189     return ret
16190
16191   def BuildHooksNodes(self):
16192     nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16193     return (nodes, nodes)
16194
16195   def CheckPrereq(self):
16196     l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16197                                    for i in value)
16198
16199     self.connected = True
16200     if self.network_uuid not in self.group.networks:
16201       self.LogWarning("Network '%s' is"
16202                          " not mapped to group '%s'" %
16203                          (self.network_name, self.group.name))
16204       self.connected = False
16205       return
16206
16207     if self.op.conflicts_check:
16208       groupinstances = []
16209       for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16210         groupinstances.append(self.cfg.GetInstanceInfo(n))
16211       instances = [(instance.name, idx, nic.ip)
16212                    for instance in groupinstances
16213                    for idx, nic in enumerate(instance.nics)
16214                    if nic.network == self.network_name]
16215       if instances:
16216         self.LogWarning("Following occurences use IPs from network %s"
16217                            " that is about to disconnected from the nodegroup"
16218                            " %s: %s" %
16219                            (self.network_name, self.group.name,
16220                             l(instances)))
16221         raise errors.OpPrereqError("Conflicting IPs."
16222                                    " Please remove/modify"
16223                                    " corresponding NICS",
16224                                    errors.ECODE_INVAL)
16225
16226   def Exec(self, feedback_fn):
16227     if not self.connected:
16228       return
16229
16230     del self.group.networks[self.network_uuid]
16231     self.cfg.Update(self.group, feedback_fn)
16232
16233
16234 #: Query type implementations
16235 _QUERY_IMPL = {
16236   constants.QR_CLUSTER: _ClusterQuery,
16237   constants.QR_INSTANCE: _InstanceQuery,
16238   constants.QR_NODE: _NodeQuery,
16239   constants.QR_GROUP: _GroupQuery,
16240   constants.QR_NETWORK: _NetworkQuery,
16241   constants.QR_OS: _OsQuery,
16242   constants.QR_EXPORT: _ExportQuery,
16243   }
16244
16245 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16246
16247
16248 def _GetQueryImplementation(name):
16249   """Returns the implemtnation for a query type.
16250
16251   @param name: Query type, must be one of L{constants.QR_VIA_OP}
16252
16253   """
16254   try:
16255     return _QUERY_IMPL[name]
16256   except KeyError:
16257     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16258                                errors.ECODE_INVAL)
16259
16260
16261 def _CheckForConflictingIp(lu, ip, node):
16262   """In case of conflicting ip raise error.
16263
16264   @type ip: string
16265   @param ip: ip address
16266   @type node: string
16267   @param node: node name
16268
16269   """
16270   (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16271   if conf_net is not None:
16272     raise errors.OpPrereqError("Conflicting IP found:"
16273                                " %s <> %s." % (ip, conf_net),
16274                                errors.ECODE_INVAL)
16275
16276   return (None, None)