4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have way too many lines in this module
import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 import ganeti.masterd.instance # pylint: disable-msg=W0611
64 def _SupportsOob(cfg, node):
65 """Tells if node supports OOB.
67 @type cfg: L{config.ConfigWriter}
68 @param cfg: The cluster configuration
69 @type node: L{objects.Node}
71 @return: The OOB script if supported or an empty string otherwise
74 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
78 class LogicalUnit(object):
79 """Logical Unit base class.
81 Subclasses must follow these rules:
82 - implement ExpandNames
83 - implement CheckPrereq (except when tasklets are used)
84 - implement Exec (except when tasklets are used)
85 - implement BuildHooksEnv
86 - redefine HPATH and HTYPE
87 - optionally redefine their run requirements:
88 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
90 Note that all commands require root permissions.
92 @ivar dry_run_result: the value (if any) that will be returned to the caller
93 in dry-run mode (signalled by opcode dry_run parameter)
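  A minimal, purely illustrative subclass (the LU name and hook path below are
  hypothetical) could look like::

    class LUClusterFrobnicate(LogicalUnit):
      HPATH = "cluster-frobnicate"
      HTYPE = constants.HTYPE_CLUSTER

      def ExpandNames(self):
        self.needed_locks = {}

      def CheckPrereq(self):
        pass

      def BuildHooksEnv(self):
        env = {"OP_TARGET": self.cfg.GetClusterName()}
        return env, [], [self.cfg.GetMasterNode()]

      def Exec(self, feedback_fn):
        feedback_fn("Frobnicating the cluster")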
100 def __init__(self, processor, op, context, rpc):
101 """Constructor for LogicalUnit.
    This needs to be overridden in derived classes in order to check op
    validity.
107 self.proc = processor
109 self.cfg = context.cfg
110 self.context = context
112 # Dicts used to declare locking needs to mcpu
113 self.needed_locks = None
114 self.acquired_locks = {}
115 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
117 self.remove_locks = {}
118 # Used to force good behavior when calling helper functions
119 self.recalculate_locks = {}
122 self.Log = processor.Log # pylint: disable-msg=C0103
123 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
124 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
125 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
126 # support for dry-run
127 self.dry_run_result = None
128 # support for generic debug attribute
129 if (not hasattr(self.op, "debug_level") or
130 not isinstance(self.op.debug_level, int)):
131 self.op.debug_level = 0
136 # Validate opcode parameters and set defaults
137 self.op.Validate(True)
139 self.CheckArguments()
142 """Returns the SshRunner object
146 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
149 ssh = property(fget=__GetSSH)
151 def CheckArguments(self):
152 """Check syntactic validity for the opcode arguments.
    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.
169 def ExpandNames(self):
170 """Expand names for this LU.
172 This method is called before starting to execute the opcode, and it should
173 update all the parameters of the opcode to their canonical form (e.g. a
174 short node name must be fully expanded after this method has successfully
175 completed). This way locking, hooks, logging, etc. can work correctly.
177 LUs which implement this method must also populate the self.needed_locks
178 member, as a dict with lock levels as keys, and a list of needed lock names
181 - use an empty dict if you don't need any lock
182 - if you don't need any lock at a particular level omit that level
183 - don't put anything for the BGL level
184 - if you want all locks at a level use locking.ALL_SET as a value
186 If you need to share locks (rather than acquire them exclusively) at one
187 level you can modify self.share_locks, setting a true value (usually 1) for
188 that level. By default locks are not shared.
190 This function can also define a list of tasklets, which then will be
191 executed in order instead of the usual LU-level CheckPrereq and Exec
192 functions, if those are not defined by the LU.
      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None
    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
217 def DeclareLocks(self, level):
218 """Declare LU locking needs for a level
220 While most LUs can just declare their locking needs at ExpandNames time,
221 sometimes there's the need to calculate some locks after having acquired
222 the ones before. This function is called just before acquiring locks at a
223 particular level, but after acquiring the ones at lower levels, and permits
224 such calculations. It can be used to modify self.needed_locks, and by
225 default it does nothing.
227 This function is only called if you have something already set in
228 self.needed_locks for the level.
230 @param level: Locking level which is going to be locked
231 @type level: member of ganeti.locking.LEVELS
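    A typical (illustrative) implementation narrows the node locks to the
    nodes of the already-locked instances, mirroring the example given for
    L{_LockInstancesNodes}::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()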
235 def CheckPrereq(self):
236 """Check prerequisites for this LU.
238 This method should check that the prerequisites for the execution
239 of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.
243 The method should raise errors.OpPrereqError in case something is
244 not fulfilled. Its return value is ignored.
246 This method should also update all the parameters of the opcode to
247 their canonical form if it hasn't been done by ExpandNames before.
250 if self.tasklets is not None:
251 for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
258 def Exec(self, feedback_fn):
    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.
266 if self.tasklets is not None:
267 for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError
273 def BuildHooksEnv(self):
274 """Build hooks environment for this LU.
    This method should return a three-element tuple consisting of: a dict
277 containing the environment that will be used for running the
278 specific hook for this LU, a list of node names on which the hook
279 should run before the execution, and a list of node names on which
280 the hook should run after the execution.
    The keys of the dict must not have the 'GANETI_' prefix, as this
    will be added by the hooks runner. Also note that additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes to return, use an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.
293 raise NotImplementedError
295 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
296 """Notify the LU about the results of its hooks.
298 This method is called every time a hooks phase is executed, and notifies
299 the Logical Unit about the hooks' result. The LU can then use it to alter
300 its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged, but any LU can override it if it
302 wants to use the local cluster hook-scripts somehow.
304 @param phase: one of L{constants.HOOKS_PHASE_POST} or
305 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
306 @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
308 @param lu_result: the previous Exec result this LU had, or None
310 @return: the new Exec result, based on the previous result
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
316 # pylint: disable-msg=W0613,R0201
319 def _ExpandAndLockInstance(self):
320 """Helper function to expand and lock an instance.
322 Many LUs that work on an instance take its name in self.op.instance_name
323 and need to expand it and then declare the expanded name for locking. This
324 function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.
329 if self.needed_locks is None:
330 self.needed_locks = {}
332 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
333 "_ExpandAndLockInstance called with instance-level locks set"
334 self.op.instance_name = _ExpandInstanceName(self.cfg,
335 self.op.instance_name)
336 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
338 def _LockInstancesNodes(self, primary_only=False):
339 """Helper function to declare instances' nodes for locking.
341 This function should be called after locking one or more instances to lock
342 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
343 with all primary or secondary nodes for instances already locked and
344 present in self.needed_locks[locking.LEVEL_INSTANCE].
346 It should be called from DeclareLocks, and for safety only works if
347 self.recalculate_locks[locking.LEVEL_NODE] is set.
349 In the future it may grow parameters to just lock some instance's nodes, or
350 to just lock primaries or secondary nodes, if needed.
    It should be called in DeclareLocks in a way similar to::
354 if level == locking.LEVEL_NODE:
355 self._LockInstancesNodes()
357 @type primary_only: boolean
358 @param primary_only: only lock primary nodes of locked instances
361 assert locking.LEVEL_NODE in self.recalculate_locks, \
362 "_LockInstancesNodes helper function called with no nodes to recalculate"
    # TODO: check if we've really been called with the instance locks held
366 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
367 # future we might want to have different behaviors depending on the value
368 # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)
376 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
377 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
378 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
379 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
381 del self.recalculate_locks[locking.LEVEL_NODE]
384 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
385 """Simple LU which runs no hooks.
387 This LU is intended as a parent for other LogicalUnits which will
388 run no hooks, in order to reduce duplicate code.
394 def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.
397 This just raises an error.
400 assert False, "BuildHooksEnv called for NoHooksLUs"
class Tasklet(object):
  """Tasklet base class.
406 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
407 they can mix legacy code with tasklets. Locking needs to be done in the LU,
408 tasklets know nothing about locks.
410 Subclasses must follow these rules:
411 - Implement CheckPrereq
415 def __init__(self, lu):
422 def CheckPrereq(self):
    """Check prerequisites for this tasklet.
425 This method should check whether the prerequisites for the execution of
426 this tasklet are fulfilled. It can do internode communication, but it
427 should be idempotent - no cluster or system changes are allowed.
429 The method should raise errors.OpPrereqError in case something is not
430 fulfilled. Its return value is ignored.
432 This method should also update all parameters to their canonical form if it
433 hasn't been done before.
438 def Exec(self, feedback_fn):
439 """Execute the tasklet.
441 This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.
446 raise NotImplementedError
class _QueryBase(object):
  """Base for query utility classes.
453 #: Attribute holding field definitions
456 def __init__(self, filter_, fields, use_locking):
457 """Initializes this class.
460 self.use_locking = use_locking
462 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
464 self.requested_data = self.query.RequestedData()
465 self.names = self.query.RequestedNames()
467 # Sort only if no names were requested
468 self.sort_by_name = not self.names
470 self.do_locking = None
473 def _GetNames(self, lu, all_names, lock_level):
474 """Helper function to determine names asked for in the query.
478 names = lu.acquired_locks[lock_level]
482 if self.wanted == locking.ALL_SET:
483 assert not self.names
484 # caller didn't specify names, so ordering is not important
485 return utils.NiceSort(names)
487 # caller specified names and we must keep the same order
489 assert not self.do_locking or lu.acquired_locks[lock_level]
491 missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)
496 # Return expanded names
  @classmethod
  def FieldsQuery(cls, fields):
501 """Returns list of available fields.
503 @return: List of L{objects.QueryFieldDefinition}
506 return query.QueryFields(cls.FIELDS, fields)
508 def ExpandNames(self, lu):
509 """Expand names for this query.
511 See L{LogicalUnit.ExpandNames}.
514 raise NotImplementedError()
516 def DeclareLocks(self, lu, level):
517 """Declare locks for this query.
519 See L{LogicalUnit.DeclareLocks}.
522 raise NotImplementedError()
524 def _GetQueryData(self, lu):
525 """Collects all data for this query.
527 @return: Query data object
530 raise NotImplementedError()
532 def NewStyleQuery(self, lu):
533 """Collect data and execute query.
536 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
537 sort_by_name=self.sort_by_name)
539 def OldStyleQuery(self, lu):
540 """Collect data and execute query.
543 return self.query.OldStyleQuery(self._GetQueryData(lu),
544 sort_by_name=self.sort_by_name)
547 def _GetWantedNodes(lu, nodes):
548 """Returns list of checked and expanded node names.
550 @type lu: L{LogicalUnit}
551 @param lu: the logical unit on whose behalf we execute
553 @param nodes: list of node names or None for all nodes
555 @return: the list of nodes, sorted
556 @raise errors.ProgrammerError: if the nodes parameter is wrong type
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
565 def _GetWantedInstances(lu, instances):
566 """Returns list of checked and expanded instance names.
568 @type lu: L{LogicalUnit}
569 @param lu: the logical unit on whose behalf we execute
570 @type instances: list
571 @param instances: list of instance names or None for all instances
573 @return: the list of instances, sorted
574 @raise errors.OpPrereqError: if the instances parameter is wrong type
575 @raise errors.OpPrereqError: if any of the passed instances is not found
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
585 def _GetUpdatedParams(old_params, update_dict,
586 use_default=True, use_none=False):
587 """Return the new version of a parameter dictionary.
589 @type old_params: dict
590 @param old_params: old parameters
591 @type update_dict: dict
592 @param update_dict: dict containing new parameter values, or
593 constants.VALUE_DEFAULT to reset the parameter to its default
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
602 @return: the new parameter dictionary
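  Example (illustrative key names and values)::

    # old_params == {"mem": 256, "vcpus": 1}
    # update_dict == {"mem": 512, "vcpus": constants.VALUE_DEFAULT}
    # with use_default=True the result is {"mem": 512}: "mem" is
    # overridden, while "vcpus" is removed so that its default applies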
605 params_copy = copy.deepcopy(old_params)
606 for key, val in update_dict.iteritems():
607 if ((use_default and val == constants.VALUE_DEFAULT) or
608 (use_none and val is None)):
      params_copy.pop(key, None)
    else:
      params_copy[key] = val

  return params_copy
618 def _CheckOutputFields(static, dynamic, selected):
619 """Checks whether all selected fields are valid.
621 @type static: L{utils.FieldSet}
622 @param static: static fields set
623 @type dynamic: L{utils.FieldSet}
624 @param dynamic: dynamic fields set
631 delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
637 def _CheckGlobalHvParams(params):
638 """Validates that given hypervisor params are not global ones.
  This will ensure that instances don't get customised versions of
  global parameters.
644 used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
652 def _CheckNodeOnline(lu, node, msg=None):
653 """Ensure that a given node is online.
655 @param lu: the LU on behalf of which we make the check
656 @param node: the node to check
657 @param msg: if passed, should be a message to replace the default one
658 @raise errors.OpPrereqError: if the node is offline
  if msg is None:
    msg = "Can't use offline node"
663 if lu.cfg.GetNodeInfo(node).offline:
664 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
667 def _CheckNodeNotDrained(lu, node):
668 """Ensure that a given node is not drained.
670 @param lu: the LU on behalf of which we make the check
671 @param node: the node to check
672 @raise errors.OpPrereqError: if the node is drained
675 if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)
680 def _CheckNodeVmCapable(lu, node):
681 """Ensure that a given node is vm capable.
683 @param lu: the LU on behalf of which we make the check
684 @param node: the node to check
685 @raise errors.OpPrereqError: if the node is not vm capable
688 if not lu.cfg.GetNodeInfo(node).vm_capable:
689 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
693 def _CheckNodeHasOS(lu, node, os_name, force_variant):
694 """Ensure that a node supports a given OS.
696 @param lu: the LU on behalf of which we make the check
697 @param node: the node to check
698 @param os_name: the OS to query about
699 @param force_variant: whether to ignore variant errors
700 @raise errors.OpPrereqError: if the node is not supporting the OS
703 result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
707 if not force_variant:
708 _CheckOSVariant(result.payload, os_name)
711 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
712 """Ensure that a node has the given secondary ip.
714 @type lu: L{LogicalUnit}
715 @param lu: the LU on behalf of which we make the check
717 @param node: the node to check
718 @type secondary_ip: string
719 @param secondary_ip: the ip to check
720 @type prereq: boolean
721 @param prereq: whether to throw a prerequisite or an execute error
722 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
723 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
726 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
727 result.Raise("Failure checking secondary ip on node %s" % node,
728 prereq=prereq, ecode=errors.ECODE_ENVIRON)
729 if not result.payload:
730 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
731 " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
738 def _GetClusterDomainSecret():
739 """Reads the cluster domain secret.
742 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
746 def _CheckInstanceDown(lu, instance, reason):
747 """Ensure that an instance is not running."""
748 if instance.admin_up:
749 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
750 (instance.name, reason), errors.ECODE_STATE)
752 pnode = instance.primary_node
753 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
754 ins_l.Raise("Can't contact node %s for instance information" % pnode,
755 prereq=True, ecode=errors.ECODE_ENVIRON)
757 if instance.name in ins_l.payload:
758 raise errors.OpPrereqError("Instance %s is running, %s" %
759 (instance.name, reason), errors.ECODE_STATE)
762 def _ExpandItemName(fn, name, kind):
763 """Expand an item name.
765 @param fn: the function to use for expansion
766 @param name: requested item name
767 @param kind: text description ('Node' or 'Instance')
768 @return: the resolved (full) name
769 @raise errors.OpPrereqError: if the item is not found
  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name
779 def _ExpandNodeName(cfg, name):
780 """Wrapper over L{_ExpandItemName} for nodes."""
781 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
784 def _ExpandInstanceName(cfg, name):
785 """Wrapper over L{_ExpandItemName} for instance."""
786 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
789 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
790 memory, vcpus, nics, disk_template, disks,
791 bep, hvp, hypervisor_name):
792 """Builds instance related env variables for hooks
794 This builds the hook environment from individual variables.
797 @param name: the name of the instance
798 @type primary_node: string
799 @param primary_node: the name of the instance's primary node
800 @type secondary_nodes: list
801 @param secondary_nodes: list of secondary nodes as strings
802 @type os_type: string
803 @param os_type: the name of the instance's OS
804 @type status: boolean
805 @param status: the should_run status of the instance
807 @param memory: the memory size of the instance
809 @param vcpus: the count of VCPUs the instance has
811 @param nics: list of tuples (ip, mac, mode, link) representing
812 the NICs the instance has
813 @type disk_template: string
814 @param disk_template: the disk template of the instance
816 @param disks: the list of (size, mode) pairs
818 @param bep: the backend parameters for the instance
820 @param hvp: the hypervisor parameters for the instance
821 @type hypervisor_name: string
822 @param hypervisor_name: the hypervisor for the instance
824 @return: the hook environment for this instance
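  As an illustration, an instance with a single NIC and a single disk would
  yield entries such as (the values below are made up)::

    INSTANCE_NAME=instance1.example.com
    INSTANCE_PRIMARY=node1.example.com
    INSTANCE_NIC_COUNT=1
    INSTANCE_NIC0_MAC=aa:00:00:35:c7:05
    INSTANCE_DISK_COUNT=1
    INSTANCE_DISK0_SIZE=1024

  plus one INSTANCE_BE_*/INSTANCE_HV_* entry per backend/hypervisor
  parameter; the hooks runner later adds the GANETI_ prefix to all of them.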
  """
  if status:
    str_status = "up"
  else:
    str_status = "down"

  env = {
    "INSTANCE_NAME": name,
834 "INSTANCE_PRIMARY": primary_node,
835 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
836 "INSTANCE_OS_TYPE": os_type,
837 "INSTANCE_STATUS": str_status,
838 "INSTANCE_MEMORY": memory,
839 "INSTANCE_VCPUS": vcpus,
840 "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
846 for idx, (ip, mac, mode, link) in enumerate(nics):
849 env["INSTANCE_NIC%d_IP" % idx] = ip
850 env["INSTANCE_NIC%d_MAC" % idx] = mac
851 env["INSTANCE_NIC%d_MODE" % idx] = mode
852 env["INSTANCE_NIC%d_LINK" % idx] = link
853 if mode == constants.NIC_MODE_BRIDGED:
854 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count
  if disks:
    disk_count = len(disks)
862 for idx, (size, mode) in enumerate(disks):
863 env["INSTANCE_DISK%d_SIZE" % idx] = size
864 env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count
870 for source, kind in [(bep, "BE"), (hvp, "HV")]:
871 for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
877 def _NICListToTuple(lu, nics):
878 """Build a list of nic information tuples.
880 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
881 value in LUInstanceQueryData.
883 @type lu: L{LogicalUnit}
884 @param lu: the logical unit on whose behalf we execute
885 @type nics: list of L{objects.NIC}
886 @param nics: list of nics to convert to hooks tuples
890 cluster = lu.cfg.GetClusterInfo()
894 filled_params = cluster.SimpleFillNIC(nic.nicparams)
895 mode = filled_params[constants.NIC_MODE]
896 link = filled_params[constants.NIC_LINK]
897 hooks_nics.append((ip, mac, mode, link))
901 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
902 """Builds instance related env variables for hooks from an object.
904 @type lu: L{LogicalUnit}
905 @param lu: the logical unit on whose behalf we execute
906 @type instance: L{objects.Instance}
907 @param instance: the instance for which we should build the
910 @param override: dictionary with key/values that will override
913 @return: the hook environment dictionary
916 cluster = lu.cfg.GetClusterInfo()
917 bep = cluster.FillBE(instance)
918 hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
921 'primary_node': instance.primary_node,
922 'secondary_nodes': instance.secondary_nodes,
923 'os_type': instance.os,
924 'status': instance.admin_up,
925 'memory': bep[constants.BE_MEMORY],
926 'vcpus': bep[constants.BE_VCPUS],
927 'nics': _NICListToTuple(lu, instance.nics),
928 'disk_template': instance.disk_template,
929 'disks': [(disk.size, disk.mode) for disk in instance.disks],
932 'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
936 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
939 def _AdjustCandidatePool(lu, exceptions):
940 """Adjust the candidate pool after node operations.
943 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
949 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
955 def _DecideSelfPromotion(lu, exceptions=None):
956 """Decide whether I should promote myself as a master candidate.
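    For example (hypothetical numbers): with a candidate_pool_size of 10,
    4 current master candidates and 5 candidates currently deemed necessary,
    the newly added node should promote itself, since 4 < min(5 + 1, 10) == 6.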
959 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
960 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
962 mc_should = min(mc_should + 1, cp_size)
963 return mc_now < mc_should
966 def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.
970 cluster = lu.cfg.GetClusterInfo()
971 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
972 brlist = [params[constants.NIC_LINK] for params in paramslist
973 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
980 def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.
  if node is None:
    node = instance.primary_node
986 _CheckNicsBridgesExist(lu, instance.nics, node)
989 def _CheckOSVariant(os_obj, name):
990 """Check whether an OS name conforms to the os variants specification.
992 @type os_obj: L{objects.OS}
993 @param os_obj: OS object to check
995 @param name: OS name passed by the user, to check for validity
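  For example (an illustrative OS name, assuming the usual C{os+variant}
  naming), C{myos+paravirt} selects the C{paravirt} variant of the C{myos}
  OS, while a bare C{myos} is rejected with "OS name must include a variant"
  whenever the OS declares supported variants.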
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)
1005 if variant not in os_obj.supported_variants:
1006 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1009 def _GetNodeInstancesInner(cfg, fn):
1010 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1013 def _GetNodeInstances(cfg, node_name):
1014 """Returns a list of all primary and secondary instances on a node.
1018 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1021 def _GetNodePrimaryInstances(cfg, node_name):
1022 """Returns primary instances on a node.
1025 return _GetNodeInstancesInner(cfg,
1026 lambda inst: node_name == inst.primary_node)
1029 def _GetNodeSecondaryInstances(cfg, node_name):
1030 """Returns secondary instances on a node.
1033 return _GetNodeInstancesInner(cfg,
1034 lambda inst: node_name in inst.secondary_nodes)
1037 def _GetStorageTypeArgs(cfg, storage_type):
1038 """Returns the arguments for a storage type.
1041 # Special case for file storage
1042 if storage_type == constants.ST_FILE:
1043 # storage.FileStorage wants a list of storage directories
1044 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1049 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
1053 cfg.SetDiskID(dev, node_name)
1055 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1056 result.Raise("Failed to get disk status from node %s" % node_name,
1057 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1059 for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
1066 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1067 """Check the sanity of iallocator and node arguments and use the
1068 cluster-wide iallocator if appropriate.
1070 Check that at most one of (iallocator, node) is specified. If none is
1071 specified, then the LU's opcode's iallocator slot is filled with the
1072 cluster-wide default iallocator.
1074 @type iallocator_slot: string
1075 @param iallocator_slot: the name of the opcode iallocator slot
1076 @type node_slot: string
1077 @param node_slot: the name of the opcode target node slot
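  A typical (illustrative) call from an LU's CheckArguments would be::

    _CheckIAllocatorOrNode(self, "iallocator", "target_node")

  where "iallocator" and "target_node" are the opcode slot names for that
  particular LU.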
1080 node = getattr(lu.op, node_slot, None)
1081 iallocator = getattr(lu.op, iallocator_slot, None)
1083 if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
                               errors.ECODE_INVAL)
1086 elif node is None and iallocator is None:
1087 default_iallocator = lu.cfg.GetDefaultIAllocator()
1088 if default_iallocator:
1089 setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.", errors.ECODE_INVAL)
1098 class LUClusterPostInit(LogicalUnit):
1099 """Logical unit for running hooks after cluster initialization.
1102 HPATH = "cluster-init"
1103 HTYPE = constants.HTYPE_CLUSTER
1105 def BuildHooksEnv(self):
1109 env = {"OP_TARGET": self.cfg.GetClusterName()}
1110 mn = self.cfg.GetMasterNode()
1111 return env, [], [mn]
1113 def Exec(self, feedback_fn):
1120 class LUClusterDestroy(LogicalUnit):
1121 """Logical unit for destroying the cluster.
1124 HPATH = "cluster-destroy"
1125 HTYPE = constants.HTYPE_CLUSTER
1127 def BuildHooksEnv(self):
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []
1134 def CheckPrereq(self):
1135 """Check prerequisites.
1137 This checks whether the cluster is empty.
1139 Any errors are signaled by raising errors.OpPrereqError.
1142 master = self.cfg.GetMasterNode()
1144 nodelist = self.cfg.GetNodeList()
1145 if len(nodelist) != 1 or nodelist[0] != master:
1146 raise errors.OpPrereqError("There are still %d node(s) in"
1147 " this cluster." % (len(nodelist) - 1),
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
1151 raise errors.OpPrereqError("There are still %d instance(s) in"
1152 " this cluster." % len(instancelist),
1155 def Exec(self, feedback_fn):
1156 """Destroys the cluster.
1159 master = self.cfg.GetMasterNode()
1161 # Run post hooks on master node before it's removed
1162 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)
1169 result = self.rpc.call_node_stop_master(master, False)
1170 result.Raise("Could not disable the master role")
1175 def _VerifyCertificate(filename):
1176 """Verifies a certificate for LUClusterVerify.
1178 @type filename: string
1179 @param filename: Path to PEM file
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
1185 except Exception, err: # pylint: disable-msg=W0703
1186 return (LUClusterVerify.ETYPE_ERROR,
1187 "Failed to load X509 certificate %s: %s" % (filename, err))
  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)
1194 fnamemsg = "While verifying %s: %s" % (filename, msg)
  if errcode is None:
    return (None, fnamemsg)
1200 elif errcode == utils.CERT_WARNING:
1201 return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1202 elif errcode == utils.CERT_ERROR:
1203 return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1205 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1208 class LUClusterVerify(LogicalUnit):
1209 """Verifies the cluster status.
1212 HPATH = "cluster-verify"
1213 HTYPE = constants.HTYPE_CLUSTER
1216 TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"
1220 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1221 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1222 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1223 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1224 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1225 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1226 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1227 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1228 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1229 ENODEDRBD = (TNODE, "ENODEDRBD")
1230 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1231 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1232 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1233 ENODEHV = (TNODE, "ENODEHV")
1234 ENODELVM = (TNODE, "ENODELVM")
1235 ENODEN1 = (TNODE, "ENODEN1")
1236 ENODENET = (TNODE, "ENODENET")
1237 ENODEOS = (TNODE, "ENODEOS")
1238 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1239 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1240 ENODERPC = (TNODE, "ENODERPC")
1241 ENODESSH = (TNODE, "ENODESSH")
1242 ENODEVERSION = (TNODE, "ENODEVERSION")
1243 ENODESETUP = (TNODE, "ENODESETUP")
1244 ENODETIME = (TNODE, "ENODETIME")
1245 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1247 ETYPE_FIELD = "code"
1248 ETYPE_ERROR = "ERROR"
1249 ETYPE_WARNING = "WARNING"
1251 _HOOKS_INDENT_RE = re.compile("^", re.M)
1253 class NodeImage(object):
1254 """A class representing the logical and physical status of a node.
1257 @ivar name: the node name to which this object refers
1258 @ivar volumes: a structure as returned from
1259 L{ganeti.backend.GetVolumeList} (runtime)
1260 @ivar instances: a list of running instances (runtime)
1261 @ivar pinst: list of configured primary instances (config)
1262 @ivar sinst: list of configured secondary instances (config)
1263 @ivar sbp: dictionary of {primary-node: list of instances} for all
1264 instances for which this node is secondary (config)
1265 @ivar mfree: free memory, as reported by hypervisor (runtime)
1266 @ivar dfree: free disk, as reported by the node (runtime)
1267 @ivar offline: the offline status (config)
1268 @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
1271 @type lvm_fail: boolean
1272 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1273 @type hyp_fail: boolean
1274 @ivar hyp_fail: whether the RPC call didn't return the instance list
1275 @type ghost: boolean
1276 @ivar ghost: whether this is a known node or not (config)
1277 @type os_fail: boolean
1278 @ivar os_fail: whether the RPC call didn't return valid OS data
1280 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1281 @type vm_capable: boolean
1282 @ivar vm_capable: whether the node can host instances
1285 def __init__(self, offline=False, name=None, vm_capable=True):
1294 self.offline = offline
1295 self.vm_capable = vm_capable
1296 self.rpc_fail = False
1297 self.lvm_fail = False
1298 self.hyp_fail = False
1300 self.os_fail = False
1303 def ExpandNames(self):
1304 self.needed_locks = {
1305 locking.LEVEL_NODE: locking.ALL_SET,
1306 locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1310 def _Error(self, ecode, item, msg, *args, **kwargs):
1311 """Format an error message.
1313 Based on the opcode's error_codes parameter, either format a
1314 parseable error code, or a simpler error string.
1316 This must be called only from Exec and functions called from Exec.
1319 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1321 # first complete the msg
1324 # then format the whole message
1325 if self.op.error_codes:
1326 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1332 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1333 # and finally report it via the feedback_fn
1334 self._feedback_fn(" - %s" % msg)
1336 def _ErrorIf(self, cond, *args, **kwargs):
1337 """Log an error message if the passed condition is True.
1340 cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
1343 # do not mark the operation as failed for WARN cases only
1344 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1345 self.bad = self.bad or cond
1347 def _VerifyNode(self, ninfo, nresult):
1348 """Perform some basic validation on data returned from a node.
1350 - check the result data structure is well formed and has all the
1352 - check ganeti version
1354 @type ninfo: L{objects.Node}
1355 @param ninfo: the node to check
1356 @param nresult: the results from the node
1358 @return: whether overall this call was successful (and we can expect
        reasonable values in the response)
1363 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1365 # main result, nresult should be a non-empty dict
1366 test = not nresult or not isinstance(nresult, dict)
1367 _ErrorIf(test, self.ENODERPC, node,
1368 "unable to verify node: no data returned")
1372 # compares ganeti version
1373 local_version = constants.PROTOCOL_VERSION
1374 remote_version = nresult.get("version", None)
1375 test = not (remote_version and
1376 isinstance(remote_version, (list, tuple)) and
1377 len(remote_version) == 2)
1378 _ErrorIf(test, self.ENODERPC, node,
1379 "connection to node returned invalid data")
1383 test = local_version != remote_version[0]
1384 _ErrorIf(test, self.ENODEVERSION, node,
1385 "incompatible protocol versions: master %s,"
1386 " node %s", local_version, remote_version[0])
1390 # node seems compatible, we can actually try to look into its results
1392 # full package version
1393 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1394 self.ENODEVERSION, node,
1395 "software version mismatch: master %s, node %s",
1396 constants.RELEASE_VERSION, remote_version[1],
1397 code=self.ETYPE_WARNING)
1399 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1400 if ninfo.vm_capable and isinstance(hyp_result, dict):
1401 for hv_name, hv_result in hyp_result.iteritems():
1402 test = hv_result is not None
1403 _ErrorIf(test, self.ENODEHV, node,
1404 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1406 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1407 if ninfo.vm_capable and isinstance(hvp_result, list):
1408 for item, hv_name, hv_result in hvp_result:
1409 _ErrorIf(True, self.ENODEHV, node,
1410 "hypervisor %s parameter verify failure (source %s): %s",
1411 hv_name, item, hv_result)
1413 test = nresult.get(constants.NV_NODESETUP,
1414 ["Missing NODESETUP results"])
1415 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1420 def _VerifyNodeTime(self, ninfo, nresult,
1421 nvinfo_starttime, nvinfo_endtime):
1422 """Check the node time.
1424 @type ninfo: L{objects.Node}
1425 @param ninfo: the node to check
1426 @param nresult: the remote results for the node
1427 @param nvinfo_starttime: the start time of the RPC call
1428 @param nvinfo_endtime: the end time of the RPC call
1432 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1434 ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
1437 except (ValueError, TypeError):
1438 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1441 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1442 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1443 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None
1448 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1449 "Node time diverges by at least %s from master node time",
1452 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.
1455 @type ninfo: L{objects.Node}
1456 @param ninfo: the node to check
1457 @param nresult: the remote results for the node
1458 @param vg_name: the configured VG name
1465 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1467 # checks vg existence and size > 20G
1468 vglist = nresult.get(constants.NV_VGLIST, None)
1470 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1472 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1473 constants.MIN_VG_SIZE)
1474 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1477 pvlist = nresult.get(constants.NV_PVLIST, None)
1478 test = pvlist is None
1479 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1481 # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
1484 for _, pvname, owner_vg in pvlist:
1485 test = ":" in pvname
1486 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1487 " '%s' of VG '%s'", pvname, owner_vg)
1489 def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.
1492 @type ninfo: L{objects.Node}
1493 @param ninfo: the node to check
1494 @param nresult: the remote results for the node
1498 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1500 test = constants.NV_NODELIST not in nresult
1501 _ErrorIf(test, self.ENODESSH, node,
1502 "node hasn't returned node ssh connectivity data")
1504 if nresult[constants.NV_NODELIST]:
1505 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1506 _ErrorIf(True, self.ENODESSH, node,
1507 "ssh communication with node '%s': %s", a_node, a_msg)
1509 test = constants.NV_NODENETTEST not in nresult
1510 _ErrorIf(test, self.ENODENET, node,
1511 "node hasn't returned node tcp connectivity data")
1513 if nresult[constants.NV_NODENETTEST]:
1514 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
      for anode in nlist:
        _ErrorIf(True, self.ENODENET, node,
                 "tcp communication with node '%s': %s",
                 anode, nresult[constants.NV_NODENETTEST][anode])
1520 test = constants.NV_MASTERIP not in nresult
1521 _ErrorIf(test, self.ENODENET, node,
1522 "node hasn't returned node master IP reachability data")
1524 if not nresult[constants.NV_MASTERIP]:
1525 if node == self.master_node:
1526 msg = "the master node cannot reach the master IP (not configured?)"
1528 msg = "cannot reach the master IP"
1529 _ErrorIf(True, self.ENODENET, node, msg)
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
1533 """Verify an instance.
1535 This function checks to see if the required block devices are
1536 available on the instance's node.
1539 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1540 node_current = instanceconfig.primary_node
1542 node_vol_should = {}
1543 instanceconfig.MapLVsByNode(node_vol_should)
1545 for node in node_vol_should:
1546 n_img = node_image[node]
1547 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1548 # ignore missing volumes on offline or broken nodes
1550 for volume in node_vol_should[node]:
1551 test = volume not in n_img.volumes
1552 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1553 "volume %s missing on node %s", volume, node)
1555 if instanceconfig.admin_up:
1556 pri_img = node_image[node_current]
1557 test = instance not in pri_img.instances and not pri_img.offline
1558 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1559 "instance not running on its primary node %s",
1562 for node, n_img in node_image.items():
1563 if node != node_current:
1564 test = instance in n_img.instances
1565 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1566 "instance should not run on node %s", node)
1568 diskdata = [(nname, success, status, idx)
1569 for (nname, disks) in diskstatus.items()
1570 for idx, (success, status) in enumerate(disks)]
1572 for nname, success, bdev_status, idx in diskdata:
1573 # the 'ghost node' construction in Exec() ensures that we have a
1575 snode = node_image[nname]
1576 bad_snode = snode.ghost or snode.offline
1577 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1578 self.EINSTANCEFAULTYDISK, instance,
1579 "couldn't retrieve status for disk/%s on %s: %s",
1580 idx, nname, bdev_status)
1581 _ErrorIf((instanceconfig.admin_up and success and
1582 bdev_status.ldisk_status == constants.LDS_FAULTY),
1583 self.EINSTANCEFAULTYDISK, instance,
1584 "disk/%s on %s is faulty", idx, nname)
1586 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1587 """Verify if there are any unknown volumes in the cluster.
1589 The .os, .swap and backup volumes are ignored. All other volumes are
1590 reported as unknown.
1592 @type reserved: L{ganeti.utils.FieldSet}
1593 @param reserved: a FieldSet of reserved volume names
1596 for node, n_img in node_image.items():
1597 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1598 # skip non-healthy nodes
1600 for volume in n_img.volumes:
1601 test = ((node not in node_vol_should or
1602 volume not in node_vol_should[node]) and
1603 not reserved.Matches(volume))
1604 self._ErrorIf(test, self.ENODEORPHANLV, node,
1605 "volume %s is unknown", volume)
1607 def _VerifyOrphanInstances(self, instancelist, node_image):
1608 """Verify the list of running instances.
1610 This checks what instances are running but unknown to the cluster.
1613 for node, n_img in node_image.items():
1614 for o_inst in n_img.instances:
1615 test = o_inst not in instancelist
1616 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1617 "instance %s on node %s should not exist", o_inst, node)
1619 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1620 """Verify N+1 Memory Resilience.
1622 Check that if one single node dies we can still start all the
1623 instances it was primary for.
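    For example (hypothetical sizes): if node A is the secondary for three
    instances whose primary is node B, each configured with 2048MB of memory
    and auto_balance enabled, then node A must report at least 6144MB of free
    memory, otherwise an N+1 error is flagged for it.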
1626 cluster_info = self.cfg.GetClusterInfo()
1627 for node, n_img in node_image.items():
1628 # This code checks that every node which is now listed as
1629 # secondary has enough memory to host all instances it is
1630 # supposed to should a single other node in the cluster fail.
1631 # FIXME: not ready for failover to an arbitrary node
1632 # FIXME: does not support file-backed instances
1633 # WARNING: we currently take into account down instances as well
1634 # as up ones, considering that even if they're down someone
1635 # might want to start them even in the event of a node failure.
1637 # we're skipping offline nodes from the N+1 warning, since
      # most likely we don't have good memory information from them;
1639 # we already list instances living on such nodes, and that's
1642 for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
1645 bep = cluster_info.FillBE(instance_cfg[instance])
1646 if bep[constants.BE_AUTO_BALANCE]:
1647 needed_mem += bep[constants.BE_MEMORY]
1648 test = n_img.mfree < needed_mem
1649 self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
1651 " should node %s fail", prinode)
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
1655 """Verifies and computes the node required file checksums.
1657 @type ninfo: L{objects.Node}
1658 @param ninfo: the node to check
1659 @param nresult: the remote results for the node
1660 @param file_list: required list of files
1661 @param local_cksum: dictionary of local files and their checksums
1662 @param master_files: list of files that only masters should have
1666 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1668 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1669 test = not isinstance(remote_cksum, dict)
1670 _ErrorIf(test, self.ENODEFILECHECK, node,
1671 "node hasn't returned file checksum data")
1675 for file_name in file_list:
1676 node_is_mc = ninfo.master_candidate
1677 must_have = (file_name not in master_files) or node_is_mc
1679 test1 = file_name not in remote_cksum
1681 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1683 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1684 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1685 "file '%s' missing", file_name)
1686 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1687 "file '%s' has wrong checksum", file_name)
1688 # not candidate and this is not a must-have file
1689 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1690 "file '%s' should not exist on non master"
1691 " candidates (and the file is outdated)", file_name)
1692 # all good, except non-master/non-must have combination
1693 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1694 "file '%s' should not exist"
1695 " on non master candidates", file_name)
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.
1701 @type ninfo: L{objects.Node}
1702 @param ninfo: the node to check
1703 @param nresult: the remote results for the node
1704 @param instanceinfo: the dict of instances
1705 @param drbd_helper: the configured DRBD usermode helper
1706 @param drbd_map: the DRBD map as returned by
1707 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
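        (as used below, this is a mapping of the form
        C{{node1: {minor1: instance_name1, minor2: instance_name2, ...}, ...}},
        i.e. per-node dictionaries of DRBD minors to instance names)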
1711 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
1716 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1717 "no drbd usermode helper returned")
1719 status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1722 "drbd usermode helper check unsuccessful: %s", payload)
1723 test = status and (payload != drbd_helper)
1724 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1725 "wrong drbd usermode helper: %s", payload)
1727 # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
1730 test = instance not in instanceinfo
1731 _ErrorIf(test, self.ECLUSTERCFG, None,
1732 "ghost instance '%s' in temporary DRBD map", instance)
1733 # ghost instance should not be running, but otherwise we
1734 # don't give double warnings (both ghost instance and
1735 # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)
1742 # and now check them
1743 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1744 test = not isinstance(used_minors, (tuple, list))
1745 _ErrorIf(test, self.ENODEDRBD, node,
1746 "cannot parse drbd status file: %s", str(used_minors))
1748 # we cannot check drbd status
1751 for minor, (iname, must_exist) in node_drbd.items():
1752 test = minor not in used_minors and must_exist
1753 _ErrorIf(test, self.ENODEDRBD, node,
1754 "drbd minor %d of instance %s is not active", minor, iname)
1755 for minor in used_minors:
1756 test = minor not in node_drbd
1757 _ErrorIf(test, self.ENODEDRBD, node,
1758 "unallocated drbd minor %d is in use", minor)
1760 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1761 """Builds the node OS structures.
1763 @type ninfo: L{objects.Node}
1764 @param ninfo: the node to check
1765 @param nresult: the remote results for the node
1766 @param nimg: the node image object
1770 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1772 remote_os = nresult.get(constants.NV_OSLIST, None)
1773 test = (not isinstance(remote_os, list) or
1774 not compat.all(isinstance(v, list) and len(v) == 7
1775 for v in remote_os))
1777 _ErrorIf(test, self.ENODEOS, node,
1778 "node hasn't returned valid OS data")
    os_dict = {}

    for (name, os_path, status, diagnose,
1788 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
      if name not in os_dict:
        os_dict[name] = []
1793 # parameters is a list of lists instead of list of tuples due to
1794 # JSON lacking a real tuple type, fix it:
1795 parameters = [tuple(v) for v in parameters]
1796 os_dict[name].append((os_path, status, diagnose,
1797 set(variants), set(parameters), set(api_ver)))
1799 nimg.oslist = os_dict
1801 def _VerifyNodeOS(self, ninfo, nimg, base):
1802 """Verifies the node OS list.
1804 @type ninfo: L{objects.Node}
1805 @param ninfo: the node to check
1806 @param nimg: the node image object
1807 @param base: the 'template' node we match against (e.g. from the master)
1811 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1813 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1815 for os_name, os_data in nimg.oslist.items():
1816 assert os_data, "Empty OS status for OS %s?!" % os_name
1817 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1818 _ErrorIf(not f_status, self.ENODEOS, node,
1819 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1820 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1821 "OS '%s' has multiple entries (first one shadows the rest): %s",
1822 os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
1824 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1825 and not f_var, self.ENODEOS, node,
1826 "OS %s with API at least %d does not declare any variant",
1827 os_name, constants.OS_API_V15)
1828 # comparisons with the 'base' image
1829 test = os_name not in base.oslist
1830 _ErrorIf(test, self.ENODEOS, node,
1831 "Extra OS %s not present on reference node (%s)",
1835 assert base.oslist[os_name], "Base node has empty OS status?"
1836 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1838 # base OS is invalid, skipping
1840 for kind, a, b in [("API version", f_api, b_api),
1841 ("variants list", f_var, b_var),
1842 ("parameters", f_param, b_param)]:
1843 _ErrorIf(a != b, self.ENODEOS, node,
1844 "OS %s %s differs from reference node %s: %s vs. %s",
1845 kind, os_name, base.name,
1846 utils.CommaJoin(a), utils.CommaJoin(b))
1848 # check any missing OSes
1849 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1850 _ErrorIf(missing, self.ENODEOS, node,
1851 "OSes present on reference node %s but missing on this node: %s",
1852 base.name, utils.CommaJoin(missing))
1854 def _VerifyOob(self, ninfo, nresult):
1855 """Verifies out of band functionality of a node.
1857 @type ninfo: L{objects.Node}
1858 @param ninfo: the node to check
1859 @param nresult: the remote results for the node
1863 # We just have to verify the paths on master and/or master candidates
1864 # as the oob helper is invoked on the master
1865 if ((ninfo.master_candidate or ninfo.master_capable) and
1866 constants.NV_OOB_PATHS in nresult):
1867 for path_result in nresult[constants.NV_OOB_PATHS]:
1868 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1870 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1871 """Verifies and updates the node volume data.
1873 This function will update a L{NodeImage}'s internal structures
1874 with data from the remote call.
1876 @type ninfo: L{objects.Node}
1877 @param ninfo: the node to check
1878 @param nresult: the remote results for the node
1879 @param nimg: the node image object
1880 @param vg_name: the configured VG name
1884 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1886 nimg.lvm_fail = True
1887 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1890 elif isinstance(lvdata, basestring):
1891 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1892 utils.SafeEncode(lvdata))
1893 elif not isinstance(lvdata, dict):
1894 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1896 nimg.volumes = lvdata
1897 nimg.lvm_fail = False
1899 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1900 """Verifies and updates the node instance list.
1902 If the listing was successful, then updates this node's instance
1903 list. Otherwise, it marks the RPC call as failed for the instance
1906 @type ninfo: L{objects.Node}
1907 @param ninfo: the node to check
1908 @param nresult: the remote results for the node
1909 @param nimg: the node image object
1912 idata = nresult.get(constants.NV_INSTANCELIST, None)
1913 test = not isinstance(idata, list)
1914 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1915 " (instancelist): %s", utils.SafeEncode(str(idata)))
1917 nimg.hyp_fail = True
1919 nimg.instances = idata
1921 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1922 """Verifies and computes a node information map
1924 @type ninfo: L{objects.Node}
1925 @param ninfo: the node to check
1926 @param nresult: the remote results for the node
1927 @param nimg: the node image object
1928 @param vg_name: the configured VG name
1932 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1934 # try to read free memory (from the hypervisor)
1935 hv_info = nresult.get(constants.NV_HVINFO, None)
1936 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1937 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1940 nimg.mfree = int(hv_info["memory_free"])
1941 except (ValueError, TypeError):
1942 _ErrorIf(True, self.ENODERPC, node,
1943 "node returned invalid nodeinfo, check hypervisor")
1945 # FIXME: devise a free space model for file based instances as well
1946 if vg_name is not None:
1947 test = (constants.NV_VGLIST not in nresult or
1948 vg_name not in nresult[constants.NV_VGLIST])
1949 _ErrorIf(test, self.ENODELVM, node,
1950 "node didn't return data for the volume group '%s'"
1951 " - it is either missing or broken", vg_name)
1954 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1955 except (ValueError, TypeError):
1956 _ErrorIf(True, self.ENODERPC, node,
1957 "node returned invalid LVM info, check LVM status")
1959 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1960 """Gets per-disk status information for all instances.
1962 @type nodelist: list of strings
1963 @param nodelist: Node names
1964 @type node_image: dict of (name, L{objects.Node})
1965 @param node_image: Node objects
1966 @type instanceinfo: dict of (name, L{objects.Instance})
1967 @param instanceinfo: Instance objects
1968 @rtype: {instance: {node: [(success, payload)]}}
1969 @return: a dictionary of per-instance dictionaries with nodes as
1970 keys and disk information as values; the disk information is a
1971 list of tuples (success, payload)
1974 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1977 node_disks_devonly = {}
1978 diskless_instances = set()
1979 diskless = constants.DT_DISKLESS
1981 for nname in nodelist:
1982 node_instances = list(itertools.chain(node_image[nname].pinst,
1983 node_image[nname].sinst))
1984 diskless_instances.update(inst for inst in node_instances
1985 if instanceinfo[inst].disk_template == diskless)
1986 disks = [(inst, disk)
1987 for inst in node_instances
1988 for disk in instanceinfo[inst].disks]
1991 # No need to collect data
1994 node_disks[nname] = disks
1996 # Creating copies as SetDiskID below will modify the objects and that can
1997 # lead to incorrect data returned from nodes
1998 devonly = [dev.Copy() for (_, dev) in disks]
2001 self.cfg.SetDiskID(dev, nname)
2003 node_disks_devonly[nname] = devonly
2005 assert len(node_disks) == len(node_disks_devonly)
2007 # Collect data from all nodes with disks
2008 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2011 assert len(result) == len(node_disks)
2015 for (nname, nres) in result.items():
2016 disks = node_disks[nname]
2019 # No data from this node
2020 data = len(disks) * [(False, "node offline")]
2023 _ErrorIf(msg, self.ENODERPC, nname,
2024 "while getting disk information: %s", msg)
2026 # No data from this node
2027 data = len(disks) * [(False, msg)]
2030 for idx, i in enumerate(nres.payload):
2031 if isinstance(i, (tuple, list)) and len(i) == 2:
2034 logging.warning("Invalid result from node %s, entry %d: %s",
2036 data.append((False, "Invalid result from the remote node"))
2038 for ((inst, _), status) in zip(disks, data):
2039 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2041 # Add empty entries for diskless instances.
2042 for inst in diskless_instances:
2043 assert inst not in instdisk
2046 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2047 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2048 compat.all(isinstance(s, (tuple, list)) and
2049 len(s) == 2 for s in statuses)
2050 for inst, nnames in instdisk.items()
2051 for nname, statuses in nnames.items())
2052 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
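# Illustrative sketch of the instdisk structure returned above (instance and
# node names are assumptions; the shape follows the docstring and the code):
#   instdisk = {
#     "web1.example.com": {
#       "node1.example.com": [(True, status_disk0), (True, status_disk1)],
#       "node2.example.com": [(False, "node offline")],
#     },
#     "diskless1.example.com": {},   # diskless instances get an empty dict
#   }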
2056 def _VerifyHVP(self, hvp_data):
2057 """Verifies locally the syntax of the hypervisor parameters.
2060 for item, hv_name, hv_params in hvp_data:
2061 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2064 hv_class = hypervisor.GetHypervisor(hv_name)
2065 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2066 hv_class.CheckParameterSyntax(hv_params)
2067 except errors.GenericError, err:
2068 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
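# Illustrative sketch of the hvp_data items checked above (values are
# assumptions; the (source, hypervisor, parameters) layout follows the
# callers in Exec):
#   ("cluster", "xen-pvm", {"kernel_path": "/boot/vmlinuz-2.6-xenU"}),
#   ("os debian-image", "xen-pvm", {...}),
#   ("instance web1.example.com", "kvm", {...})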
2071 def BuildHooksEnv(self):
2074 Cluster-Verify hooks are run only in the post phase; if they fail, their
2075 output is logged in the verify output and the verification fails.
2078 all_nodes = self.cfg.GetNodeList()
2080 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2082 for node in self.cfg.GetAllNodesInfo().values():
2083 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2085 return env, [], all_nodes
2087 def Exec(self, feedback_fn):
2088 """Verify integrity of cluster, performing various test on nodes.
2091 # This method has too many local variables. pylint: disable-msg=R0914
2093 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2094 verbose = self.op.verbose
2095 self._feedback_fn = feedback_fn
2096 feedback_fn("* Verifying global settings")
2097 for msg in self.cfg.VerifyConfig():
2098 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2100 # Check the cluster certificates
2101 for cert_filename in constants.ALL_CERT_FILES:
2102 (errcode, msg) = _VerifyCertificate(cert_filename)
2103 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2105 vg_name = self.cfg.GetVGName()
2106 drbd_helper = self.cfg.GetDRBDHelper()
2107 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2108 cluster = self.cfg.GetClusterInfo()
2109 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2110 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2111 nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2112 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2113 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2114 for iname in instancelist)
2115 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2116 i_non_redundant = [] # Non redundant instances
2117 i_non_a_balanced = [] # Non auto-balanced instances
2118 n_offline = 0 # Count of offline nodes
2119 n_drained = 0 # Count of nodes being drained
2120 node_vol_should = {}
2122 # FIXME: verify OS list
2123 # do local checksums
2124 master_files = [constants.CLUSTER_CONF_FILE]
2125 master_node = self.master_node = self.cfg.GetMasterNode()
2126 master_ip = self.cfg.GetMasterIP()
2128 file_names = ssconf.SimpleStore().GetFileList()
2129 file_names.extend(constants.ALL_CERT_FILES)
2130 file_names.extend(master_files)
2131 if cluster.modify_etc_hosts:
2132 file_names.append(constants.ETC_HOSTS)
2134 local_checksums = utils.FingerprintFiles(file_names)
2136 # Compute the set of hypervisor parameters
2138 for hv_name in hypervisors:
2139 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2140 for os_name, os_hvp in cluster.os_hvp.items():
2141 for hv_name, hv_params in os_hvp.items():
2144 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2145 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2146 # TODO: collapse identical parameter values in a single one
2147 for instance in instanceinfo.values():
2148 if not instance.hvparams:
2150 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2151 cluster.FillHV(instance)))
2152 # and verify them locally
2153 self._VerifyHVP(hvp_data)
2155 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2156 node_verify_param = {
2157 constants.NV_FILELIST: file_names,
2158 constants.NV_NODELIST: [node.name for node in nodeinfo
2159 if not node.offline],
2160 constants.NV_HYPERVISOR: hypervisors,
2161 constants.NV_HVPARAMS: hvp_data,
2162 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2163 node.secondary_ip) for node in nodeinfo
2164 if not node.offline],
2165 constants.NV_INSTANCELIST: hypervisors,
2166 constants.NV_VERSION: None,
2167 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2168 constants.NV_NODESETUP: None,
2169 constants.NV_TIME: None,
2170 constants.NV_MASTERIP: (master_node, master_ip),
2171 constants.NV_OSLIST: None,
2172 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2175 if vg_name is not None:
2176 node_verify_param[constants.NV_VGLIST] = None
2177 node_verify_param[constants.NV_LVLIST] = vg_name
2178 node_verify_param[constants.NV_PVLIST] = [vg_name]
2179 node_verify_param[constants.NV_DRBDLIST] = None
2182 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2184 # Build our expected cluster state
2185 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2187 vm_capable=node.vm_capable))
2188 for node in nodeinfo)
2192 for node in nodeinfo:
2193 path = _SupportsOob(self.cfg, node)
2194 if path and path not in oob_paths:
2195 oob_paths.append(path)
2198 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2200 for instance in instancelist:
2201 inst_config = instanceinfo[instance]
2203 for nname in inst_config.all_nodes:
2204 if nname not in node_image:
2206 gnode = self.NodeImage(name=nname)
2208 node_image[nname] = gnode
2210 inst_config.MapLVsByNode(node_vol_should)
2212 pnode = inst_config.primary_node
2213 node_image[pnode].pinst.append(instance)
2215 for snode in inst_config.secondary_nodes:
2216 nimg = node_image[snode]
2217 nimg.sinst.append(instance)
2218 if pnode not in nimg.sbp:
2219 nimg.sbp[pnode] = []
2220 nimg.sbp[pnode].append(instance)
2222 # At this point, we have the in-memory data structures complete,
2223 # except for the runtime information, which we'll gather next
2225 # Due to the way our RPC system works, exact response times cannot be
2226 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2227 # time before and after executing the request, we can at least have a time
2229 nvinfo_starttime = time.time()
2230 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2231 self.cfg.GetClusterName())
2232 nvinfo_endtime = time.time()
2234 all_drbd_map = self.cfg.ComputeDRBDMap()
2236 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2237 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2239 feedback_fn("* Verifying node status")
2243 for node_i in nodeinfo:
2245 nimg = node_image[node]
2249 feedback_fn("* Skipping offline node %s" % (node,))
2253 if node == master_node:
2255 elif node_i.master_candidate:
2256 ntype = "master candidate"
2257 elif node_i.drained:
2263 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2265 msg = all_nvinfo[node].fail_msg
2266 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2268 nimg.rpc_fail = True
2271 nresult = all_nvinfo[node].payload
2273 nimg.call_ok = self._VerifyNode(node_i, nresult)
2274 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2275 self._VerifyNodeNetwork(node_i, nresult)
2276 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2279 self._VerifyOob(node_i, nresult)
2282 self._VerifyNodeLVM(node_i, nresult, vg_name)
2283 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2286 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2287 self._UpdateNodeInstances(node_i, nresult, nimg)
2288 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2289 self._UpdateNodeOS(node_i, nresult, nimg)
2290 if not nimg.os_fail:
2291 if refos_img is None:
2293 self._VerifyNodeOS(node_i, nimg, refos_img)
2295 feedback_fn("* Verifying instance status")
2296 for instance in instancelist:
2298 feedback_fn("* Verifying instance %s" % instance)
2299 inst_config = instanceinfo[instance]
2300 self._VerifyInstance(instance, inst_config, node_image,
2302 inst_nodes_offline = []
2304 pnode = inst_config.primary_node
2305 pnode_img = node_image[pnode]
2306 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2307 self.ENODERPC, pnode, "instance %s, connection to"
2308 " primary node failed", instance)
2310 _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2311 "instance lives on offline node %s", inst_config.primary_node)
2313 # If the instance is non-redundant we cannot survive losing its primary
2314 # node, so we are not N+1 compliant. On the other hand we have no disk
2315 # templates with more than one secondary so that situation is not well
2317 # FIXME: does not support file-backed instances
2318 if not inst_config.secondary_nodes:
2319 i_non_redundant.append(instance)
2321 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2322 instance, "instance has multiple secondary nodes: %s",
2323 utils.CommaJoin(inst_config.secondary_nodes),
2324 code=self.ETYPE_WARNING)
2326 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2327 pnode = inst_config.primary_node
2328 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2329 instance_groups = {}
2331 for node in instance_nodes:
2332 instance_groups.setdefault(nodeinfo_byname[node].group,
2336 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2337 # Sort so that we always list the primary node first.
2338 for group, nodes in sorted(instance_groups.items(),
2339 key=lambda (_, nodes): pnode in nodes,
2342 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2343 instance, "instance has primary and secondary nodes in"
2344 " different groups: %s", utils.CommaJoin(pretty_list),
2345 code=self.ETYPE_WARNING)
2347 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2348 i_non_a_balanced.append(instance)
2350 for snode in inst_config.secondary_nodes:
2351 s_img = node_image[snode]
2352 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2353 "instance %s, connection to secondary node failed", instance)
2356 inst_nodes_offline.append(snode)
2358 # warn that the instance lives on offline nodes
2359 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2360 "instance has offline secondary node(s) %s",
2361 utils.CommaJoin(inst_nodes_offline))
2362 # ... or ghost/non-vm_capable nodes
2363 for node in inst_config.all_nodes:
2364 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2365 "instance lives on ghost node %s", node)
2366 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2367 instance, "instance lives on non-vm_capable node %s", node)
2369 feedback_fn("* Verifying orphan volumes")
2370 reserved = utils.FieldSet(*cluster.reserved_lvs)
2371 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2373 feedback_fn("* Verifying orphan instances")
2374 self._VerifyOrphanInstances(instancelist, node_image)
2376 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2377 feedback_fn("* Verifying N+1 Memory redundancy")
2378 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2380 feedback_fn("* Other Notes")
2382 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2383 % len(i_non_redundant))
2385 if i_non_a_balanced:
2386 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2387 % len(i_non_a_balanced))
2390 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2393 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2397 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2398 """Analyze the post-hooks' result
2400 This method analyses the hook result, handles it, and sends some
2401 nicely-formatted feedback back to the user.
2403 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2404 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2405 @param hooks_results: the results of the multi-node hooks rpc call
2406 @param feedback_fn: function used to send feedback back to the caller
2407 @param lu_result: previous Exec result
2408 @return: the new Exec result, based on the previous result
2412 # We only really run POST phase hooks, and are only interested in
2414 if phase == constants.HOOKS_PHASE_POST:
2415 # Used to change hooks' output to proper indentation
2416 feedback_fn("* Hooks Results")
2417 assert hooks_results, "invalid result from hooks"
2419 for node_name in hooks_results:
2420 res = hooks_results[node_name]
2422 test = msg and not res.offline
2423 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2424 "Communication failure in hooks execution: %s", msg)
2425 if res.offline or msg:
2426 # No need to investigate payload if node is offline or gave an error.
2427 # manually override lu_result here, as _ErrorIf only
2428 # overrides self.bad
2431 for script, hkr, output in res.payload:
2432 test = hkr == constants.HKR_FAIL
2433 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2434 "Script %s failed, output:", script)
2436 output = self._HOOKS_INDENT_RE.sub(' ', output)
2437 feedback_fn("%s" % output)
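# Illustrative sketch of a per-node hooks payload handled above (script names
# and output are assumptions; each entry is a (script, status, output) tuple):
#   res.payload = [("10-check-disks", constants.HKR_SUCCESS, ""),
#                  ("20-custom-check", constants.HKR_FAIL, "check failed")]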
2443 class LUClusterVerifyDisks(NoHooksLU):
2444 """Verifies the cluster disks status.
2449 def ExpandNames(self):
2450 self.needed_locks = {
2451 locking.LEVEL_NODE: locking.ALL_SET,
2452 locking.LEVEL_INSTANCE: locking.ALL_SET,
2454 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2456 def Exec(self, feedback_fn):
2457 """Verify integrity of cluster disks.
2459 @rtype: tuple of three items
2460 @return: a tuple of (dict of node-to-node_error, list of instances
2461 which need activate-disks, dict of instance: (node, volume) for
2465 result = res_nodes, res_instances, res_missing = {}, [], {}
2467 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2468 instances = self.cfg.GetAllInstancesInfo().values()
2471 for inst in instances:
2473 if not inst.admin_up:
2475 inst.MapLVsByNode(inst_lvs)
2476 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2477 for node, vol_list in inst_lvs.iteritems():
2478 for vol in vol_list:
2479 nv_dict[(node, vol)] = inst
2484 node_lvs = self.rpc.call_lv_list(nodes, [])
2485 for node, node_res in node_lvs.items():
2486 if node_res.offline:
2488 msg = node_res.fail_msg
2490 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2491 res_nodes[node] = msg
2494 lvs = node_res.payload
2495 for lv_name, (_, _, lv_online) in lvs.items():
2496 inst = nv_dict.pop((node, lv_name), None)
2497 if (not lv_online and inst is not None
2498 and inst.name not in res_instances):
2499 res_instances.append(inst.name)
2501 # any leftover items in nv_dict are missing LVs, let's arrange the
2503 for key, inst in nv_dict.iteritems():
2504 if inst.name not in res_missing:
2505 res_missing[inst.name] = []
2506 res_missing[inst.name].append(key)
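# Illustrative sketch of the result tuple assembled above (names are
# assumptions; the shape follows the docstring: per-node errors, instances
# needing activate-disks, and missing LVs keyed by instance):
#   ({"node3.example.com": "Error enumerating LVs: ..."},
#    ["web1.example.com"],
#    {"db1.example.com": [("node2.example.com", "some-lv-name")]})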
2511 class LUClusterRepairDiskSizes(NoHooksLU):
2512 """Verifies the cluster disks sizes.
2517 def ExpandNames(self):
2518 if self.op.instances:
2519 self.wanted_names = []
2520 for name in self.op.instances:
2521 full_name = _ExpandInstanceName(self.cfg, name)
2522 self.wanted_names.append(full_name)
2523 self.needed_locks = {
2524 locking.LEVEL_NODE: [],
2525 locking.LEVEL_INSTANCE: self.wanted_names,
2527 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2529 self.wanted_names = None
2530 self.needed_locks = {
2531 locking.LEVEL_NODE: locking.ALL_SET,
2532 locking.LEVEL_INSTANCE: locking.ALL_SET,
2534 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2536 def DeclareLocks(self, level):
2537 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2538 self._LockInstancesNodes(primary_only=True)
2540 def CheckPrereq(self):
2541 """Check prerequisites.
2543 This only checks the optional instance list against the existing names.
2546 if self.wanted_names is None:
2547 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2549 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2550 in self.wanted_names]
2552 def _EnsureChildSizes(self, disk):
2553 """Ensure children of the disk have the needed disk size.
2555 This applies mainly to DRBD8 and fixes an issue where the
2556 children have a smaller disk size.
2558 @param disk: an L{ganeti.objects.Disk} object
2561 if disk.dev_type == constants.LD_DRBD8:
2562 assert disk.children, "Empty children for DRBD8?"
2563 fchild = disk.children[0]
2564 mismatch = fchild.size < disk.size
2566 self.LogInfo("Child disk has size %d, parent %d, fixing",
2567 fchild.size, disk.size)
2568 fchild.size = disk.size
2570 # and we recurse on this child only, not on the metadev
2571 return self._EnsureChildSizes(fchild) or mismatch
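# Illustrative example of what _EnsureChildSizes fixes (sizes are assumptions,
# in MiB): a DRBD8 disk recorded at 10240 whose data child is recorded at
# 10112 gets the child bumped to 10240 in the configuration; the method then
# returns True so the caller knows the configuration needs to be written out.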
2575 def Exec(self, feedback_fn):
2576 """Verify the size of cluster disks.
2579 # TODO: check child disks too
2580 # TODO: check differences in size between primary/secondary nodes
2582 for instance in self.wanted_instances:
2583 pnode = instance.primary_node
2584 if pnode not in per_node_disks:
2585 per_node_disks[pnode] = []
2586 for idx, disk in enumerate(instance.disks):
2587 per_node_disks[pnode].append((instance, idx, disk))
2590 for node, dskl in per_node_disks.items():
2591 newl = [v[2].Copy() for v in dskl]
2593 self.cfg.SetDiskID(dsk, node)
2594 result = self.rpc.call_blockdev_getsize(node, newl)
2596 self.LogWarning("Failure in blockdev_getsize call to node"
2597 " %s, ignoring", node)
2599 if len(result.payload) != len(dskl):
2600 logging.warning("Invalid result from node %s: len(dksl)=%d,"
2601 " result.payload=%s", node, len(dskl), result.payload)
2602 self.LogWarning("Invalid result from node %s, ignoring node results",
2605 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2607 self.LogWarning("Disk %d of instance %s did not return size"
2608 " information, ignoring", idx, instance.name)
2610 if not isinstance(size, (int, long)):
2611 self.LogWarning("Disk %d of instance %s did not return valid"
2612 " size information, ignoring", idx, instance.name)
2615 if size != disk.size:
2616 self.LogInfo("Disk %d of instance %s has mismatched size,"
2617 " correcting: recorded %d, actual %d", idx,
2618 instance.name, disk.size, size)
2620 self.cfg.Update(instance, feedback_fn)
2621 changed.append((instance.name, idx, size))
2622 if self._EnsureChildSizes(disk):
2623 self.cfg.Update(instance, feedback_fn)
2624 changed.append((instance.name, idx, disk.size))
2628 class LUClusterRename(LogicalUnit):
2629 """Rename the cluster.
2632 HPATH = "cluster-rename"
2633 HTYPE = constants.HTYPE_CLUSTER
2635 def BuildHooksEnv(self):
2640 "OP_TARGET": self.cfg.GetClusterName(),
2641 "NEW_NAME": self.op.name,
2643 mn = self.cfg.GetMasterNode()
2644 all_nodes = self.cfg.GetNodeList()
2645 return env, [mn], all_nodes
2647 def CheckPrereq(self):
2648 """Verify that the passed name is a valid one.
2651 hostname = netutils.GetHostname(name=self.op.name,
2652 family=self.cfg.GetPrimaryIPFamily())
2654 new_name = hostname.name
2655 self.ip = new_ip = hostname.ip
2656 old_name = self.cfg.GetClusterName()
2657 old_ip = self.cfg.GetMasterIP()
2658 if new_name == old_name and new_ip == old_ip:
2659 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2660 " cluster has changed",
2662 if new_ip != old_ip:
2663 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2664 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2665 " reachable on the network" %
2666 new_ip, errors.ECODE_NOTUNIQUE)
2668 self.op.name = new_name
2670 def Exec(self, feedback_fn):
2671 """Rename the cluster.
2674 clustername = self.op.name
2677 # shutdown the master IP
2678 master = self.cfg.GetMasterNode()
2679 result = self.rpc.call_node_stop_master(master, False)
2680 result.Raise("Could not disable the master role")
2683 cluster = self.cfg.GetClusterInfo()
2684 cluster.cluster_name = clustername
2685 cluster.master_ip = ip
2686 self.cfg.Update(cluster, feedback_fn)
2688 # update the known hosts file
2689 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2690 node_list = self.cfg.GetOnlineNodeList()
2692 node_list.remove(master)
2695 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2697 result = self.rpc.call_node_start_master(master, False, False)
2698 msg = result.fail_msg
2700 self.LogWarning("Could not re-enable the master role on"
2701 " the master, please restart manually: %s", msg)
2706 class LUClusterSetParams(LogicalUnit):
2707 """Change the parameters of the cluster.
2710 HPATH = "cluster-modify"
2711 HTYPE = constants.HTYPE_CLUSTER
2714 def CheckArguments(self):
2718 if self.op.uid_pool:
2719 uidpool.CheckUidPool(self.op.uid_pool)
2721 if self.op.add_uids:
2722 uidpool.CheckUidPool(self.op.add_uids)
2724 if self.op.remove_uids:
2725 uidpool.CheckUidPool(self.op.remove_uids)
2727 def ExpandNames(self):
2728 # FIXME: in the future maybe other cluster params won't require checking on
2729 # all nodes to be modified.
2730 self.needed_locks = {
2731 locking.LEVEL_NODE: locking.ALL_SET,
2733 self.share_locks[locking.LEVEL_NODE] = 1
2735 def BuildHooksEnv(self):
2740 "OP_TARGET": self.cfg.GetClusterName(),
2741 "NEW_VG_NAME": self.op.vg_name,
2743 mn = self.cfg.GetMasterNode()
2744 return env, [mn], [mn]
2746 def CheckPrereq(self):
2747 """Check prerequisites.
2749 This checks that the given parameters do not conflict and
2750 that the given volume group is valid.
2753 if self.op.vg_name is not None and not self.op.vg_name:
2754 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2755 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2756 " instances exist", errors.ECODE_INVAL)
2758 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2759 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2760 raise errors.OpPrereqError("Cannot disable drbd helper while"
2761 " drbd-based instances exist",
2764 node_list = self.acquired_locks[locking.LEVEL_NODE]
2766 # if vg_name not None, checks given volume group on all nodes
2768 vglist = self.rpc.call_vg_list(node_list)
2769 for node in node_list:
2770 msg = vglist[node].fail_msg
2772 # ignoring down node
2773 self.LogWarning("Error while gathering data on node %s"
2774 " (ignoring node): %s", node, msg)
2776 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2778 constants.MIN_VG_SIZE)
2780 raise errors.OpPrereqError("Error on node '%s': %s" %
2781 (node, vgstatus), errors.ECODE_ENVIRON)
2783 if self.op.drbd_helper:
2784 # checks given drbd helper on all nodes
2785 helpers = self.rpc.call_drbd_helper(node_list)
2786 for node in node_list:
2787 ninfo = self.cfg.GetNodeInfo(node)
2789 self.LogInfo("Not checking drbd helper on offline node %s", node)
2791 msg = helpers[node].fail_msg
2793 raise errors.OpPrereqError("Error checking drbd helper on node"
2794 " '%s': %s" % (node, msg),
2795 errors.ECODE_ENVIRON)
2796 node_helper = helpers[node].payload
2797 if node_helper != self.op.drbd_helper:
2798 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2799 (node, node_helper), errors.ECODE_ENVIRON)
2801 self.cluster = cluster = self.cfg.GetClusterInfo()
2802 # validate params changes
2803 if self.op.beparams:
2804 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2805 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2807 if self.op.ndparams:
2808 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2809 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2811 if self.op.nicparams:
2812 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2813 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2814 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2817 # check all instances for consistency
2818 for instance in self.cfg.GetAllInstancesInfo().values():
2819 for nic_idx, nic in enumerate(instance.nics):
2820 params_copy = copy.deepcopy(nic.nicparams)
2821 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2823 # check parameter syntax
2825 objects.NIC.CheckParameterSyntax(params_filled)
2826 except errors.ConfigurationError, err:
2827 nic_errors.append("Instance %s, nic/%d: %s" %
2828 (instance.name, nic_idx, err))
2830 # if we're moving instances to routed, check that they have an ip
2831 target_mode = params_filled[constants.NIC_MODE]
2832 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2833 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2834 (instance.name, nic_idx))
2836 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2837 "\n".join(nic_errors))
2839 # hypervisor list/parameters
2840 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2841 if self.op.hvparams:
2842 for hv_name, hv_dict in self.op.hvparams.items():
2843 if hv_name not in self.new_hvparams:
2844 self.new_hvparams[hv_name] = hv_dict
2846 self.new_hvparams[hv_name].update(hv_dict)
2848 # os hypervisor parameters
2849 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2851 for os_name, hvs in self.op.os_hvp.items():
2852 if os_name not in self.new_os_hvp:
2853 self.new_os_hvp[os_name] = hvs
2855 for hv_name, hv_dict in hvs.items():
2856 if hv_name not in self.new_os_hvp[os_name]:
2857 self.new_os_hvp[os_name][hv_name] = hv_dict
2859 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2862 self.new_osp = objects.FillDict(cluster.osparams, {})
2863 if self.op.osparams:
2864 for os_name, osp in self.op.osparams.items():
2865 if os_name not in self.new_osp:
2866 self.new_osp[os_name] = {}
2868 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2871 if not self.new_osp[os_name]:
2872 # we removed all parameters
2873 del self.new_osp[os_name]
2875 # check the parameter validity (remote check)
2876 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2877 os_name, self.new_osp[os_name])
2879 # changes to the hypervisor list
2880 if self.op.enabled_hypervisors is not None:
2881 self.hv_list = self.op.enabled_hypervisors
2882 for hv in self.hv_list:
2883 # if the hypervisor doesn't already exist in the cluster
2884 # hvparams, we initialize it to empty, and then (in both
2885 # cases) we make sure to fill the defaults, as we might not
2886 # have a complete defaults list if the hypervisor wasn't
2888 if hv not in new_hvp:
2890 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2891 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2893 self.hv_list = cluster.enabled_hypervisors
2895 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2896 # either the enabled list has changed, or the parameters have, validate
2897 for hv_name, hv_params in self.new_hvparams.items():
2898 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2899 (self.op.enabled_hypervisors and
2900 hv_name in self.op.enabled_hypervisors)):
2901 # either this is a new hypervisor, or its parameters have changed
2902 hv_class = hypervisor.GetHypervisor(hv_name)
2903 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2904 hv_class.CheckParameterSyntax(hv_params)
2905 _CheckHVParams(self, node_list, hv_name, hv_params)
2908 # no need to check any newly-enabled hypervisors, since the
2909 # defaults have already been checked in the above code-block
2910 for os_name, os_hvp in self.new_os_hvp.items():
2911 for hv_name, hv_params in os_hvp.items():
2912 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2913 # we need to fill in the new os_hvp on top of the actual hv_p
2914 cluster_defaults = self.new_hvparams.get(hv_name, {})
2915 new_osp = objects.FillDict(cluster_defaults, hv_params)
2916 hv_class = hypervisor.GetHypervisor(hv_name)
2917 hv_class.CheckParameterSyntax(new_osp)
2918 _CheckHVParams(self, node_list, hv_name, new_osp)
2920 if self.op.default_iallocator:
2921 alloc_script = utils.FindFile(self.op.default_iallocator,
2922 constants.IALLOCATOR_SEARCH_PATH,
2924 if alloc_script is None:
2925 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2926 " specified" % self.op.default_iallocator,
2929 def Exec(self, feedback_fn):
2930 """Change the parameters of the cluster.
2933 if self.op.vg_name is not None:
2934 new_volume = self.op.vg_name
2937 if new_volume != self.cfg.GetVGName():
2938 self.cfg.SetVGName(new_volume)
2940 feedback_fn("Cluster LVM configuration already in desired"
2941 " state, not changing")
2942 if self.op.drbd_helper is not None:
2943 new_helper = self.op.drbd_helper
2946 if new_helper != self.cfg.GetDRBDHelper():
2947 self.cfg.SetDRBDHelper(new_helper)
2949 feedback_fn("Cluster DRBD helper already in desired state,"
2951 if self.op.hvparams:
2952 self.cluster.hvparams = self.new_hvparams
2954 self.cluster.os_hvp = self.new_os_hvp
2955 if self.op.enabled_hypervisors is not None:
2956 self.cluster.hvparams = self.new_hvparams
2957 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2958 if self.op.beparams:
2959 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2960 if self.op.nicparams:
2961 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2962 if self.op.osparams:
2963 self.cluster.osparams = self.new_osp
2964 if self.op.ndparams:
2965 self.cluster.ndparams = self.new_ndparams
2967 if self.op.candidate_pool_size is not None:
2968 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2969 # we need to update the pool size here, otherwise the save will fail
2970 _AdjustCandidatePool(self, [])
2972 if self.op.maintain_node_health is not None:
2973 self.cluster.maintain_node_health = self.op.maintain_node_health
2975 if self.op.prealloc_wipe_disks is not None:
2976 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2978 if self.op.add_uids is not None:
2979 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2981 if self.op.remove_uids is not None:
2982 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2984 if self.op.uid_pool is not None:
2985 self.cluster.uid_pool = self.op.uid_pool
2987 if self.op.default_iallocator is not None:
2988 self.cluster.default_iallocator = self.op.default_iallocator
2990 if self.op.reserved_lvs is not None:
2991 self.cluster.reserved_lvs = self.op.reserved_lvs
2993 def helper_os(aname, mods, desc):
2995 lst = getattr(self.cluster, aname)
2996 for key, val in mods:
2997 if key == constants.DDM_ADD:
2999 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3002 elif key == constants.DDM_REMOVE:
3006 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3008 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3010 if self.op.hidden_os:
3011 helper_os("hidden_os", self.op.hidden_os, "hidden")
3013 if self.op.blacklisted_os:
3014 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3016 if self.op.master_netdev:
3017 master = self.cfg.GetMasterNode()
3018 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3019 self.cluster.master_netdev)
3020 result = self.rpc.call_node_stop_master(master, False)
3021 result.Raise("Could not disable the master ip")
3022 feedback_fn("Changing master_netdev from %s to %s" %
3023 (self.cluster.master_netdev, self.op.master_netdev))
3024 self.cluster.master_netdev = self.op.master_netdev
3026 self.cfg.Update(self.cluster, feedback_fn)
3028 if self.op.master_netdev:
3029 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3030 self.op.master_netdev)
3031 result = self.rpc.call_node_start_master(master, False, False)
3033 self.LogWarning("Could not re-enable the master ip on"
3034 " the master, please restart manually: %s",
3038 def _UploadHelper(lu, nodes, fname):
3039 """Helper for uploading a file and showing warnings.
3042 if os.path.exists(fname):
3043 result = lu.rpc.call_upload_file(nodes, fname)
3044 for to_node, to_result in result.items():
3045 msg = to_result.fail_msg
3047 msg = ("Copy of file %s to node %s failed: %s" %
3048 (fname, to_node, msg))
3049 lu.proc.LogWarning(msg)
3052 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3053 """Distribute additional files which are part of the cluster configuration.
3055 ConfigWriter takes care of distributing the config and ssconf files, but
3056 there are more files which should be distributed to all nodes. This function
3057 makes sure those are copied.
3059 @param lu: calling logical unit
3060 @param additional_nodes: list of nodes not in the config to distribute to
3061 @type additional_vm: boolean
3062 @param additional_vm: whether the additional nodes are vm-capable or not
3065 # 1. Gather target nodes
3066 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3067 dist_nodes = lu.cfg.GetOnlineNodeList()
3068 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3069 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3070 if additional_nodes is not None:
3071 dist_nodes.extend(additional_nodes)
3073 vm_nodes.extend(additional_nodes)
3074 if myself.name in dist_nodes:
3075 dist_nodes.remove(myself.name)
3076 if myself.name in vm_nodes:
3077 vm_nodes.remove(myself.name)
3079 # 2. Gather files to distribute
3080 dist_files = set([constants.ETC_HOSTS,
3081 constants.SSH_KNOWN_HOSTS_FILE,
3082 constants.RAPI_CERT_FILE,
3083 constants.RAPI_USERS_FILE,
3084 constants.CONFD_HMAC_KEY,
3085 constants.CLUSTER_DOMAIN_SECRET_FILE,
3089 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3090 for hv_name in enabled_hypervisors:
3091 hv_class = hypervisor.GetHypervisor(hv_name)
3092 vm_files.update(hv_class.GetAncillaryFiles())
3094 # 3. Perform the files upload
3095 for fname in dist_files:
3096 _UploadHelper(lu, dist_nodes, fname)
3097 for fname in vm_files:
3098 _UploadHelper(lu, vm_nodes, fname)
3101 class LUClusterRedistConf(NoHooksLU):
3102 """Force the redistribution of cluster configuration.
3104 This is a very simple LU.
3109 def ExpandNames(self):
3110 self.needed_locks = {
3111 locking.LEVEL_NODE: locking.ALL_SET,
3113 self.share_locks[locking.LEVEL_NODE] = 1
3115 def Exec(self, feedback_fn):
3116 """Redistribute the configuration.
3119 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3120 _RedistributeAncillaryFiles(self)
3123 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3124 """Sleep and poll for an instance's disk to sync.
3127 if not instance.disks or disks is not None and not disks:
3130 disks = _ExpandCheckDisks(instance, disks)
3133 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3135 node = instance.primary_node
3138 lu.cfg.SetDiskID(dev, node)
3140 # TODO: Convert to utils.Retry
3143 degr_retries = 10 # in seconds, as we sleep 1 second each time
3147 cumul_degraded = False
3148 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3149 msg = rstats.fail_msg
3151 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3154 raise errors.RemoteError("Can't contact node %s for mirror data,"
3155 " aborting." % node)
3158 rstats = rstats.payload
3160 for i, mstat in enumerate(rstats):
3162 lu.LogWarning("Can't compute data for node %s/%s",
3163 node, disks[i].iv_name)
3166 cumul_degraded = (cumul_degraded or
3167 (mstat.is_degraded and mstat.sync_percent is None))
3168 if mstat.sync_percent is not None:
3170 if mstat.estimated_time is not None:
3171 rem_time = ("%s remaining (estimated)" %
3172 utils.FormatSeconds(mstat.estimated_time))
3173 max_time = mstat.estimated_time
3175 rem_time = "no time estimate"
3176 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3177 (disks[i].iv_name, mstat.sync_percent, rem_time))
3179 # if we're done but degraded, let's do a few small retries, to
3180 # make sure we see a stable and not transient situation; therefore
3181 # we force restart of the loop
3182 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3183 logging.info("Degraded disks found, %d retries left", degr_retries)
3191 time.sleep(min(60, max_time))
3194 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3195 return not cumul_degraded
3198 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3199 """Check that mirrors are not degraded.
3201 The ldisk parameter, if True, will change the test from the
3202 is_degraded attribute (which represents overall non-ok status for
3203 the device(s)) to the ldisk (representing the local storage status).
3206 lu.cfg.SetDiskID(dev, node)
3210 if on_primary or dev.AssembleOnSecondary():
3211 rstats = lu.rpc.call_blockdev_find(node, dev)
3212 msg = rstats.fail_msg
3214 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3216 elif not rstats.payload:
3217 lu.LogWarning("Can't find disk on node %s", node)
3221 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3223 result = result and not rstats.payload.is_degraded
3226 for child in dev.children:
3227 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3232 class LUOobCommand(NoHooksLU):
3233 """Logical unit for OOB handling.
3237 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3239 def CheckPrereq(self):
3240 """Check prerequisites.
3243 - the node exists in the configuration
3246 Any errors are signaled by raising errors.OpPrereqError.
3250 self.master_node = self.cfg.GetMasterNode()
3252 assert self.op.power_delay >= 0.0
3254 if self.op.node_names:
3255 if self.op.command in self._SKIP_MASTER:
3256 if self.master_node in self.op.node_names:
3257 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3258 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3260 if master_oob_handler:
3261 additional_text = ("Run '%s %s %s' if you want to operate on the"
3262 " master regardless") % (master_oob_handler,
3266 additional_text = "The master node does not support out-of-band"
3268 raise errors.OpPrereqError(("Operating on the master node %s is not"
3269 " allowed for %s\n%s") %
3270 (self.master_node, self.op.command,
3271 additional_text), errors.ECODE_INVAL)
3273 self.op.node_names = self.cfg.GetNodeList()
3274 if self.op.command in self._SKIP_MASTER:
3275 self.op.node_names.remove(self.master_node)
3277 if self.op.command in self._SKIP_MASTER:
3278 assert self.master_node not in self.op.node_names
3280 for node_name in self.op.node_names:
3281 node = self.cfg.GetNodeInfo(node_name)
3284 raise errors.OpPrereqError("Node %s not found" % node_name,
3287 self.nodes.append(node)
3289 if (not self.op.ignore_status and
3290 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3291 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3292 " not marked offline") % node_name,
3295 def ExpandNames(self):
3296 """Gather locks we need.
3299 if self.op.node_names:
3300 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3301 for name in self.op.node_names]
3302 lock_names = self.op.node_names
3304 lock_names = locking.ALL_SET
3306 self.needed_locks = {
3307 locking.LEVEL_NODE: lock_names,
3310 def Exec(self, feedback_fn):
3311 """Execute OOB and return result if we expect any.
3314 master_node = self.master_node
3317 for idx, node in enumerate(self.nodes):
3318 node_entry = [(constants.RS_NORMAL, node.name)]
3319 ret.append(node_entry)
3321 oob_program = _SupportsOob(self.cfg, node)
3324 node_entry.append((constants.RS_UNAVAIL, None))
3327 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3328 self.op.command, oob_program, node.name)
3329 result = self.rpc.call_run_oob(master_node, oob_program,
3330 self.op.command, node.name,
3334 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3335 node.name, result.fail_msg)
3336 node_entry.append((constants.RS_NODATA, None))
3339 self._CheckPayload(result)
3340 except errors.OpExecError, err:
3341 self.LogWarning("The payload returned by '%s' is not valid: %s",
3343 node_entry.append((constants.RS_NODATA, None))
3345 if self.op.command == constants.OOB_HEALTH:
3346 # For health we should log important events
3347 for item, status in result.payload:
3348 if status in [constants.OOB_STATUS_WARNING,
3349 constants.OOB_STATUS_CRITICAL]:
3350 self.LogWarning("On node '%s' item '%s' has status '%s'",
3351 node.name, item, status)
3353 if self.op.command == constants.OOB_POWER_ON:
3355 elif self.op.command == constants.OOB_POWER_OFF:
3356 node.powered = False
3357 elif self.op.command == constants.OOB_POWER_STATUS:
3358 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3359 if powered != node.powered:
3360 logging.warning(("Recorded power state (%s) of node '%s' does not"
3361 " match actual power state (%s)"), node.powered,
3364 # For configuration changing commands we should update the node
3365 if self.op.command in (constants.OOB_POWER_ON,
3366 constants.OOB_POWER_OFF):
3367 self.cfg.Update(node, feedback_fn)
3369 node_entry.append((constants.RS_NORMAL, result.payload))
3371 if (self.op.command == constants.OOB_POWER_ON and
3372 idx < len(self.nodes) - 1):
3373 time.sleep(self.op.power_delay)
3377 def _CheckPayload(self, result):
3378 """Checks if the payload is valid.
3380 @param result: RPC result
3381 @raises errors.OpExecError: If payload is not valid
3385 if self.op.command == constants.OOB_HEALTH:
3386 if not isinstance(result.payload, list):
3387 errs.append("command 'health' is expected to return a list but got %s" %
3388 type(result.payload))
3390 for item, status in result.payload:
3391 if status not in constants.OOB_STATUSES:
3392 errs.append("health item '%s' has invalid status '%s'" %
3395 if self.op.command == constants.OOB_POWER_STATUS:
3396 if not isinstance(result.payload, dict):
3397 errs.append("power-status is expected to return a dict but got %s" %
3398 type(result.payload))
3400 if self.op.command in [
3401 constants.OOB_POWER_ON,
3402 constants.OOB_POWER_OFF,
3403 constants.OOB_POWER_CYCLE,
3405 if result.payload is not None:
3406 errs.append("%s is expected to not return payload but got '%s'" %
3407 (self.op.command, result.payload))
3410 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3411 utils.CommaJoin(errs))
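# Illustrative summary of the payload shapes _CheckPayload accepts (derived
# from the checks above; concrete values are assumptions):
#   OOB_HEALTH             -> list of (item, status) pairs, e.g. [("PSU0", "OK")]
#   OOB_POWER_STATUS       -> dict, e.g. {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE -> no payload (None)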
3415 class LUOsDiagnose(NoHooksLU):
3416 """Logical unit for OS diagnose/query.
3421 _BLK = "blacklisted"
3423 _FIELDS_STATIC = utils.FieldSet()
3424 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3425 "parameters", "api_versions", _HID, _BLK)
3427 def CheckArguments(self):
3429 raise errors.OpPrereqError("Selective OS query not supported",
3432 _CheckOutputFields(static=self._FIELDS_STATIC,
3433 dynamic=self._FIELDS_DYNAMIC,
3434 selected=self.op.output_fields)
3436 def ExpandNames(self):
3437 # Lock all nodes, in shared mode
3438 # Temporary removal of locks, should be reverted later
3439 # TODO: reintroduce locks when they are lighter-weight
3440 self.needed_locks = {}
3441 #self.share_locks[locking.LEVEL_NODE] = 1
3442 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3445 def _DiagnoseByOS(rlist):
3446 """Remaps a per-node return list into an a per-os per-node dictionary
3448 @param rlist: a map with node names as keys and OS objects as values
3451 @return: a dictionary with osnames as keys and as value another
3452 map, with nodes as keys and tuples of (path, status, diagnose,
3453 variants, parameters, api_versions) as values, eg::
3455 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3456 (/srv/..., False, "invalid api")],
3457 "node2": [(/srv/..., True, "", [], [])]}
3462 # we build here the list of nodes that didn't fail the RPC (at RPC
3463 # level), so that nodes with a non-responding node daemon don't
3464 # make all OSes invalid
3465 good_nodes = [node_name for node_name in rlist
3466 if not rlist[node_name].fail_msg]
3467 for node_name, nr in rlist.items():
3468 if nr.fail_msg or not nr.payload:
3470 for (name, path, status, diagnose, variants,
3471 params, api_versions) in nr.payload:
3472 if name not in all_os:
3473 # build a list of nodes for this os containing empty lists
3474 # for each node in node_list
3476 for nname in good_nodes:
3477 all_os[name][nname] = []
3478 # convert params from [name, help] to (name, help)
3479 params = [tuple(v) for v in params]
3480 all_os[name][node_name].append((path, status, diagnose,
3481 variants, params, api_versions))
3484 def Exec(self, feedback_fn):
3485 """Compute the list of OSes.
3488 valid_nodes = [node.name
3489 for node in self.cfg.GetAllNodesInfo().values()
3490 if not node.offline and node.vm_capable]
3491 node_data = self.rpc.call_os_diagnose(valid_nodes)
3492 pol = self._DiagnoseByOS(node_data)
3494 cluster = self.cfg.GetClusterInfo()
3496 for os_name in utils.NiceSort(pol.keys()):
3497 os_data = pol[os_name]
3500 (variants, params, api_versions) = null_state = (set(), set(), set())
3501 for idx, osl in enumerate(os_data.values()):
3502 valid = bool(valid and osl and osl[0][1])
3504 (variants, params, api_versions) = null_state
3506 node_variants, node_params, node_api = osl[0][3:6]
3507 if idx == 0: # first entry
3508 variants = set(node_variants)
3509 params = set(node_params)
3510 api_versions = set(node_api)
3511 else: # keep consistency
3512 variants.intersection_update(node_variants)
3513 params.intersection_update(node_params)
3514 api_versions.intersection_update(node_api)
3516 is_hid = os_name in cluster.hidden_os
3517 is_blk = os_name in cluster.blacklisted_os
3518 if ((self._HID not in self.op.output_fields and is_hid) or
3519 (self._BLK not in self.op.output_fields and is_blk) or
3520 (self._VLD not in self.op.output_fields and not valid)):
3523 for field in self.op.output_fields:
3526 elif field == self._VLD:
3528 elif field == "node_status":
3529 # this is just a copy of the dict
3531 for node_name, nos_list in os_data.items():
3532 val[node_name] = nos_list
3533 elif field == "variants":
3534 val = utils.NiceSort(list(variants))
3535 elif field == "parameters":
3537 elif field == "api_versions":
3538 val = list(api_versions)
3539 elif field == self._HID:
3541 elif field == self._BLK:
3544 raise errors.ParameterError(field)
3551 class LUNodeRemove(LogicalUnit):
3552 """Logical unit for removing a node.
3555 HPATH = "node-remove"
3556 HTYPE = constants.HTYPE_NODE
3558 def BuildHooksEnv(self):
3561 This doesn't run on the target node in the pre phase as a failed
3562 node would then be impossible to remove.
3566 "OP_TARGET": self.op.node_name,
3567 "NODE_NAME": self.op.node_name,
3569 all_nodes = self.cfg.GetNodeList()
3571 all_nodes.remove(self.op.node_name)
3573 logging.warning("Node %s which is about to be removed not found"
3574 " in the all nodes list", self.op.node_name)
3575 return env, all_nodes, all_nodes
3577 def CheckPrereq(self):
3578 """Check prerequisites.
3581 - the node exists in the configuration
3582 - it does not have primary or secondary instances
3583 - it's not the master
3585 Any errors are signaled by raising errors.OpPrereqError.
3588 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3589 node = self.cfg.GetNodeInfo(self.op.node_name)
3590 assert node is not None
3592 instance_list = self.cfg.GetInstanceList()
3594 masternode = self.cfg.GetMasterNode()
3595 if node.name == masternode:
3596 raise errors.OpPrereqError("Node is the master node,"
3597 " you need to failover first.",
3600 for instance_name in instance_list:
3601 instance = self.cfg.GetInstanceInfo(instance_name)
3602 if node.name in instance.all_nodes:
3603 raise errors.OpPrereqError("Instance %s is still running on the node,"
3604 " please remove first." % instance_name,
3606 self.op.node_name = node.name
3609 def Exec(self, feedback_fn):
3610 """Removes the node from the cluster.
3614 logging.info("Stopping the node daemon and removing configs from node %s",
3617 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3619 # Promote nodes to master candidate as needed
3620 _AdjustCandidatePool(self, exceptions=[node.name])
3621 self.context.RemoveNode(node.name)
3623 # Run post hooks on the node before it's removed
3624 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3626 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3628 # pylint: disable-msg=W0702
3629 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3631 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3632 msg = result.fail_msg
3634 self.LogWarning("Errors encountered on the remote node while leaving"
3635 " the cluster: %s", msg)
3637 # Remove node from our /etc/hosts
3638 if self.cfg.GetClusterInfo().modify_etc_hosts:
3639 master_node = self.cfg.GetMasterNode()
3640 result = self.rpc.call_etc_hosts_modify(master_node,
3641 constants.ETC_HOSTS_REMOVE,
3643 result.Raise("Can't update hosts file with new host data")
3644 _RedistributeAncillaryFiles(self)
3647 class _NodeQuery(_QueryBase):
3648 FIELDS = query.NODE_FIELDS
3650 def ExpandNames(self, lu):
3651 lu.needed_locks = {}
3652 lu.share_locks[locking.LEVEL_NODE] = 1
3655 self.wanted = _GetWantedNodes(lu, self.names)
3657 self.wanted = locking.ALL_SET
3659 self.do_locking = (self.use_locking and
3660 query.NQ_LIVE in self.requested_data)
3663 # if we don't request only static fields, we need to lock the nodes
3664 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3666 def DeclareLocks(self, lu, level):
3669 def _GetQueryData(self, lu):
3670 """Computes the list of nodes and their attributes.
3673 all_info = lu.cfg.GetAllNodesInfo()
3675 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3677 # Gather data as requested
3678 if query.NQ_LIVE in self.requested_data:
3679 # filter out non-vm_capable nodes
3680 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3682 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3683 lu.cfg.GetHypervisorType())
3684 live_data = dict((name, nresult.payload)
3685 for (name, nresult) in node_data.items()
3686 if not nresult.fail_msg and nresult.payload)
3690 if query.NQ_INST in self.requested_data:
3691 node_to_primary = dict([(name, set()) for name in nodenames])
3692 node_to_secondary = dict([(name, set()) for name in nodenames])
3694 inst_data = lu.cfg.GetAllInstancesInfo()
3696 for inst in inst_data.values():
3697 if inst.primary_node in node_to_primary:
3698 node_to_primary[inst.primary_node].add(inst.name)
3699 for secnode in inst.secondary_nodes:
3700 if secnode in node_to_secondary:
3701 node_to_secondary[secnode].add(inst.name)
3703 node_to_primary = None
3704 node_to_secondary = None
3706 if query.NQ_OOB in self.requested_data:
3707 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3708 for name, node in all_info.iteritems())
3712 if query.NQ_GROUP in self.requested_data:
3713 groups = lu.cfg.GetAllNodeGroupsInfo()
3717 return query.NodeQueryData([all_info[name] for name in nodenames],
3718 live_data, lu.cfg.GetMasterNode(),
3719 node_to_primary, node_to_secondary, groups,
3720 oob_support, lu.cfg.GetClusterInfo())
3723 class LUNodeQuery(NoHooksLU):
3724 """Logical unit for querying nodes.
3727 # pylint: disable-msg=W0142
3730 def CheckArguments(self):
3731 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3732 self.op.output_fields, self.op.use_locking)
3734 def ExpandNames(self):
3735 self.nq.ExpandNames(self)
3737 def Exec(self, feedback_fn):
3738 return self.nq.OldStyleQuery(self)
3741 class LUNodeQueryvols(NoHooksLU):
3742 """Logical unit for getting volumes on node(s).
3746 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3747 _FIELDS_STATIC = utils.FieldSet("node")
3749 def CheckArguments(self):
3750 _CheckOutputFields(static=self._FIELDS_STATIC,
3751 dynamic=self._FIELDS_DYNAMIC,
3752 selected=self.op.output_fields)
3754 def ExpandNames(self):
3755 self.needed_locks = {}
3756 self.share_locks[locking.LEVEL_NODE] = 1
3757 if not self.op.nodes:
3758 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3760 self.needed_locks[locking.LEVEL_NODE] = \
3761 _GetWantedNodes(self, self.op.nodes)
3763 def Exec(self, feedback_fn):
3764 """Computes the list of nodes and their attributes.
3767 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3768 volumes = self.rpc.call_node_volumes(nodenames)
3770 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3771 in self.cfg.GetInstanceList()]
3773 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3776 for node in nodenames:
3777 nresult = volumes[node]
3780 msg = nresult.fail_msg
3782 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3785 node_vols = nresult.payload[:]
3786 node_vols.sort(key=lambda vol: vol['dev'])
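# sort by device path so the per-node volume listing is deterministic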
3788 for vol in node_vols:
3790 for field in self.op.output_fields:
3793 elif field == "phys":
3797 elif field == "name":
3799 elif field == "size":
3800 val = int(float(vol['size']))
3801 elif field == "instance":
3803 if node not in lv_by_node[inst]:
3805 if vol['name'] in lv_by_node[inst][node]:
3811 raise errors.ParameterError(field)
3812 node_output.append(str(val))
3814 output.append(node_output)
3819 class LUNodeQueryStorage(NoHooksLU):
3820 """Logical unit for getting information on storage units on node(s).
3823 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3826 def CheckArguments(self):
3827 _CheckOutputFields(static=self._FIELDS_STATIC,
3828 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3829 selected=self.op.output_fields)
3831 def ExpandNames(self):
3832 self.needed_locks = {}
3833 self.share_locks[locking.LEVEL_NODE] = 1
3836 self.needed_locks[locking.LEVEL_NODE] = \
3837 _GetWantedNodes(self, self.op.nodes)
3839 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3841 def Exec(self, feedback_fn):
3842 """Computes the list of nodes and their attributes.
3845 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3847 # Always get name to sort by
3848 if constants.SF_NAME in self.op.output_fields:
3849 fields = self.op.output_fields[:]
3851 fields = [constants.SF_NAME] + self.op.output_fields
3853 # Never ask for node or type as it's only known to the LU
3854 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3855 while extra in fields:
3856 fields.remove(extra)
3858 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3859 name_idx = field_idx[constants.SF_NAME]
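# name_idx is used below to key each result row by its name so the rows can be emitted in sorted order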
3861 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3862 data = self.rpc.call_storage_list(self.nodes,
3863 self.op.storage_type, st_args,
3864 self.op.name, fields)
3868 for node in utils.NiceSort(self.nodes):
3869 nresult = data[node]
3873 msg = nresult.fail_msg
3875 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3878 rows = dict([(row[name_idx], row) for row in nresult.payload])
3880 for name in utils.NiceSort(rows.keys()):
3885 for field in self.op.output_fields:
3886 if field == constants.SF_NODE:
3888 elif field == constants.SF_TYPE:
3889 val = self.op.storage_type
3890 elif field in field_idx:
3891 val = row[field_idx[field]]
3893 raise errors.ParameterError(field)
3902 class _InstanceQuery(_QueryBase):
3903 FIELDS = query.INSTANCE_FIELDS
3905 def ExpandNames(self, lu):
3906 lu.needed_locks = {}
3907 lu.share_locks[locking.LEVEL_INSTANCE] = 1
3908 lu.share_locks[locking.LEVEL_NODE] = 1
3911 self.wanted = _GetWantedInstances(lu, self.names)
3913 self.wanted = locking.ALL_SET
3915 self.do_locking = (self.use_locking and
3916 query.IQ_LIVE in self.requested_data)
3918 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3919 lu.needed_locks[locking.LEVEL_NODE] = []
3920 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
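# the node locks themselves are computed in DeclareLocks below, once the instance locks are held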
3922 def DeclareLocks(self, lu, level):
3923 if level == locking.LEVEL_NODE and self.do_locking:
3924 lu._LockInstancesNodes() # pylint: disable-msg=W0212
3926 def _GetQueryData(self, lu):
3927 """Computes the list of instances and their attributes.
3930 cluster = lu.cfg.GetClusterInfo()
3931 all_info = lu.cfg.GetAllInstancesInfo()
3933 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3935 instance_list = [all_info[name] for name in instance_names]
3936 nodes = frozenset(itertools.chain(*(inst.all_nodes
3937 for inst in instance_list)))
3938 hv_list = list(set([inst.hypervisor for inst in instance_list]))
3941 wrongnode_inst = set()
3943 # Gather data as requested
3944 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
3946 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3948 result = node_data[name]
3950 # offline nodes will be in both lists
3951 assert result.fail_msg
3952 offline_nodes.append(name)
3954 bad_nodes.append(name)
3955 elif result.payload:
3956 for inst in result.payload:
3957 if all_info[inst].primary_node == name:
3958 live_data.update(result.payload)
3960 wrongnode_inst.add(inst)
3961 # else no instance is alive
3965 if query.IQ_DISKUSAGE in self.requested_data:
3966 disk_usage = dict((inst.name,
3967 _ComputeDiskSize(inst.disk_template,
3968 [{"size": disk.size}
3969 for disk in inst.disks]))
3970 for inst in instance_list)
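# disk usage is derived from the configured disk sizes, not queried live from the nodes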
3974 if query.IQ_CONSOLE in self.requested_data:
3976 for inst in instance_list:
3977 if inst.name in live_data:
3978 # Instance is running
3979 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
3981 consinfo[inst.name] = None
3982 assert set(consinfo.keys()) == set(instance_names)
3986 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3987 disk_usage, offline_nodes, bad_nodes,
3988 live_data, wrongnode_inst, consinfo)
3991 class LUQuery(NoHooksLU):
3992 """Query for resources/items of a certain kind.
3995 # pylint: disable-msg=W0142
3998 def CheckArguments(self):
3999 qcls = _GetQueryImplementation(self.op.what)
4001 self.impl = qcls(self.op.filter, self.op.fields, False)
4003 def ExpandNames(self):
4004 self.impl.ExpandNames(self)
4006 def DeclareLocks(self, level):
4007 self.impl.DeclareLocks(self, level)
4009 def Exec(self, feedback_fn):
4010 return self.impl.NewStyleQuery(self)
4013 class LUQueryFields(NoHooksLU):
4014 """Query for resources/items of a certain kind.
4017 # pylint: disable-msg=W0142
4020 def CheckArguments(self):
4021 self.qcls = _GetQueryImplementation(self.op.what)
4023 def ExpandNames(self):
4024 self.needed_locks = {}
4026 def Exec(self, feedback_fn):
4027 return self.qcls.FieldsQuery(self.op.fields)
4030 class LUNodeModifyStorage(NoHooksLU):
4031 """Logical unit for modifying a storage volume on a node.
4036 def CheckArguments(self):
4037 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4039 storage_type = self.op.storage_type
4042 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4044 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4045 " modified" % storage_type,
4048 diff = set(self.op.changes.keys()) - modifiable
4050 raise errors.OpPrereqError("The following fields can not be modified for"
4051 " storage units of type '%s': %r" %
4052 (storage_type, list(diff)),
4055 def ExpandNames(self):
4056 self.needed_locks = {
4057 locking.LEVEL_NODE: self.op.node_name,
4060 def Exec(self, feedback_fn):
4061 """Computes the list of nodes and their attributes.
4064 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4065 result = self.rpc.call_storage_modify(self.op.node_name,
4066 self.op.storage_type, st_args,
4067 self.op.name, self.op.changes)
4068 result.Raise("Failed to modify storage unit '%s' on %s" %
4069 (self.op.name, self.op.node_name))
4072 class LUNodeAdd(LogicalUnit):
4073 """Logical unit for adding node to the cluster.
4077 HTYPE = constants.HTYPE_NODE
4078 _NFLAGS = ["master_capable", "vm_capable"]
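# node capability flags handled by this LU; see CheckPrereq for how their defaults are chosen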
4080 def CheckArguments(self):
4081 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4082 # validate/normalize the node name
4083 self.hostname = netutils.GetHostname(name=self.op.node_name,
4084 family=self.primary_ip_family)
4085 self.op.node_name = self.hostname.name
4086 if self.op.readd and self.op.group:
4087 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4088 " being readded", errors.ECODE_INVAL)
4090 def BuildHooksEnv(self):
4093 This will run on all nodes before, and on all nodes + the new node after.
4097 "OP_TARGET": self.op.node_name,
4098 "NODE_NAME": self.op.node_name,
4099 "NODE_PIP": self.op.primary_ip,
4100 "NODE_SIP": self.op.secondary_ip,
4101 "MASTER_CAPABLE": str(self.op.master_capable),
4102 "VM_CAPABLE": str(self.op.vm_capable),
4104 nodes_0 = self.cfg.GetNodeList()
4105 nodes_1 = nodes_0 + [self.op.node_name, ]
4106 return env, nodes_0, nodes_1
4108 def CheckPrereq(self):
4109 """Check prerequisites.
4112 - the new node is not already in the config
4114 - its parameters (single/dual homed) match the cluster
4116 Any errors are signaled by raising errors.OpPrereqError.
4120 hostname = self.hostname
4121 node = hostname.name
4122 primary_ip = self.op.primary_ip = hostname.ip
4123 if self.op.secondary_ip is None:
4124 if self.primary_ip_family == netutils.IP6Address.family:
4125 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4126 " IPv4 address must be given as secondary",
4128 self.op.secondary_ip = primary_ip
4130 secondary_ip = self.op.secondary_ip
4131 if not netutils.IP4Address.IsValid(secondary_ip):
4132 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4133 " address" % secondary_ip, errors.ECODE_INVAL)
4135 node_list = cfg.GetNodeList()
4136 if not self.op.readd and node in node_list:
4137 raise errors.OpPrereqError("Node %s is already in the configuration" %
4138 node, errors.ECODE_EXISTS)
4139 elif self.op.readd and node not in node_list:
4140 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4143 self.changed_primary_ip = False
4145 for existing_node_name in node_list:
4146 existing_node = cfg.GetNodeInfo(existing_node_name)
4148 if self.op.readd and node == existing_node_name:
4149 if existing_node.secondary_ip != secondary_ip:
4150 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4151 " address configuration as before",
4153 if existing_node.primary_ip != primary_ip:
4154 self.changed_primary_ip = True
4158 if (existing_node.primary_ip == primary_ip or
4159 existing_node.secondary_ip == primary_ip or
4160 existing_node.primary_ip == secondary_ip or
4161 existing_node.secondary_ip == secondary_ip):
4162 raise errors.OpPrereqError("New node ip address(es) conflict with"
4163 " existing node %s" % existing_node.name,
4164 errors.ECODE_NOTUNIQUE)
4166 # After this 'if' block, None is no longer a valid value for the
4167 # _capable op attributes
4169 old_node = self.cfg.GetNodeInfo(node)
4170 assert old_node is not None, "Can't retrieve locked node %s" % node
4171 for attr in self._NFLAGS:
4172 if getattr(self.op, attr) is None:
4173 setattr(self.op, attr, getattr(old_node, attr))
4175 for attr in self._NFLAGS:
4176 if getattr(self.op, attr) is None:
4177 setattr(self.op, attr, True)
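# (unspecified capability flags default to True for a new node; on re-add they were copied from the existing node object above)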
4179 if self.op.readd and not self.op.vm_capable:
4180 pri, sec = cfg.GetNodeInstances(node)
4182 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4183 " flag set to false, but it already holds"
4184 " instances" % node,
4187 # check that the type of the node (single versus dual homed) is the
4188 # same as for the master
4189 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4190 master_singlehomed = myself.secondary_ip == myself.primary_ip
4191 newbie_singlehomed = secondary_ip == primary_ip
4192 if master_singlehomed != newbie_singlehomed:
4193 if master_singlehomed:
4194 raise errors.OpPrereqError("The master has no secondary ip but the"
4195 " new node has one",
4198 raise errors.OpPrereqError("The master has a secondary ip but the"
4199 " new node doesn't have one",
4202 # checks reachability
4203 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4204 raise errors.OpPrereqError("Node not reachable by ping",
4205 errors.ECODE_ENVIRON)
4207 if not newbie_singlehomed:
4208 # check reachability from my secondary ip to newbie's secondary ip
4209 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4210 source=myself.secondary_ip):
4211 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4212 " based ping to node daemon port",
4213 errors.ECODE_ENVIRON)
4220 if self.op.master_capable:
4221 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4223 self.master_candidate = False
4226 self.new_node = old_node
4228 node_group = cfg.LookupNodeGroup(self.op.group)
4229 self.new_node = objects.Node(name=node,
4230 primary_ip=primary_ip,
4231 secondary_ip=secondary_ip,
4232 master_candidate=self.master_candidate,
4233 offline=False, drained=False,
4236 if self.op.ndparams:
4237 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4239 def Exec(self, feedback_fn):
4240 """Adds the new node to the cluster.
4243 new_node = self.new_node
4244 node = new_node.name
4246 # We are adding a new node, so we assume it is powered
4247 new_node.powered = True
4249 # for re-adds, reset the offline/drained/master-candidate flags;
4250 # we need to reset here, otherwise offline would prevent RPC calls
4251 # later in the procedure; this also means that if the re-add
4252 # fails, we are left with a non-offlined, broken node
4254 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4255 self.LogInfo("Readding a node, the offline/drained flags were reset")
4256 # if we demote the node, we do cleanup later in the procedure
4257 new_node.master_candidate = self.master_candidate
4258 if self.changed_primary_ip:
4259 new_node.primary_ip = self.op.primary_ip
4261 # copy the master/vm_capable flags
4262 for attr in self._NFLAGS:
4263 setattr(new_node, attr, getattr(self.op, attr))
4265 # notify the user about any possible mc promotion
4266 if new_node.master_candidate:
4267 self.LogInfo("Node will be a master candidate")
4269 if self.op.ndparams:
4270 new_node.ndparams = self.op.ndparams
4272 new_node.ndparams = {}
4274 # check connectivity
4275 result = self.rpc.call_version([node])[node]
4276 result.Raise("Can't get version information from node %s" % node)
4277 if constants.PROTOCOL_VERSION == result.payload:
4278 logging.info("Communication to node %s fine, sw version %s match",
4279 node, result.payload)
4281 raise errors.OpExecError("Version mismatch: master version %s,"
4282 " node version %s" %
4283 (constants.PROTOCOL_VERSION, result.payload))
4285 # Add node to our /etc/hosts, and add key to known_hosts
4286 if self.cfg.GetClusterInfo().modify_etc_hosts:
4287 master_node = self.cfg.GetMasterNode()
4288 result = self.rpc.call_etc_hosts_modify(master_node,
4289 constants.ETC_HOSTS_ADD,
4292 result.Raise("Can't update hosts file with new host data")
4294 if new_node.secondary_ip != new_node.primary_ip:
4295 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4298 node_verify_list = [self.cfg.GetMasterNode()]
4299 node_verify_param = {
4300 constants.NV_NODELIST: [node],
4301 # TODO: do a node-net-test as well?
4304 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4305 self.cfg.GetClusterName())
4306 for verifier in node_verify_list:
4307 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4308 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4310 for failed in nl_payload:
4311 feedback_fn("ssh/hostname verification failed"
4312 " (checking from %s): %s" %
4313 (verifier, nl_payload[failed]))
4314 raise errors.OpExecError("ssh/hostname verification failed.")
4317 _RedistributeAncillaryFiles(self)
4318 self.context.ReaddNode(new_node)
4319 # make sure we redistribute the config
4320 self.cfg.Update(new_node, feedback_fn)
4321 # and make sure the new node will not have old files around
4322 if not new_node.master_candidate:
4323 result = self.rpc.call_node_demote_from_mc(new_node.name)
4324 msg = result.fail_msg
4326 self.LogWarning("Node failed to demote itself from master"
4327 " candidate status: %s" % msg)
4329 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4330 additional_vm=self.op.vm_capable)
4331 self.context.AddNode(new_node, self.proc.GetECId())
4334 class LUNodeSetParams(LogicalUnit):
4335 """Modifies the parameters of a node.
4337 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4338 to the node role (as _ROLE_*)
4339 @cvar _R2F: a dictionary from node role to tuples of flags
4340 @cvar _FLAGS: a list of attribute names corresponding to the flags
4343 HPATH = "node-modify"
4344 HTYPE = constants.HTYPE_NODE
4346 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4348 (True, False, False): _ROLE_CANDIDATE,
4349 (False, True, False): _ROLE_DRAINED,
4350 (False, False, True): _ROLE_OFFLINE,
4351 (False, False, False): _ROLE_REGULAR,
4353 _R2F = dict((v, k) for k, v in _F2R.items())
4354 _FLAGS = ["master_candidate", "drained", "offline"]
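# example: the flag tuple (True, False, False), i.e. a master candidate that is neither drained nor offline, maps to _ROLE_CANDIDATE; _R2F gives the inverse mapping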
4356 def CheckArguments(self):
4357 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4358 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4359 self.op.master_capable, self.op.vm_capable,
4360 self.op.secondary_ip, self.op.ndparams]
4361 if all_mods.count(None) == len(all_mods):
4362 raise errors.OpPrereqError("Please pass at least one modification",
4364 if all_mods.count(True) > 1:
4365 raise errors.OpPrereqError("Can't set the node into more than one"
4366 " state at the same time",
4369 # Boolean value that tells us whether we might be demoting from MC
4370 self.might_demote = (self.op.master_candidate == False or
4371 self.op.offline == True or
4372 self.op.drained == True or
4373 self.op.master_capable == False)
4375 if self.op.secondary_ip:
4376 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4377 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4378 " address" % self.op.secondary_ip,
4381 self.lock_all = self.op.auto_promote and self.might_demote
4382 self.lock_instances = self.op.secondary_ip is not None
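# demoting a master candidate may require promoting other nodes (hence lock_all); changing the secondary IP requires inspecting the instances using this node (hence lock_instances)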
4384 def ExpandNames(self):
4386 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4388 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4390 if self.lock_instances:
4391 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4393 def DeclareLocks(self, level):
4394 # If we have locked all instances, before waiting to lock nodes, release
4395 # all the ones living on nodes unrelated to the current operation.
4396 if level == locking.LEVEL_NODE and self.lock_instances:
4397 instances_release = []
4399 self.affected_instances = []
4400 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4401 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4402 instance = self.context.cfg.GetInstanceInfo(instance_name)
4403 i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
4404 if i_mirrored and self.op.node_name in instance.all_nodes:
4405 instances_keep.append(instance_name)
4406 self.affected_instances.append(instance)
4408 instances_release.append(instance_name)
4409 if instances_release:
4410 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4411 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4413 def BuildHooksEnv(self):
4416 This runs on the master node.
4420 "OP_TARGET": self.op.node_name,
4421 "MASTER_CANDIDATE": str(self.op.master_candidate),
4422 "OFFLINE": str(self.op.offline),
4423 "DRAINED": str(self.op.drained),
4424 "MASTER_CAPABLE": str(self.op.master_capable),
4425 "VM_CAPABLE": str(self.op.vm_capable),
4427 nl = [self.cfg.GetMasterNode(),
4431 def CheckPrereq(self):
4432 """Check prerequisites.
4434 This checks the requested flag and parameter changes against the node's current state.
4437 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4439 if (self.op.master_candidate is not None or
4440 self.op.drained is not None or
4441 self.op.offline is not None):
4442 # we can't change the master's node flags
4443 if self.op.node_name == self.cfg.GetMasterNode():
4444 raise errors.OpPrereqError("The master role can be changed"
4445 " only via master-failover",
4448 if self.op.master_candidate and not node.master_capable:
4449 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4450 " it a master candidate" % node.name,
4453 if self.op.vm_capable == False:
4454 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4456 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4457 " the vm_capable flag" % node.name,
4460 if node.master_candidate and self.might_demote and not self.lock_all:
4461 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4462 # check if after removing the current node, we're missing master
4464 (mc_remaining, mc_should, _) = \
4465 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4466 if mc_remaining < mc_should:
4467 raise errors.OpPrereqError("Not enough master candidates, please"
4468 " pass auto promote option to allow"
4469 " promotion", errors.ECODE_STATE)
4471 self.old_flags = old_flags = (node.master_candidate,
4472 node.drained, node.offline)
4473 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4474 self.old_role = old_role = self._F2R[old_flags]
4476 # Check for ineffective changes
4477 for attr in self._FLAGS:
4478 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4479 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4480 setattr(self.op, attr, None)
4482 # Past this point, any flag change to False means a transition
4483 # away from the respective state, as only real changes are kept
4485 # TODO: We might query the real power state if it supports OOB
4486 if _SupportsOob(self.cfg, node):
4487 if self.op.offline is False and not (node.powered or
4488 self.op.powered == True):
4489 raise errors.OpPrereqError(("Please power on node %s first before you"
4490 " can reset offline state") %
4492 elif self.op.powered is not None:
4493 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4494 " which does not support out-of-band"
4495 " handling") % self.op.node_name)
4497 # If we're being de-offlined, un-drained or made master-capable, we'll promote ourselves to master candidate if needed
4498 if (self.op.drained == False or self.op.offline == False or
4499 (self.op.master_capable and not node.master_capable)):
4500 if _DecideSelfPromotion(self):
4501 self.op.master_candidate = True
4502 self.LogInfo("Auto-promoting node to master candidate")
4504 # If we're no longer master capable, we'll demote ourselves from MC
4505 if self.op.master_capable == False and node.master_candidate:
4506 self.LogInfo("Demoting from master candidate")
4507 self.op.master_candidate = False
4510 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4511 if self.op.master_candidate:
4512 new_role = self._ROLE_CANDIDATE
4513 elif self.op.drained:
4514 new_role = self._ROLE_DRAINED
4515 elif self.op.offline:
4516 new_role = self._ROLE_OFFLINE
4517 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4518 # False is still in new flags, which means we're un-setting (the
4520 new_role = self._ROLE_REGULAR
4521 else: # no new flags, nothing, keep old role
4524 self.new_role = new_role
4526 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4527 # Trying to transition out of offline status
4528 result = self.rpc.call_version([node.name])[node.name]
4530 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4531 " to report its version: %s" %
4532 (node.name, result.fail_msg),
4535 self.LogWarning("Transitioning node from offline to online state"
4536 " without using re-add. Please make sure the node"
4539 if self.op.secondary_ip:
4540 # Ok even without locking, because this can't be changed by any LU
4541 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4542 master_singlehomed = master.secondary_ip == master.primary_ip
4543 if master_singlehomed and self.op.secondary_ip:
4544 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4545 " homed cluster", errors.ECODE_INVAL)
4548 if self.affected_instances:
4549 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4550 " node has instances (%s) configured"
4551 " to use it" % self.affected_instances)
4553 # On online nodes, check that no instances are running, and that
4554 # the node has the new ip and we can reach it.
4555 for instance in self.affected_instances:
4556 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4558 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4559 if master.name != node.name:
4560 # check reachability from master secondary ip to new secondary ip
4561 if not netutils.TcpPing(self.op.secondary_ip,
4562 constants.DEFAULT_NODED_PORT,
4563 source=master.secondary_ip):
4564 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4565 " based ping to node daemon port",
4566 errors.ECODE_ENVIRON)
4568 if self.op.ndparams:
4569 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4570 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4571 self.new_ndparams = new_ndparams
4573 def Exec(self, feedback_fn):
4578 old_role = self.old_role
4579 new_role = self.new_role
4583 if self.op.ndparams:
4584 node.ndparams = self.new_ndparams
4586 if self.op.powered is not None:
4587 node.powered = self.op.powered
4589 for attr in ["master_capable", "vm_capable"]:
4590 val = getattr(self.op, attr)
4592 setattr(node, attr, val)
4593 result.append((attr, str(val)))
4595 if new_role != old_role:
4596 # Tell the node to demote itself, if no longer MC and not offline
4597 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4598 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4600 self.LogWarning("Node failed to demote itself: %s", msg)
4602 new_flags = self._R2F[new_role]
4603 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4605 result.append((desc, str(nf)))
4606 (node.master_candidate, node.drained, node.offline) = new_flags
4608 # we locked all nodes, so adjust the candidate pool before updating this node
4610 _AdjustCandidatePool(self, [node.name])
4612 if self.op.secondary_ip:
4613 node.secondary_ip = self.op.secondary_ip
4614 result.append(("secondary_ip", self.op.secondary_ip))
4616 # this will trigger configuration file update, if needed
4617 self.cfg.Update(node, feedback_fn)
4619 # this will trigger job queue propagation or cleanup if the mc
4621 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4622 self.context.ReaddNode(node)
4627 class LUNodePowercycle(NoHooksLU):
4628 """Powercycles a node.
4633 def CheckArguments(self):
4634 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4635 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4636 raise errors.OpPrereqError("The node is the master and the force"
4637 " parameter was not set",
4640 def ExpandNames(self):
4641 """Locking for PowercycleNode.
4643 This is a last-resort option and shouldn't block on other
4644 jobs. Therefore, we grab no locks.
4647 self.needed_locks = {}
4649 def Exec(self, feedback_fn):
4653 result = self.rpc.call_node_powercycle(self.op.node_name,
4654 self.cfg.GetHypervisorType())
4655 result.Raise("Failed to schedule the reboot")
4656 return result.payload
4659 class LUClusterQuery(NoHooksLU):
4660 """Query cluster configuration.
4665 def ExpandNames(self):
4666 self.needed_locks = {}
4668 def Exec(self, feedback_fn):
4669 """Return cluster config.
4672 cluster = self.cfg.GetClusterInfo()
4675 # Filter just for enabled hypervisors
4676 for os_name, hv_dict in cluster.os_hvp.items():
4677 os_hvp[os_name] = {}
4678 for hv_name, hv_params in hv_dict.items():
4679 if hv_name in cluster.enabled_hypervisors:
4680 os_hvp[os_name][hv_name] = hv_params
4682 # Convert ip_family to ip_version
4683 primary_ip_version = constants.IP4_VERSION
4684 if cluster.primary_ip_family == netutils.IP6Address.family:
4685 primary_ip_version = constants.IP6_VERSION
4688 "software_version": constants.RELEASE_VERSION,
4689 "protocol_version": constants.PROTOCOL_VERSION,
4690 "config_version": constants.CONFIG_VERSION,
4691 "os_api_version": max(constants.OS_API_VERSIONS),
4692 "export_version": constants.EXPORT_VERSION,
4693 "architecture": (platform.architecture()[0], platform.machine()),
4694 "name": cluster.cluster_name,
4695 "master": cluster.master_node,
4696 "default_hypervisor": cluster.enabled_hypervisors[0],
4697 "enabled_hypervisors": cluster.enabled_hypervisors,
4698 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4699 for hypervisor_name in cluster.enabled_hypervisors]),
4701 "beparams": cluster.beparams,
4702 "osparams": cluster.osparams,
4703 "nicparams": cluster.nicparams,
4704 "ndparams": cluster.ndparams,
4705 "candidate_pool_size": cluster.candidate_pool_size,
4706 "master_netdev": cluster.master_netdev,
4707 "volume_group_name": cluster.volume_group_name,
4708 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4709 "file_storage_dir": cluster.file_storage_dir,
4710 "shared_file_storage_dir": cluster.shared_file_storage_dir,
4711 "maintain_node_health": cluster.maintain_node_health,
4712 "ctime": cluster.ctime,
4713 "mtime": cluster.mtime,
4714 "uuid": cluster.uuid,
4715 "tags": list(cluster.GetTags()),
4716 "uid_pool": cluster.uid_pool,
4717 "default_iallocator": cluster.default_iallocator,
4718 "reserved_lvs": cluster.reserved_lvs,
4719 "primary_ip_version": primary_ip_version,
4720 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4721 "hidden_os": cluster.hidden_os,
4722 "blacklisted_os": cluster.blacklisted_os,
4728 class LUClusterConfigQuery(NoHooksLU):
4729 """Return configuration values.
4733 _FIELDS_DYNAMIC = utils.FieldSet()
4734 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4735 "watcher_pause", "volume_group_name")
4737 def CheckArguments(self):
4738 _CheckOutputFields(static=self._FIELDS_STATIC,
4739 dynamic=self._FIELDS_DYNAMIC,
4740 selected=self.op.output_fields)
4742 def ExpandNames(self):
4743 self.needed_locks = {}
4745 def Exec(self, feedback_fn):
4746 """Dump a representation of the cluster config to the standard output.
4750 for field in self.op.output_fields:
4751 if field == "cluster_name":
4752 entry = self.cfg.GetClusterName()
4753 elif field == "master_node":
4754 entry = self.cfg.GetMasterNode()
4755 elif field == "drain_flag":
4756 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4757 elif field == "watcher_pause":
4758 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4759 elif field == "volume_group_name":
4760 entry = self.cfg.GetVGName()
4762 raise errors.ParameterError(field)
4763 values.append(entry)
4767 class LUInstanceActivateDisks(NoHooksLU):
4768 """Bring up an instance's disks.
4773 def ExpandNames(self):
4774 self._ExpandAndLockInstance()
4775 self.needed_locks[locking.LEVEL_NODE] = []
4776 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4778 def DeclareLocks(self, level):
4779 if level == locking.LEVEL_NODE:
4780 self._LockInstancesNodes()
4782 def CheckPrereq(self):
4783 """Check prerequisites.
4785 This checks that the instance is in the cluster.
4788 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4789 assert self.instance is not None, \
4790 "Cannot retrieve locked instance %s" % self.op.instance_name
4791 _CheckNodeOnline(self, self.instance.primary_node)
4793 def Exec(self, feedback_fn):
4794 """Activate the disks.
4797 disks_ok, disks_info = \
4798 _AssembleInstanceDisks(self, self.instance,
4799 ignore_size=self.op.ignore_size)
4801 raise errors.OpExecError("Cannot activate block devices")
4806 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4808 """Prepare the block devices for an instance.
4810 This sets up the block devices on all nodes.
4812 @type lu: L{LogicalUnit}
4813 @param lu: the logical unit on whose behalf we execute
4814 @type instance: L{objects.Instance}
4815 @param instance: the instance for whose disks we assemble
4816 @type disks: list of L{objects.Disk} or None
4817 @param disks: which disks to assemble (or all, if None)
4818 @type ignore_secondaries: boolean
4819 @param ignore_secondaries: if true, errors on secondary nodes
4820 won't result in an error return from the function
4821 @type ignore_size: boolean
4822 @param ignore_size: if true, the current known size of the disk
4823 will not be used during the disk activation, useful for cases
4824 when the size is wrong
4825 @return: False if the operation failed, otherwise a list of
4826 (host, instance_visible_name, node_visible_name)
4827 with the mapping from node devices to instance devices
4832 iname = instance.name
4833 disks = _ExpandCheckDisks(instance, disks)
4835 # With the two-pass mechanism we try to reduce the window of
4836 # opportunity for the race condition of switching DRBD to primary
4837 # before handshaking has occurred, but we do not eliminate it
4839 # The proper fix would be to wait (with some limits) until the
4840 # connection has been made and drbd transitions from WFConnection
4841 # into any other network-connected state (Connected, SyncTarget,
4844 # 1st pass, assemble on all nodes in secondary mode
4845 for idx, inst_disk in enumerate(disks):
4846 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4848 node_disk = node_disk.Copy()
4849 node_disk.UnsetSize()
4850 lu.cfg.SetDiskID(node_disk, node)
4851 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
4852 msg = result.fail_msg
4854 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4855 " (is_primary=False, pass=1): %s",
4856 inst_disk.iv_name, node, msg)
4857 if not ignore_secondaries:
4860 # FIXME: race condition on drbd migration to primary
4862 # 2nd pass, do only the primary node
4863 for idx, inst_disk in enumerate(disks):
4866 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4867 if node != instance.primary_node:
4870 node_disk = node_disk.Copy()
4871 node_disk.UnsetSize()
4872 lu.cfg.SetDiskID(node_disk, node)
4873 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
4874 msg = result.fail_msg
4876 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4877 " (is_primary=True, pass=2): %s",
4878 inst_disk.iv_name, node, msg)
4881 dev_path = result.payload
4883 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4885 # leave the disks configured for the primary node
4886 # this is a workaround that would be fixed better by
4887 # improving the logical/physical id handling
4889 lu.cfg.SetDiskID(disk, instance.primary_node)
4891 return disks_ok, device_info
4894 def _StartInstanceDisks(lu, instance, force):
4895 """Start the disks of an instance.
4898 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4899 ignore_secondaries=force)
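# if assembly failed, shut down whatever did come up before reporting the error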
4901 _ShutdownInstanceDisks(lu, instance)
4902 if force is not None and not force:
4903 lu.proc.LogWarning("", hint="If the message above refers to a"
4905 " you can retry the operation using '--force'.")
4906 raise errors.OpExecError("Disk consistency error")
4909 class LUInstanceDeactivateDisks(NoHooksLU):
4910 """Shutdown an instance's disks.
4915 def ExpandNames(self):
4916 self._ExpandAndLockInstance()
4917 self.needed_locks[locking.LEVEL_NODE] = []
4918 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4920 def DeclareLocks(self, level):
4921 if level == locking.LEVEL_NODE:
4922 self._LockInstancesNodes()
4924 def CheckPrereq(self):
4925 """Check prerequisites.
4927 This checks that the instance is in the cluster.
4930 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4931 assert self.instance is not None, \
4932 "Cannot retrieve locked instance %s" % self.op.instance_name
4934 def Exec(self, feedback_fn):
4935 """Deactivate the disks
4938 instance = self.instance
4940 _ShutdownInstanceDisks(self, instance)
4942 _SafeShutdownInstanceDisks(self, instance)
4945 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4946 """Shutdown block devices of an instance.
4948 This function checks if an instance is running, before calling
4949 _ShutdownInstanceDisks.
4952 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4953 _ShutdownInstanceDisks(lu, instance, disks=disks)
4956 def _ExpandCheckDisks(instance, disks):
4957 """Return the instance disks selected by the disks list
4959 @type disks: list of L{objects.Disk} or None
4960 @param disks: selected disks
4961 @rtype: list of L{objects.Disk}
4962 @return: selected instance disks to act on
4966 return instance.disks
4968 if not set(disks).issubset(instance.disks):
4969 raise errors.ProgrammerError("Can only act on disks belonging to the"
4974 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4975 """Shutdown block devices of an instance.
4977 This does the shutdown on all nodes of the instance.
4979 If ignore_primary is false, errors on the primary node are
4984 disks = _ExpandCheckDisks(instance, disks)
4987 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4988 lu.cfg.SetDiskID(top_disk, node)
4989 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4990 msg = result.fail_msg
4992 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4993 disk.iv_name, node, msg)
4994 if ((node == instance.primary_node and not ignore_primary) or
4995 (node != instance.primary_node and not result.offline)):
5000 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5001 """Checks if a node has enough free memory.
5003 This function checks if a given node has the needed amount of free
5004 memory. In case the node has less memory or we cannot get the
5005 information from the node, this function raises an OpPrereqError
5008 @type lu: C{LogicalUnit}
5009 @param lu: a logical unit from which we get configuration data
5011 @param node: the node to check
5012 @type reason: C{str}
5013 @param reason: string to use in the error message
5014 @type requested: C{int}
5015 @param requested: the amount of memory in MiB to check for
5016 @type hypervisor_name: C{str}
5017 @param hypervisor_name: the hypervisor to ask for memory stats
5018 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5019 we cannot check the node
5022 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5023 nodeinfo[node].Raise("Can't get data from node %s" % node,
5024 prereq=True, ecode=errors.ECODE_ENVIRON)
5025 free_mem = nodeinfo[node].payload.get('memory_free', None)
5026 if not isinstance(free_mem, int):
5027 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5028 " was '%s'" % (node, free_mem),
5029 errors.ECODE_ENVIRON)
5030 if requested > free_mem:
5031 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5032 " needed %s MiB, available %s MiB" %
5033 (node, reason, requested, free_mem),
5037 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5038 """Checks if nodes have enough free disk space in the all VGs.
5040 This function check if all given nodes have the needed amount of
5041 free disk. In case any node has less disk or we cannot get the
5042 information from the node, this function raise an OpPrereqError
5045 @type lu: C{LogicalUnit}
5046 @param lu: a logical unit from which we get configuration data
5047 @type nodenames: C{list}
5048 @param nodenames: the list of node names to check
5049 @type req_sizes: C{dict}
5050 @param req_sizes: the hash of vg and corresponding amount of disk in
5052 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5053 or we cannot check the node
5056 for vg, req_size in req_sizes.items():
5057 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5060 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5061 """Checks if nodes have enough free disk space in the specified VG.
5063 This function checks if all given nodes have the needed amount of
5064 free disk. In case any node has less disk or we cannot get the
5065 information from the node, this function raises an OpPrereqError
5068 @type lu: C{LogicalUnit}
5069 @param lu: a logical unit from which we get configuration data
5070 @type nodenames: C{list}
5071 @param nodenames: the list of node names to check
5073 @param vg: the volume group to check
5074 @type requested: C{int}
5075 @param requested: the amount of disk in MiB to check for
5076 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5077 or we cannot check the node
5080 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5081 for node in nodenames:
5082 info = nodeinfo[node]
5083 info.Raise("Cannot get current information from node %s" % node,
5084 prereq=True, ecode=errors.ECODE_ENVIRON)
5085 vg_free = info.payload.get("vg_free", None)
5086 if not isinstance(vg_free, int):
5087 raise errors.OpPrereqError("Can't compute free disk space on node"
5088 " %s for vg %s, result was '%s'" %
5089 (node, vg, vg_free), errors.ECODE_ENVIRON)
5090 if requested > vg_free:
5091 raise errors.OpPrereqError("Not enough disk space on target node %s"
5092 " vg %s: required %d MiB, available %d MiB" %
5093 (node, vg, requested, vg_free),
5097 class LUInstanceStartup(LogicalUnit):
5098 """Starts an instance.
5101 HPATH = "instance-start"
5102 HTYPE = constants.HTYPE_INSTANCE
5105 def CheckArguments(self):
5107 if self.op.beparams:
5108 # fill the beparams dict
5109 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5111 def ExpandNames(self):
5112 self._ExpandAndLockInstance()
5114 def BuildHooksEnv(self):
5117 This runs on master, primary and secondary nodes of the instance.
5121 "FORCE": self.op.force,
5123 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5124 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5127 def CheckPrereq(self):
5128 """Check prerequisites.
5130 This checks that the instance is in the cluster.
5133 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5134 assert self.instance is not None, \
5135 "Cannot retrieve locked instance %s" % self.op.instance_name
5138 if self.op.hvparams:
5139 # check hypervisor parameter syntax (locally)
5140 cluster = self.cfg.GetClusterInfo()
5141 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5142 filled_hvp = cluster.FillHV(instance)
5143 filled_hvp.update(self.op.hvparams)
5144 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5145 hv_type.CheckParameterSyntax(filled_hvp)
5146 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5148 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5150 if self.primary_offline and self.op.ignore_offline_nodes:
5151 self.proc.LogWarning("Ignoring offline primary node")
5153 if self.op.hvparams or self.op.beparams:
5154 self.proc.LogWarning("Overridden parameters are ignored")
5156 _CheckNodeOnline(self, instance.primary_node)
5158 bep = self.cfg.GetClusterInfo().FillBE(instance)
5160 # check bridges existence
5161 _CheckInstanceBridgesExist(self, instance)
5163 remote_info = self.rpc.call_instance_info(instance.primary_node,
5165 instance.hypervisor)
5166 remote_info.Raise("Error checking node %s" % instance.primary_node,
5167 prereq=True, ecode=errors.ECODE_ENVIRON)
5168 if not remote_info.payload: # not running already
5169 _CheckNodeFreeMemory(self, instance.primary_node,
5170 "starting instance %s" % instance.name,
5171 bep[constants.BE_MEMORY], instance.hypervisor)
5173 def Exec(self, feedback_fn):
5174 """Start the instance.
5177 instance = self.instance
5178 force = self.op.force
5180 self.cfg.MarkInstanceUp(instance.name)
5182 if self.primary_offline:
5183 assert self.op.ignore_offline_nodes
5184 self.proc.LogInfo("Primary node offline, marked instance as started")
5186 node_current = instance.primary_node
5188 _StartInstanceDisks(self, instance, force)
5190 result = self.rpc.call_instance_start(node_current, instance,
5191 self.op.hvparams, self.op.beparams)
5192 msg = result.fail_msg
5194 _ShutdownInstanceDisks(self, instance)
5195 raise errors.OpExecError("Could not start instance: %s" % msg)
5198 class LUInstanceReboot(LogicalUnit):
5199 """Reboot an instance.
5202 HPATH = "instance-reboot"
5203 HTYPE = constants.HTYPE_INSTANCE
5206 def ExpandNames(self):
5207 self._ExpandAndLockInstance()
5209 def BuildHooksEnv(self):
5212 This runs on master, primary and secondary nodes of the instance.
5216 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5217 "REBOOT_TYPE": self.op.reboot_type,
5218 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5220 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5221 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5224 def CheckPrereq(self):
5225 """Check prerequisites.
5227 This checks that the instance is in the cluster.
5230 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5231 assert self.instance is not None, \
5232 "Cannot retrieve locked instance %s" % self.op.instance_name
5234 _CheckNodeOnline(self, instance.primary_node)
5236 # check bridges existence
5237 _CheckInstanceBridgesExist(self, instance)
5239 def Exec(self, feedback_fn):
5240 """Reboot the instance.
5243 instance = self.instance
5244 ignore_secondaries = self.op.ignore_secondaries
5245 reboot_type = self.op.reboot_type
5247 remote_info = self.rpc.call_instance_info(instance.primary_node,
5249 instance.hypervisor)
5250 remote_info.Raise("Error checking node %s" % instance.primary_node)
5251 instance_running = bool(remote_info.payload)
5253 node_current = instance.primary_node
5255 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5256 constants.INSTANCE_REBOOT_HARD]:
5257 for disk in instance.disks:
5258 self.cfg.SetDiskID(disk, node_current)
5259 result = self.rpc.call_instance_reboot(node_current, instance,
5261 self.op.shutdown_timeout)
5262 result.Raise("Could not reboot instance")
5264 if instance_running:
5265 result = self.rpc.call_instance_shutdown(node_current, instance,
5266 self.op.shutdown_timeout)
5267 result.Raise("Could not shutdown instance for full reboot")
5268 _ShutdownInstanceDisks(self, instance)
5270 self.LogInfo("Instance %s was already stopped, starting now",
5272 _StartInstanceDisks(self, instance, ignore_secondaries)
5273 result = self.rpc.call_instance_start(node_current, instance, None, None)
5274 msg = result.fail_msg
5276 _ShutdownInstanceDisks(self, instance)
5277 raise errors.OpExecError("Could not start instance for"
5278 " full reboot: %s" % msg)
5280 self.cfg.MarkInstanceUp(instance.name)
5283 class LUInstanceShutdown(LogicalUnit):
5284 """Shutdown an instance.
5287 HPATH = "instance-stop"
5288 HTYPE = constants.HTYPE_INSTANCE
5291 def ExpandNames(self):
5292 self._ExpandAndLockInstance()
5294 def BuildHooksEnv(self):
5297 This runs on master, primary and secondary nodes of the instance.
5300 env = _BuildInstanceHookEnvByObject(self, self.instance)
5301 env["TIMEOUT"] = self.op.timeout
5302 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5305 def CheckPrereq(self):
5306 """Check prerequisites.
5308 This checks that the instance is in the cluster.
5311 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5312 assert self.instance is not None, \
5313 "Cannot retrieve locked instance %s" % self.op.instance_name
5315 self.primary_offline = \
5316 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5318 if self.primary_offline and self.op.ignore_offline_nodes:
5319 self.proc.LogWarning("Ignoring offline primary node")
5321 _CheckNodeOnline(self, self.instance.primary_node)
5323 def Exec(self, feedback_fn):
5324 """Shutdown the instance.
5327 instance = self.instance
5328 node_current = instance.primary_node
5329 timeout = self.op.timeout
5331 self.cfg.MarkInstanceDown(instance.name)
5333 if self.primary_offline:
5334 assert self.op.ignore_offline_nodes
5335 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5337 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5338 msg = result.fail_msg
5340 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5342 _ShutdownInstanceDisks(self, instance)
5345 class LUInstanceReinstall(LogicalUnit):
5346 """Reinstall an instance.
5349 HPATH = "instance-reinstall"
5350 HTYPE = constants.HTYPE_INSTANCE
5353 def ExpandNames(self):
5354 self._ExpandAndLockInstance()
5356 def BuildHooksEnv(self):
5359 This runs on master, primary and secondary nodes of the instance.
5362 env = _BuildInstanceHookEnvByObject(self, self.instance)
5363 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5366 def CheckPrereq(self):
5367 """Check prerequisites.
5369 This checks that the instance is in the cluster and is not running.
5372 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5373 assert instance is not None, \
5374 "Cannot retrieve locked instance %s" % self.op.instance_name
5375 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5376 " offline, cannot reinstall")
5377 for node in instance.secondary_nodes:
5378 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5379 " cannot reinstall")
5381 if instance.disk_template == constants.DT_DISKLESS:
5382 raise errors.OpPrereqError("Instance '%s' has no disks" %
5383 self.op.instance_name,
5385 _CheckInstanceDown(self, instance, "cannot reinstall")
5387 if self.op.os_type is not None:
5389 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5390 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5391 instance_os = self.op.os_type
5393 instance_os = instance.os
5395 nodelist = list(instance.all_nodes)
5397 if self.op.osparams:
5398 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5399 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5400 self.os_inst = i_osdict # the new dict (without defaults)
5404 self.instance = instance
5406 def Exec(self, feedback_fn):
5407 """Reinstall the instance.
5410 inst = self.instance
5412 if self.op.os_type is not None:
5413 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5414 inst.os = self.op.os_type
5415 # Write to configuration
5416 self.cfg.Update(inst, feedback_fn)
5418 _StartInstanceDisks(self, inst, None)
5420 feedback_fn("Running the instance OS create scripts...")
5421 # FIXME: pass debug option from opcode to backend
5422 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5423 self.op.debug_level,
5424 osparams=self.os_inst)
5425 result.Raise("Could not install OS for instance %s on node %s" %
5426 (inst.name, inst.primary_node))
5428 _ShutdownInstanceDisks(self, inst)
5431 class LUInstanceRecreateDisks(LogicalUnit):
5432 """Recreate an instance's missing disks.
5435 HPATH = "instance-recreate-disks"
5436 HTYPE = constants.HTYPE_INSTANCE
5439 def ExpandNames(self):
5440 self._ExpandAndLockInstance()
5442 def BuildHooksEnv(self):
5445 This runs on master, primary and secondary nodes of the instance.
5448 env = _BuildInstanceHookEnvByObject(self, self.instance)
5449 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5452 def CheckPrereq(self):
5453 """Check prerequisites.
5455 This checks that the instance is in the cluster and is not running.
5458 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5459 assert instance is not None, \
5460 "Cannot retrieve locked instance %s" % self.op.instance_name
5461 _CheckNodeOnline(self, instance.primary_node)
5463 if instance.disk_template == constants.DT_DISKLESS:
5464 raise errors.OpPrereqError("Instance '%s' has no disks" %
5465 self.op.instance_name, errors.ECODE_INVAL)
5466 _CheckInstanceDown(self, instance, "cannot recreate disks")
5468 if not self.op.disks:
5469 self.op.disks = range(len(instance.disks))
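# an empty disk list means recreating all of the instance's disks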
5471 for idx in self.op.disks:
5472 if idx >= len(instance.disks):
5473 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5476 self.instance = instance
5478 def Exec(self, feedback_fn):
5479 """Recreate the disks.
5483 for idx, _ in enumerate(self.instance.disks):
5484 if idx not in self.op.disks: # disk idx has not been passed in
5488 _CreateDisks(self, self.instance, to_skip=to_skip)
5491 class LUInstanceRename(LogicalUnit):
5492 """Rename an instance.
5495 HPATH = "instance-rename"
5496 HTYPE = constants.HTYPE_INSTANCE
5498 def CheckArguments(self):
5502 if self.op.ip_check and not self.op.name_check:
5503 # TODO: make the ip check more flexible and not depend on the name check
5504 raise errors.OpPrereqError("Cannot do ip check without a name check",
5507 def BuildHooksEnv(self):
5510 This runs on master, primary and secondary nodes of the instance.
5513 env = _BuildInstanceHookEnvByObject(self, self.instance)
5514 env["INSTANCE_NEW_NAME"] = self.op.new_name
5515 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5518 def CheckPrereq(self):
5519 """Check prerequisites.
5521 This checks that the instance is in the cluster and is not running.
5524 self.op.instance_name = _ExpandInstanceName(self.cfg,
5525 self.op.instance_name)
5526 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5527 assert instance is not None
5528 _CheckNodeOnline(self, instance.primary_node)
5529 _CheckInstanceDown(self, instance, "cannot rename")
5530 self.instance = instance
5532 new_name = self.op.new_name
5533 if self.op.name_check:
5534 hostname = netutils.GetHostname(name=new_name)
5535 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5537 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5538 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5539 " same as given hostname '%s'") %
5540 (hostname.name, self.op.new_name),
5542 new_name = self.op.new_name = hostname.name
5543 if (self.op.ip_check and
5544 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5545 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5546 (hostname.ip, new_name),
5547 errors.ECODE_NOTUNIQUE)
5549 instance_list = self.cfg.GetInstanceList()
5550 if new_name in instance_list and new_name != instance.name:
5551 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5552 new_name, errors.ECODE_EXISTS)
5554 def Exec(self, feedback_fn):
5555 """Rename the instance.
5558 inst = self.instance
5559 old_name = inst.name
5561 rename_file_storage = False
5562 if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5563 self.op.new_name != inst.name):
5564 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5565 rename_file_storage = True
5567 self.cfg.RenameInstance(inst.name, self.op.new_name)
5568 # Change the instance lock. This is definitely safe while we hold the BGL
5569 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5570 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5572 # re-read the instance from the configuration after rename
5573 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5575 if rename_file_storage:
5576 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5577 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5578 old_file_storage_dir,
5579 new_file_storage_dir)
5580 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5581 " (but the instance has been renamed in Ganeti)" %
5582 (inst.primary_node, old_file_storage_dir,
5583 new_file_storage_dir))
5585 _StartInstanceDisks(self, inst, None)
5587 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5588 old_name, self.op.debug_level)
5589 msg = result.fail_msg
5591 msg = ("Could not run OS rename script for instance %s on node %s"
5592 " (but the instance has been renamed in Ganeti): %s" %
5593 (inst.name, inst.primary_node, msg))
5594 self.proc.LogWarning(msg)
5596 _ShutdownInstanceDisks(self, inst)
5601 class LUInstanceRemove(LogicalUnit):
5602 """Remove an instance.
5605 HPATH = "instance-remove"
5606 HTYPE = constants.HTYPE_INSTANCE
5609 def ExpandNames(self):
5610 self._ExpandAndLockInstance()
5611 self.needed_locks[locking.LEVEL_NODE] = []
5612 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5614 def DeclareLocks(self, level):
5615 if level == locking.LEVEL_NODE:
5616 self._LockInstancesNodes()
5618 def BuildHooksEnv(self):
5621 This runs on master, primary and secondary nodes of the instance.
5624 env = _BuildInstanceHookEnvByObject(self, self.instance)
5625 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5626 nl = [self.cfg.GetMasterNode()]
5627 nl_post = list(self.instance.all_nodes) + nl
5628 return env, nl, nl_post
5630 def CheckPrereq(self):
5631 """Check prerequisites.
5633 This checks that the instance is in the cluster.
5636 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5637 assert self.instance is not None, \
5638 "Cannot retrieve locked instance %s" % self.op.instance_name
5640 def Exec(self, feedback_fn):
5641 """Remove the instance.
5644 instance = self.instance
5645 logging.info("Shutting down instance %s on node %s",
5646 instance.name, instance.primary_node)
5648 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5649 self.op.shutdown_timeout)
5650 msg = result.fail_msg
5652 if self.op.ignore_failures:
5653 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5655 raise errors.OpExecError("Could not shutdown instance %s on"
5657 (instance.name, instance.primary_node, msg))
5659 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5662 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5663 """Utility function to remove an instance.
5666 logging.info("Removing block devices for instance %s", instance.name)
5668 if not _RemoveDisks(lu, instance):
5669 if not ignore_failures:
5670 raise errors.OpExecError("Can't remove instance's disks")
5671 feedback_fn("Warning: can't remove instance's disks")
5673 logging.info("Removing instance %s out of cluster config", instance.name)
5675 lu.cfg.RemoveInstance(instance.name)
5677 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5678 "Instance lock removal conflict"
5680 # Remove lock for the instance
5681 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5684 class LUInstanceQuery(NoHooksLU):
5685 """Logical unit for querying instances.
5688 # pylint: disable-msg=W0142
5691 def CheckArguments(self):
5692 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5693 self.op.output_fields, self.op.use_locking)
5695 def ExpandNames(self):
5696 self.iq.ExpandNames(self)
5698 def DeclareLocks(self, level):
5699 self.iq.DeclareLocks(self, level)
5701 def Exec(self, feedback_fn):
5702 return self.iq.OldStyleQuery(self)
5705 class LUInstanceFailover(LogicalUnit):
5706 """Failover an instance.
5709 HPATH = "instance-failover"
5710 HTYPE = constants.HTYPE_INSTANCE
5713 def CheckArguments(self):
5714 """Check the arguments.
5717 self.iallocator = getattr(self.op, "iallocator", None)
5718 self.target_node = getattr(self.op, "target_node", None)
5720 def ExpandNames(self):
5721 self._ExpandAndLockInstance()
5723 if self.op.target_node is not None:
5724 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5726 self.needed_locks[locking.LEVEL_NODE] = []
5727 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5729 def DeclareLocks(self, level):
5730 if level == locking.LEVEL_NODE:
5731 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5732 if instance.disk_template in constants.DTS_EXT_MIRROR:
5733 if self.op.target_node is None:
5734 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5736 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5737 self.op.target_node]
5738 del self.recalculate_locks[locking.LEVEL_NODE]
5740 self._LockInstancesNodes()
5742 def BuildHooksEnv(self):
5745 This runs on master, primary and secondary nodes of the instance.
5748 instance = self.instance
5749 source_node = instance.primary_node
5751 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5752 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5753 "OLD_PRIMARY": source_node,
5754 "NEW_PRIMARY": self.op.target_node,
5757 if instance.disk_template in constants.DTS_INT_MIRROR:
5758 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
5759 env["NEW_SECONDARY"] = source_node
5761 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
5763 env.update(_BuildInstanceHookEnvByObject(self, instance))
5764 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5766 nl_post.append(source_node)
5767 return env, nl, nl_post
5769 def CheckPrereq(self):
5770 """Check prerequisites.
5772 This checks that the instance is in the cluster.
5775 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5776 assert self.instance is not None, \
5777 "Cannot retrieve locked instance %s" % self.op.instance_name
5779 bep = self.cfg.GetClusterInfo().FillBE(instance)
5780 if instance.disk_template not in constants.DTS_MIRRORED:
5781 raise errors.OpPrereqError("Instance's disk layout is not"
5782 " mirrored, cannot failover.",
5785 if instance.disk_template in constants.DTS_EXT_MIRROR:
5786 _CheckIAllocatorOrNode(self, "iallocator", "target_node")
5787 if self.op.iallocator:
5788 self._RunAllocator()
5789 # Release all unnecessary node locks
5790 nodes_keep = [instance.primary_node, self.op.target_node]
5791 nodes_rel = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5792 if node not in nodes_keep]
5793 self.context.glm.release(locking.LEVEL_NODE, nodes_rel)
5794 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5796 # self.op.target_node is already populated, either directly or by the
# iallocator run
5798 target_node = self.op.target_node
5801 secondary_nodes = instance.secondary_nodes
5802 if not secondary_nodes:
5803 raise errors.ConfigurationError("No secondary node but using"
5804 " %s disk template" %
5805 instance.disk_template)
5806 target_node = secondary_nodes[0]
5808 if self.op.iallocator or (self.op.target_node and
5809 self.op.target_node != target_node):
5810 raise errors.OpPrereqError("Instances with disk template %s cannot"
5811 " be failed over to arbitrary nodes"
5812 " (neither an iallocator nor a target"
5813 " node can be passed)" %
5814 instance.disk_template, errors.ECODE_INVAL)
5815 _CheckNodeOnline(self, target_node)
5816 _CheckNodeNotDrained(self, target_node)
5818 # Save target_node so that we can use it in BuildHooksEnv
5819 self.op.target_node = target_node
5821 if instance.admin_up:
5822 # check memory requirements on the secondary node
5823 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5824 instance.name, bep[constants.BE_MEMORY],
5825 instance.hypervisor)
5827 self.LogInfo("Not checking memory on the secondary node as"
5828 " instance will not be started")
5830 # check bridge existance
5831 _CheckInstanceBridgesExist(self, instance, node=target_node)
5833 def Exec(self, feedback_fn):
5834 """Failover an instance.
5836 The failover is done by shutting it down on its present node and
5837 starting it on the secondary.
5840 instance = self.instance
5841 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5843 source_node = instance.primary_node
5844 target_node = self.op.target_node
5846 if instance.admin_up:
5847 feedback_fn("* checking disk consistency between source and target")
5848 for dev in instance.disks:
5849 # for drbd, these are drbd over lvm
5850 if not _CheckDiskConsistency(self, dev, target_node, False):
5851 if not self.op.ignore_consistency:
5852 raise errors.OpExecError("Disk %s is degraded on target node,"
5853 " aborting failover." % dev.iv_name)
5855 feedback_fn("* not checking disk consistency as instance is not running")
5857 feedback_fn("* shutting down instance on source node")
5858 logging.info("Shutting down instance %s on node %s",
5859 instance.name, source_node)
5861 result = self.rpc.call_instance_shutdown(source_node, instance,
5862 self.op.shutdown_timeout)
5863 msg = result.fail_msg
5865 if self.op.ignore_consistency or primary_node.offline:
5866 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5867 " Proceeding anyway. Please make sure node"
5868 " %s is down. Error details: %s",
5869 instance.name, source_node, source_node, msg)
5871 raise errors.OpExecError("Could not shutdown instance %s on"
5873 (instance.name, source_node, msg))
5875 feedback_fn("* deactivating the instance's disks on source node")
5876 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5877 raise errors.OpExecError("Can't shut down the instance's disks.")
5879 instance.primary_node = target_node
5880 # distribute new instance config to the other nodes
5881 self.cfg.Update(instance, feedback_fn)
5883 # Only start the instance if it's marked as up
5884 if instance.admin_up:
5885 feedback_fn("* activating the instance's disks on target node")
5886 logging.info("Starting instance %s on node %s",
5887 instance.name, target_node)
5889 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5890 ignore_secondaries=True)
5892 _ShutdownInstanceDisks(self, instance)
5893 raise errors.OpExecError("Can't activate the instance's disks")
5895 feedback_fn("* starting the instance on the target node")
5896 result = self.rpc.call_instance_start(target_node, instance, None, None)
5897 msg = result.fail_msg
5899 _ShutdownInstanceDisks(self, instance)
5900 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5901 (instance.name, target_node, msg))
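# Summary of the failover flow implemented above: optionally verify disk
# consistency on the target, shut the instance down on the current primary,
# deactivate its disks there, flip primary_node in the configuration, then
# reassemble the disks and restart the instance on the new primary if it was
# marked as up. This is what the "gnt-instance failover" client command ends
# up running.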
5903 def _RunAllocator(self):
5904 """Run the allocator based on input opcode.
5907 ial = IAllocator(self.cfg, self.rpc,
5908 mode=constants.IALLOCATOR_MODE_RELOC,
5909 name=self.instance.name,
5910 # TODO See why hail breaks with a single node below
5911 relocate_from=[self.instance.primary_node,
5912 self.instance.primary_node],
5915 ial.Run(self.op.iallocator)
5918 raise errors.OpPrereqError("Can't compute nodes using"
5919 " iallocator '%s': %s" %
5920 (self.op.iallocator, ial.info),
5922 if len(ial.result) != ial.required_nodes:
5923 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5924 " of nodes (%s), required %s" %
5925 (self.op.iallocator, len(ial.result),
5926 ial.required_nodes), errors.ECODE_FAULT)
5927 self.op.target_node = ial.result[0]
5928 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
5929 self.instance.name, self.op.iallocator,
5930 utils.CommaJoin(ial.result))
5933 class LUInstanceMigrate(LogicalUnit):
5934 """Migrate an instance.
5936 This is migration without shutting down, compared to the failover,
5937 which is done with shutdown.
5940 HPATH = "instance-migrate"
5941 HTYPE = constants.HTYPE_INSTANCE
5944 def ExpandNames(self):
5945 self._ExpandAndLockInstance()
5947 if self.op.target_node is not None:
5948 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5950 self.needed_locks[locking.LEVEL_NODE] = []
5951 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5953 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5954 self.op.cleanup, self.op.iallocator,
5955 self.op.target_node)
5956 self.tasklets = [self._migrater]
5958 def DeclareLocks(self, level):
5959 if level == locking.LEVEL_NODE:
5960 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5961 if instance.disk_template in constants.DTS_EXT_MIRROR:
5962 if self.op.target_node is None:
5963 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5965 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5966 self.op.target_node]
5967 del self.recalculate_locks[locking.LEVEL_NODE]
5969 self._LockInstancesNodes()
5971 def BuildHooksEnv(self):
5974 This runs on master, primary and secondary nodes of the instance.
5977 instance = self._migrater.instance
5978 source_node = instance.primary_node
5979 target_node = self._migrater.target_node
5980 env = _BuildInstanceHookEnvByObject(self, instance)
5981 env["MIGRATE_LIVE"] = self._migrater.live
5982 env["MIGRATE_CLEANUP"] = self.op.cleanup
5984 "OLD_PRIMARY": source_node,
5985 "NEW_PRIMARY": target_node,
5988 if instance.disk_template in constants.DTS_INT_MIRROR:
5989 env["OLD_SECONDARY"] = target_node
5990 env["NEW_SECONDARY"] = source_node
5992 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
5994 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5996 nl_post.append(source_node)
5997 return env, nl, nl_post
6000 class LUInstanceMove(LogicalUnit):
6001 """Move an instance by data-copying.
6004 HPATH = "instance-move"
6005 HTYPE = constants.HTYPE_INSTANCE
6008 def ExpandNames(self):
6009 self._ExpandAndLockInstance()
6010 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6011 self.op.target_node = target_node
6012 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6013 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6015 def DeclareLocks(self, level):
6016 if level == locking.LEVEL_NODE:
6017 self._LockInstancesNodes(primary_only=True)
6019 def BuildHooksEnv(self):
6022 This runs on master, primary and secondary nodes of the instance.
6026 "TARGET_NODE": self.op.target_node,
6027 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6029 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6030 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
6031 self.op.target_node]
6034 def CheckPrereq(self):
6035 """Check prerequisites.
6037 This checks that the instance is in the cluster.
6040 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6041 assert self.instance is not None, \
6042 "Cannot retrieve locked instance %s" % self.op.instance_name
6044 node = self.cfg.GetNodeInfo(self.op.target_node)
6045 assert node is not None, \
6046 "Cannot retrieve locked node %s" % self.op.target_node
6048 self.target_node = target_node = node.name
6050 if target_node == instance.primary_node:
6051 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6052 (instance.name, target_node),
6055 bep = self.cfg.GetClusterInfo().FillBE(instance)
6057 for idx, dsk in enumerate(instance.disks):
6058 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6059 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6060 " cannot copy" % idx, errors.ECODE_STATE)
6062 _CheckNodeOnline(self, target_node)
6063 _CheckNodeNotDrained(self, target_node)
6064 _CheckNodeVmCapable(self, target_node)
6066 if instance.admin_up:
6067 # check memory requirements on the target node
6068 _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
6069 instance.name, bep[constants.BE_MEMORY],
6070 instance.hypervisor)
6072 self.LogInfo("Not checking memory on the target node as"
6073 " instance will not be started")
6075 # check bridge existance
6076 _CheckInstanceBridgesExist(self, instance, node=target_node)
6078 def Exec(self, feedback_fn):
6079 """Move an instance.
6081 The move is done by shutting it down on its present node, copying
6082 the data over (slow) and starting it on the new node.
6085 instance = self.instance
6087 source_node = instance.primary_node
6088 target_node = self.target_node
6090 self.LogInfo("Shutting down instance %s on source node %s",
6091 instance.name, source_node)
6093 result = self.rpc.call_instance_shutdown(source_node, instance,
6094 self.op.shutdown_timeout)
6095 msg = result.fail_msg
6097 if self.op.ignore_consistency:
6098 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6099 " Proceeding anyway. Please make sure node"
6100 " %s is down. Error details: %s",
6101 instance.name, source_node, source_node, msg)
6103 raise errors.OpExecError("Could not shutdown instance %s on"
6105 (instance.name, source_node, msg))
6107 # create the target disks
6109 _CreateDisks(self, instance, target_node=target_node)
6110 except errors.OpExecError:
6111 self.LogWarning("Device creation failed, reverting...")
6113 _RemoveDisks(self, instance, target_node=target_node)
6115 self.cfg.ReleaseDRBDMinors(instance.name)
6118 cluster_name = self.cfg.GetClusterInfo().cluster_name
6121 # activate, get path, copy the data over
6122 for idx, disk in enumerate(instance.disks):
6123 self.LogInfo("Copying data for disk %d", idx)
6124 result = self.rpc.call_blockdev_assemble(target_node, disk,
6125 instance.name, True, idx)
6127 self.LogWarning("Can't assemble newly created disk %d: %s",
6128 idx, result.fail_msg)
6129 errs.append(result.fail_msg)
6131 dev_path = result.payload
6132 result = self.rpc.call_blockdev_export(source_node, disk,
6133 target_node, dev_path,
6136 self.LogWarning("Can't copy data over for disk %d: %s",
6137 idx, result.fail_msg)
6138 errs.append(result.fail_msg)
6142 self.LogWarning("Some disks failed to copy, aborting")
6144 _RemoveDisks(self, instance, target_node=target_node)
6146 self.cfg.ReleaseDRBDMinors(instance.name)
6147 raise errors.OpExecError("Errors during disk copy: %s" %
6150 instance.primary_node = target_node
6151 self.cfg.Update(instance, feedback_fn)
6153 self.LogInfo("Removing the disks on the original node")
6154 _RemoveDisks(self, instance, target_node=source_node)
6156 # Only start the instance if it's marked as up
6157 if instance.admin_up:
6158 self.LogInfo("Starting instance %s on node %s",
6159 instance.name, target_node)
6161 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6162 ignore_secondaries=True)
6164 _ShutdownInstanceDisks(self, instance)
6165 raise errors.OpExecError("Can't activate the instance's disks")
6167 result = self.rpc.call_instance_start(target_node, instance, None, None)
6168 msg = result.fail_msg
6170 _ShutdownInstanceDisks(self, instance)
6171 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6172 (instance.name, target_node, msg))
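# The copy above works disk by disk: each newly created disk is assembled on
# the target node (call_blockdev_assemble), its device path is handed to
# call_blockdev_export on the source node, and only once every disk copied
# cleanly are the original disks removed and the instance restarted on the
# target node.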
6175 class LUNodeMigrate(LogicalUnit):
6176 """Migrate all instances from a node.
6179 HPATH = "node-migrate"
6180 HTYPE = constants.HTYPE_NODE
6183 def CheckArguments(self):
6184 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6186 def ExpandNames(self):
6187 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6189 self.needed_locks = {}
6191 # Create tasklets for migrating instances for all instances on this node
6195 self.lock_all_nodes = False
6197 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6198 logging.debug("Migrating instance %s", inst.name)
6199 names.append(inst.name)
6201 tasklets.append(TLMigrateInstance(self, inst.name, False,
6202 self.op.iallocator, None))
6204 if inst.disk_template in constants.DTS_EXT_MIRROR:
6205 # We need to lock all nodes, as the iallocator will choose the
6206 # destination nodes afterwards
6207 self.lock_all_nodes = True
6209 self.tasklets = tasklets
6211 # Declare node locks
6212 if self.lock_all_nodes:
6213 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6215 self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6216 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6218 # Declare instance locks
6219 self.needed_locks[locking.LEVEL_INSTANCE] = names
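# Example (hypothetical instances): evacuating node3, which hosts "db1" (a
# DRBD instance) and "share1" (an externally mirrored template), creates one
# TLMigrateInstance tasklet per instance; because of "share1" the iallocator
# may pick any destination, so all node locks are taken instead of just
# node3's.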
6221 def DeclareLocks(self, level):
6222 if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6223 self._LockInstancesNodes()
6225 def BuildHooksEnv(self):
6228 This runs on the master, the primary and all the secondaries.
6232 "NODE_NAME": self.op.node_name,
6235 nl = [self.cfg.GetMasterNode()]
6237 return (env, nl, nl)
6240 class TLMigrateInstance(Tasklet):
6241 """Tasklet class for instance migration.
6244 @ivar live: whether the migration will be done live or non-live;
6245 this variable is initialized only after CheckPrereq has run
6248 def __init__(self, lu, instance_name, cleanup,
6249 iallocator=None, target_node=None):
6250 """Initializes this class.
6253 Tasklet.__init__(self, lu)
6256 self.instance_name = instance_name
6257 self.cleanup = cleanup
6258 self.live = False # will be overridden later
6259 self.iallocator = iallocator
6260 self.target_node = target_node
6262 def CheckPrereq(self):
6263 """Check prerequisites.
6265 This checks that the instance is in the cluster.
6268 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6269 instance = self.cfg.GetInstanceInfo(instance_name)
6270 assert instance is not None
6271 self.instance = instance
6273 if instance.disk_template not in constants.DTS_MIRRORED:
6274 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6275 " migrations" % instance.disk_template,
6278 if instance.disk_template in constants.DTS_EXT_MIRROR:
6279 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6282 self._RunAllocator()
6284 # self.target_node is already populated, either directly or by the iallocator run
6286 target_node = self.target_node
6288 if len(self.lu.tasklets) == 1:
6289 # It is safe to remove locks only when we're the only tasklet in the LU
6290 nodes_keep = [instance.primary_node, self.target_node]
6291 nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6292 if node not in nodes_keep]
6293 self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6294 self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6297 secondary_nodes = instance.secondary_nodes
6298 if not secondary_nodes:
6299 raise errors.ConfigurationError("No secondary node but using"
6300 " %s disk template" %
6301 instance.disk_template)
6302 target_node = secondary_nodes[0]
6303 if self.lu.op.iallocator or (self.lu.op.target_node and
6304 self.lu.op.target_node != target_node):
6305 raise errors.OpPrereqError("Instances with disk template %s cannot"
6306 " be migrated over to arbitrary nodes"
6307 " (neither an iallocator nor a target"
6308 " node can be passed)" %
6309 instance.disk_template, errors.ECODE_INVAL)
6311 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6313 # check memory requirements on the secondary node
6314 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6315 instance.name, i_be[constants.BE_MEMORY],
6316 instance.hypervisor)
6318 # check bridge existance
6319 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6321 if not self.cleanup:
6322 _CheckNodeNotDrained(self.lu, target_node)
6323 result = self.rpc.call_instance_migratable(instance.primary_node,
6325 result.Raise("Can't migrate, please use failover",
6326 prereq=True, ecode=errors.ECODE_STATE)
6329 def _RunAllocator(self):
6330 """Run the allocator based on input opcode.
6333 ial = IAllocator(self.cfg, self.rpc,
6334 mode=constants.IALLOCATOR_MODE_RELOC,
6335 name=self.instance_name,
6336 # TODO See why hail breaks with a single node below
6337 relocate_from=[self.instance.primary_node,
6338 self.instance.primary_node],
6341 ial.Run(self.iallocator)
6344 raise errors.OpPrereqError("Can't compute nodes using"
6345 " iallocator '%s': %s" %
6346 (self.iallocator, ial.info),
6348 if len(ial.result) != ial.required_nodes:
6349 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6350 " of nodes (%s), required %s" %
6351 (self.iallocator, len(ial.result),
6352 ial.required_nodes), errors.ECODE_FAULT)
6353 self.target_node = ial.result[0]
6354 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6355 self.instance_name, self.iallocator,
6356 utils.CommaJoin(ial.result))
6358 if self.lu.op.live is not None and self.lu.op.mode is not None:
6359 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6360 " parameters are accepted",
6362 if self.lu.op.live is not None:
6364 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6366 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6367 # reset the 'live' parameter to None so that repeated
6368 # invocations of CheckPrereq do not raise an exception
6369 self.lu.op.live = None
6370 elif self.lu.op.mode is None:
6371 # read the default value from the hypervisor
6372 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance, skip_globals=False)
6373 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6375 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
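# Resolution examples for the block above: op.live=True maps to
# mode=HT_MIGRATION_LIVE and op.live=False to HT_MIGRATION_NONLIVE; if neither
# 'live' nor 'mode' was given, the hypervisor's HV_MIGRATION_MODE default is
# used. self.live ends up True only for HT_MIGRATION_LIVE.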
6377 def _WaitUntilSync(self):
6378 """Poll with custom rpc for disk sync.
6380 This uses our own step-based rpc call.
6383 self.feedback_fn("* wait until resync is done")
6387 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6389 self.instance.disks)
6391 for node, nres in result.items():
6392 nres.Raise("Cannot resync disks on node %s" % node)
6393 node_done, node_percent = nres.payload
6394 all_done = all_done and node_done
6395 if node_percent is not None:
6396 min_percent = min(min_percent, node_percent)
6398 if min_percent < 100:
6399 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6402 def _EnsureSecondary(self, node):
6403 """Demote a node to secondary.
6406 self.feedback_fn("* switching node %s to secondary mode" % node)
6408 for dev in self.instance.disks:
6409 self.cfg.SetDiskID(dev, node)
6411 result = self.rpc.call_blockdev_close(node, self.instance.name,
6412 self.instance.disks)
6413 result.Raise("Cannot change disk to secondary on node %s" % node)
6415 def _GoStandalone(self):
6416 """Disconnect from the network.
6419 self.feedback_fn("* changing into standalone mode")
6420 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6421 self.instance.disks)
6422 for node, nres in result.items():
6423 nres.Raise("Cannot disconnect disks node %s" % node)
6425 def _GoReconnect(self, multimaster):
6426 """Reconnect to the network.
6432 msg = "single-master"
6433 self.feedback_fn("* changing disks into %s mode" % msg)
6434 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6435 self.instance.disks,
6436 self.instance.name, multimaster)
6437 for node, nres in result.items():
6438 nres.Raise("Cannot change disks config on node %s" % node)
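# _EnsureSecondary, _GoStandalone and _GoReconnect are the building blocks of
# the DRBD reconfiguration dance used below: disconnect the disks from the
# network, then reattach them either in dual-primary mode (multimaster=True,
# just before a live migration) or in single-primary mode (multimaster=False,
# after the migration and during cleanup).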
6440 def _ExecCleanup(self):
6441 """Try to cleanup after a failed migration.
6443 The cleanup is done by:
6444 - check that the instance is running only on one node
6445 (and update the config if needed)
6446 - change disks on its secondary node to secondary
6447 - wait until disks are fully synchronized
6448 - disconnect from the network
6449 - change disks into single-master mode
6450 - wait again until disks are fully synchronized
6453 instance = self.instance
6454 target_node = self.target_node
6455 source_node = self.source_node
6457 # check running on only one node
6458 self.feedback_fn("* checking where the instance actually runs"
6459 " (if this hangs, the hypervisor might be in"
6461 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6462 for node, result in ins_l.items():
6463 result.Raise("Can't contact node %s" % node)
6465 runningon_source = instance.name in ins_l[source_node].payload
6466 runningon_target = instance.name in ins_l[target_node].payload
6468 if runningon_source and runningon_target:
6469 raise errors.OpExecError("Instance seems to be running on two nodes,"
6470 " or the hypervisor is confused. You will have"
6471 " to ensure manually that it runs only on one"
6472 " and restart this operation.")
6474 if not (runningon_source or runningon_target):
6475 raise errors.OpExecError("Instance does not seem to be running at all."
6476 " In this case, it's safer to repair by"
6477 " running 'gnt-instance stop' to ensure disk"
6478 " shutdown, and then restarting it.")
6480 if runningon_target:
6481 # the migration has actually succeeded, we need to update the config
6482 self.feedback_fn("* instance running on secondary node (%s),"
6483 " updating config" % target_node)
6484 instance.primary_node = target_node
6485 self.cfg.Update(instance, self.feedback_fn)
6486 demoted_node = source_node
6488 self.feedback_fn("* instance confirmed to be running on its"
6489 " primary node (%s)" % source_node)
6490 demoted_node = target_node
6492 if instance.disk_template in constants.DTS_INT_MIRROR:
6493 self._EnsureSecondary(demoted_node)
6495 self._WaitUntilSync()
6496 except errors.OpExecError:
6497 # we ignore errors here, since if the device is standalone, it
6498 # won't be able to sync
6500 self._GoStandalone()
6501 self._GoReconnect(False)
6502 self._WaitUntilSync()
6504 self.feedback_fn("* done")
6506 def _RevertDiskStatus(self):
6507 """Try to revert the disk status after a failed migration.
6510 target_node = self.target_node
6511 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6515 self._EnsureSecondary(target_node)
6516 self._GoStandalone()
6517 self._GoReconnect(False)
6518 self._WaitUntilSync()
6519 except errors.OpExecError, err:
6520 self.lu.LogWarning("Migration failed and I can't reconnect the"
6521 " drives: error '%s'\n"
6522 "Please look and recover the instance status" %
6525 def _AbortMigration(self):
6526 """Call the hypervisor code to abort a started migration.
6529 instance = self.instance
6530 target_node = self.target_node
6531 migration_info = self.migration_info
6533 abort_result = self.rpc.call_finalize_migration(target_node,
6537 abort_msg = abort_result.fail_msg
6539 logging.error("Aborting migration failed on target node %s: %s",
6540 target_node, abort_msg)
6541 # Don't raise an exception here, as we still have to try to revert the
6542 # disk status, even if this step failed.
6544 def _ExecMigration(self):
6545 """Migrate an instance.
6547 The migrate is done by:
6548 - change the disks into dual-master mode
6549 - wait until disks are fully synchronized again
6550 - migrate the instance
6551 - change disks on the new secondary node (the old primary) to secondary
6552 - wait until disks are fully synchronized
6553 - change disks into single-master mode
6556 instance = self.instance
6557 target_node = self.target_node
6558 source_node = self.source_node
6560 self.feedback_fn("* checking disk consistency between source and target")
6561 for dev in instance.disks:
6562 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6563 raise errors.OpExecError("Disk %s is degraded or not fully"
6564 " synchronized on target node,"
6565 " aborting migrate." % dev.iv_name)
6567 # First get the migration information from the remote node
6568 result = self.rpc.call_migration_info(source_node, instance)
6569 msg = result.fail_msg
6571 log_err = ("Failed fetching source migration information from %s: %s" %
6573 logging.error(log_err)
6574 raise errors.OpExecError(log_err)
6576 self.migration_info = migration_info = result.payload
6578 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6579 # Then switch the disks to master/master mode
6580 self._EnsureSecondary(target_node)
6581 self._GoStandalone()
6582 self._GoReconnect(True)
6583 self._WaitUntilSync()
6585 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6586 result = self.rpc.call_accept_instance(target_node,
6589 self.nodes_ip[target_node])
6591 msg = result.fail_msg
6593 logging.error("Instance pre-migration failed, trying to revert"
6594 " disk status: %s", msg)
6595 self.feedback_fn("Pre-migration failed, aborting")
6596 self._AbortMigration()
6597 self._RevertDiskStatus()
6598 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6599 (instance.name, msg))
6601 self.feedback_fn("* migrating instance to %s" % target_node)
6603 result = self.rpc.call_instance_migrate(source_node, instance,
6604 self.nodes_ip[target_node],
6606 msg = result.fail_msg
6608 logging.error("Instance migration failed, trying to revert"
6609 " disk status: %s", msg)
6610 self.feedback_fn("Migration failed, aborting")
6611 self._AbortMigration()
6612 self._RevertDiskStatus()
6613 raise errors.OpExecError("Could not migrate instance %s: %s" %
6614 (instance.name, msg))
6617 instance.primary_node = target_node
6618 # distribute new instance config to the other nodes
6619 self.cfg.Update(instance, self.feedback_fn)
6621 result = self.rpc.call_finalize_migration(target_node,
6625 msg = result.fail_msg
6627 logging.error("Instance migration succeeded, but finalization failed:"
6629 raise errors.OpExecError("Could not finalize instance migration: %s" %
6632 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6633 self._EnsureSecondary(source_node)
6634 self._WaitUntilSync()
6635 self._GoStandalone()
6636 self._GoReconnect(False)
6637 self._WaitUntilSync()
6639 self.feedback_fn("* done")
6641 def Exec(self, feedback_fn):
6642 """Perform the migration.
6645 feedback_fn("Migrating instance %s" % self.instance.name)
6647 self.feedback_fn = feedback_fn
6649 self.source_node = self.instance.primary_node
6651 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6652 if self.instance.disk_template in constants.DTS_INT_MIRROR:
6653 self.target_node = self.instance.secondary_nodes[0]
6654 # Otherwise self.target_node has been populated either
6655 # directly, or through an iallocator.
6657 self.all_nodes = [self.source_node, self.target_node]
6659 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6660 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6664 return self._ExecCleanup()
6666 return self._ExecMigration()
6669 def _CreateBlockDev(lu, node, instance, device, force_create,
6671 """Create a tree of block devices on a given node.
6673 If this device type has to be created on secondaries, create it and all its children.
6676 If not, just recurse to children keeping the same 'force' value.
6678 @param lu: the lu on whose behalf we execute
6679 @param node: the node on which to create the device
6680 @type instance: L{objects.Instance}
6681 @param instance: the instance which owns the device
6682 @type device: L{objects.Disk}
6683 @param device: the device to create
6684 @type force_create: boolean
6685 @param force_create: whether to force creation of this device; this
6686 will be changed to True whenever we find a device which has
6687 CreateOnSecondary() attribute
6688 @param info: the extra 'metadata' we should attach to the device
6689 (this will be represented as a LVM tag)
6690 @type force_open: boolean
6691 @param force_open: this parameter will be passed to the
6692 L{backend.BlockdevCreate} function where it specifies
6693 whether we run on primary or not, and it affects both
6694 the child assembly and the device's own Open() execution
6697 if device.CreateOnSecondary():
6701 for child in device.children:
6702 _CreateBlockDev(lu, node, instance, child, force_create,
6705 if not force_create:
6708 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6711 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6712 """Create a single block device on a given node.
6714 This will not recurse over children of the device, so they must be
6717 @param lu: the lu on whose behalf we execute
6718 @param node: the node on which to create the device
6719 @type instance: L{objects.Instance}
6720 @param instance: the instance which owns the device
6721 @type device: L{objects.Disk}
6722 @param device: the device to create
6723 @param info: the extra 'metadata' we should attach to the device
6724 (this will be represented as a LVM tag)
6725 @type force_open: boolean
6726 @param force_open: this parameter will be passed to the
6727 L{backend.BlockdevCreate} function where it specifies
6728 whether we run on primary or not, and it affects both
6729 the child assembly and the device's own Open() execution
6732 lu.cfg.SetDiskID(device, node)
6733 result = lu.rpc.call_blockdev_create(node, device, device.size,
6734 instance.name, force_open, info)
6735 result.Raise("Can't create block device %s on"
6736 " node %s for instance %s" % (device, node, instance.name))
6737 if device.physical_id is None:
6738 device.physical_id = result.payload
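# Roughly, _CreateBlockDev recurses bottom-up: for a DRBD8 disk the two LV
# children (data and metadata) are created first on each node, and the parent
# device is only created where force_create ends up true; _CreateSingleBlockDev
# then issues the actual blockdev_create RPC and stores the physical_id the
# node reports back.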
6741 def _GenerateUniqueNames(lu, exts):
6742 """Generate a suitable LV name.
6744 This will generate a logical volume name for the given instance.
6749 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6750 results.append("%s%s" % (new_id, val))
6754 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6756 """Generate a drbd8 device complete with its children.
6759 port = lu.cfg.AllocatePort()
6760 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6761 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6762 logical_id=(vgname, names[0]))
6763 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6764 logical_id=(vgname, names[1]))
6765 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6766 logical_id=(primary, secondary, port,
6769 children=[dev_data, dev_meta],
6774 def _GenerateDiskTemplate(lu, template_name,
6775 instance_name, primary_node,
6776 secondary_nodes, disk_info,
6777 file_storage_dir, file_driver,
6778 base_index, feedback_fn):
6779 """Generate the entire disk layout for a given template type.
6782 #TODO: compute space requirements
6784 vgname = lu.cfg.GetVGName()
6785 disk_count = len(disk_info)
6787 if template_name == constants.DT_DISKLESS:
6789 elif template_name == constants.DT_PLAIN:
6790 if len(secondary_nodes) != 0:
6791 raise errors.ProgrammerError("Wrong template configuration")
6793 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6794 for i in range(disk_count)])
6795 for idx, disk in enumerate(disk_info):
6796 disk_index = idx + base_index
6797 vg = disk.get("vg", vgname)
6798 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6799 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6800 logical_id=(vg, names[idx]),
6801 iv_name="disk/%d" % disk_index,
6803 disks.append(disk_dev)
6804 elif template_name == constants.DT_DRBD8:
6805 if len(secondary_nodes) != 1:
6806 raise errors.ProgrammerError("Wrong template configuration")
6807 remote_node = secondary_nodes[0]
6808 minors = lu.cfg.AllocateDRBDMinor(
6809 [primary_node, remote_node] * len(disk_info), instance_name)
6812 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6813 for i in range(disk_count)]):
6814 names.append(lv_prefix + "_data")
6815 names.append(lv_prefix + "_meta")
6816 for idx, disk in enumerate(disk_info):
6817 disk_index = idx + base_index
6818 vg = disk.get("vg", vgname)
6819 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6820 disk["size"], vg, names[idx*2:idx*2+2],
6821 "disk/%d" % disk_index,
6822 minors[idx*2], minors[idx*2+1])
6823 disk_dev.mode = disk["mode"]
6824 disks.append(disk_dev)
6825 elif template_name == constants.DT_FILE:
6826 if len(secondary_nodes) != 0:
6827 raise errors.ProgrammerError("Wrong template configuration")
6829 opcodes.RequireFileStorage()
6831 for idx, disk in enumerate(disk_info):
6832 disk_index = idx + base_index
6833 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6834 iv_name="disk/%d" % disk_index,
6835 logical_id=(file_driver,
6836 "%s/disk%d" % (file_storage_dir,
6839 disks.append(disk_dev)
6840 elif template_name == constants.DT_SHARED_FILE:
6841 if len(secondary_nodes) != 0:
6842 raise errors.ProgrammerError("Wrong template configuration")
6844 opcodes.RequireSharedFileStorage()
6846 for idx, disk in enumerate(disk_info):
6847 disk_index = idx + base_index
6848 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6849 iv_name="disk/%d" % disk_index,
6850 logical_id=(file_driver,
6851 "%s/disk%d" % (file_storage_dir,
6854 disks.append(disk_dev)
6855 elif template_name == constants.DT_BLOCK:
6856 if len(secondary_nodes) != 0:
6857 raise errors.ProgrammerError("Wrong template configuration")
6859 for idx, disk in enumerate(disk_info):
6860 disk_index = idx + base_index
6861 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV, size=disk["size"],
6862 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
6864 iv_name="disk/%d" % disk_index,
6866 disks.append(disk_dev)
6869 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
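# Example input (hypothetical values): for DT_PLAIN, disk_info of
# [{"size": 10240, "mode": "rw", "vg": "xenvg"}] with base_index 0 produces a
# single LD_LV disk of 10240 MiB named "<unique-id>.disk0" in volume group
# "xenvg", exposed to the instance as iv_name "disk/0".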
6873 def _GetInstanceInfoText(instance):
6874 """Compute that text that should be added to the disk's metadata.
6877 return "originstname+%s" % instance.name
6880 def _CalcEta(time_taken, written, total_size):
6881 """Calculates the ETA based on size written and total size.
6883 @param time_taken: The time taken so far
6884 @param written: amount written so far
6885 @param total_size: The total size of data to be written
6886 @return: The remaining time in seconds
6889 avg_time = time_taken / float(written)
6890 return (total_size - written) * avg_time
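# Worked example: if 256 MiB out of 1024 MiB were written in 60 seconds,
# avg_time is 60/256 = 0.234 s/MiB, so the remaining 768 MiB give an ETA of
# roughly 768 * 0.234 = 180 seconds (any consistent size unit works).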
6893 def _WipeDisks(lu, instance):
6894 """Wipes instance disks.
6896 @type lu: L{LogicalUnit}
6897 @param lu: the logical unit on whose behalf we execute
6898 @type instance: L{objects.Instance}
6899 @param instance: the instance whose disks we should create
6900 @return: the success of the wipe
6903 node = instance.primary_node
6905 for device in instance.disks:
6906 lu.cfg.SetDiskID(device, node)
6908 logging.info("Pause sync of instance %s disks", instance.name)
6909 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6911 for idx, success in enumerate(result.payload):
6913 logging.warn("pause-sync of instance %s for disks %d failed",
6917 for idx, device in enumerate(instance.disks):
6918 lu.LogInfo("* Wiping disk %d", idx)
6919 logging.info("Wiping disk %d for instance %s, node %s",
6920 idx, instance.name, node)
6922 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
6923 # but at most MAX_WIPE_CHUNK
6924 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6925 constants.MIN_WIPE_CHUNK_PERCENT)
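# Worked example (assuming the usual defaults of MIN_WIPE_CHUNK_PERCENT=10 and
# MAX_WIPE_CHUNK=1024 MiB): a 20480 MiB disk gives 10% = 2048 MiB, capped to
# 1024 MiB, so it is wiped in 1 GiB chunks, while a 5120 MiB disk uses 512 MiB
# chunks.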
6930 start_time = time.time()
6932 while offset < size:
6933 wipe_size = min(wipe_chunk_size, size - offset)
6934 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6935 result.Raise("Could not wipe disk %d at offset %d for size %d" %
6936 (idx, offset, wipe_size))
6939 if now - last_output >= 60:
6940 eta = _CalcEta(now - start_time, offset, size)
6941 lu.LogInfo(" - done: %.1f%% ETA: %s" %
6942 (offset / float(size) * 100, utils.FormatSeconds(eta)))
6945 logging.info("Resume sync of instance %s disks", instance.name)
6947 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6949 for idx, success in enumerate(result.payload):
6951 lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6952 " look at the status and troubleshoot the issue.", idx)
6953 logging.warn("resume-sync of instance %s for disks %d failed",
6957 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6958 """Create all disks for an instance.
6960 This abstracts away some work from AddInstance.
6962 @type lu: L{LogicalUnit}
6963 @param lu: the logical unit on whose behalf we execute
6964 @type instance: L{objects.Instance}
6965 @param instance: the instance whose disks we should create
6967 @param to_skip: list of indices to skip
6968 @type target_node: string
6969 @param target_node: if passed, overrides the target node for creation
6971 @return: the success of the creation
6974 info = _GetInstanceInfoText(instance)
6975 if target_node is None:
6976 pnode = instance.primary_node
6977 all_nodes = instance.all_nodes
6982 if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
6983 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6984 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6986 result.Raise("Failed to create directory '%s' on"
6987 " node %s" % (file_storage_dir, pnode))
6989 # Note: this needs to be kept in sync with adding of disks in
6990 # LUInstanceSetParams
6991 for idx, device in enumerate(instance.disks):
6992 if to_skip and idx in to_skip:
6994 logging.info("Creating volume %s for instance %s",
6995 device.iv_name, instance.name)
6997 for node in all_nodes:
6998 f_create = node == pnode
6999 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7002 def _RemoveDisks(lu, instance, target_node=None):
7003 """Remove all disks for an instance.
7005 This abstracts away some work from `AddInstance()` and
7006 `RemoveInstance()`. Note that in case some of the devices couldn't
7007 be removed, the removal will continue with the other ones (compare
7008 with `_CreateDisks()`).
7010 @type lu: L{LogicalUnit}
7011 @param lu: the logical unit on whose behalf we execute
7012 @type instance: L{objects.Instance}
7013 @param instance: the instance whose disks we should remove
7014 @type target_node: string
7015 @param target_node: used to override the node on which to remove the disks
7017 @return: the success of the removal
7020 logging.info("Removing block devices for instance %s", instance.name)
7023 for device in instance.disks:
7025 edata = [(target_node, device)]
7027 edata = device.ComputeNodeTree(instance.primary_node)
7028 for node, disk in edata:
7029 lu.cfg.SetDiskID(disk, node)
7030 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7032 lu.LogWarning("Could not remove block device %s on node %s,"
7033 " continuing anyway: %s", device.iv_name, node, msg)
7036 if instance.disk_template == constants.DT_FILE:
7037 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7041 tgt = instance.primary_node
7042 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7044 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7045 file_storage_dir, instance.primary_node, result.fail_msg)
7051 def _ComputeDiskSizePerVG(disk_template, disks):
7052 """Compute disk size requirements in the volume group
7055 def _compute(disks, payload):
7056 """Universal algorithm
7061 vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload
7065 # Required free disk space as a function of disk and swap space
7067 constants.DT_DISKLESS: {},
7068 constants.DT_PLAIN: _compute(disks, 0),
7069 # 128 MB are added for drbd metadata for each disk
7070 constants.DT_DRBD8: _compute(disks, 128),
7071 constants.DT_FILE: {},
7072 constants.DT_SHARED_FILE: {},
7075 if disk_template not in req_size_dict:
7076 raise errors.ProgrammerError("Disk template '%s' size requirement"
7077 " is unknown" % disk_template)
7079 return req_size_dict[disk_template]
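# Example (hypothetical disks): for DT_DRBD8 with
# disks=[{"vg": "xenvg", "size": 1024}, {"vg": "xenvg", "size": 2048}] the
# result is {"xenvg": (1024 + 128) + (2048 + 128)} = {"xenvg": 3328}, i.e.
# every disk contributes its size plus 128 MiB of DRBD metadata to its volume
# group.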
7082 def _ComputeDiskSize(disk_template, disks):
7083 """Compute disk size requirements in the volume group
7086 # Required free disk space as a function of disk and swap space
7088 constants.DT_DISKLESS: None,
7089 constants.DT_PLAIN: sum(d["size"] for d in disks),
7090 # 128 MB are added for drbd metadata for each disk
7091 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
7092 constants.DT_FILE: None,
7093 constants.DT_SHARED_FILE: 0,
7094 constants.DT_BLOCK: 0,
7097 if disk_template not in req_size_dict:
7098 raise errors.ProgrammerError("Disk template '%s' size requirement"
7099 " is unknown" % disk_template)
7101 return req_size_dict[disk_template]
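# Example: the same two DRBD8 disks of 1024 and 2048 MiB need
# (1024 + 128) + (2048 + 128) = 3328 MiB in total, DT_PLAIN would need just
# 1024 + 2048 = 3072 MiB, and the file-based templates need no volume group
# space at all.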
7104 def _FilterVmNodes(lu, nodenames):
7105 """Filters out non-vm_capable nodes from a list.
7107 @type lu: L{LogicalUnit}
7108 @param lu: the logical unit for which we check
7109 @type nodenames: list
7110 @param nodenames: the list of nodes on which we should check
7112 @return: the list of vm-capable nodes
7115 nonvm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7116 return [name for name in nodenames if name not in nonvm_nodes]
7119 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7120 """Hypervisor parameter validation.
7122 This function abstract the hypervisor parameter validation to be
7123 used in both instance create and instance modify.
7125 @type lu: L{LogicalUnit}
7126 @param lu: the logical unit for which we check
7127 @type nodenames: list
7128 @param nodenames: the list of nodes on which we should check
7129 @type hvname: string
7130 @param hvname: the name of the hypervisor we should use
7131 @type hvparams: dict
7132 @param hvparams: the parameters which we need to check
7133 @raise errors.OpPrereqError: if the parameters are not valid
7136 nodenames = _FilterVmNodes(lu, nodenames)
7137 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7140 for node in nodenames:
7144 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7147 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7148 """OS parameters validation.
7150 @type lu: L{LogicalUnit}
7151 @param lu: the logical unit for which we check
7152 @type required: boolean
7153 @param required: whether the validation should fail if the OS is not found
7155 @type nodenames: list
7156 @param nodenames: the list of nodes on which we should check
7157 @type osname: string
7158 @param osname: the name of the OS we should use
7159 @type osparams: dict
7160 @param osparams: the parameters which we need to check
7161 @raise errors.OpPrereqError: if the parameters are not valid
7164 nodenames = _FilterVmNodes(lu, nodenames)
7165 result = lu.rpc.call_os_validate(required, nodenames, osname,
7166 [constants.OS_VALIDATE_PARAMETERS],
7168 for node, nres in result.items():
7169 # we don't check for offline cases since this should be run only
7170 # against the master node and/or an instance's nodes
7171 nres.Raise("OS Parameters validation failed on node %s" % node)
7172 if not nres.payload:
7173 lu.LogInfo("OS %s not found on node %s, validation skipped",
7177 class LUInstanceCreate(LogicalUnit):
7178 """Create an instance.
7181 HPATH = "instance-add"
7182 HTYPE = constants.HTYPE_INSTANCE
7185 def CheckArguments(self):
7189 # do not require name_check to ease forward/backward compatibility
7191 if self.op.no_install and self.op.start:
7192 self.LogInfo("No-installation mode selected, disabling startup")
7193 self.op.start = False
7194 # validate/normalize the instance name
7195 self.op.instance_name = \
7196 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7198 if self.op.ip_check and not self.op.name_check:
7199 # TODO: make the ip check more flexible and not depend on the name check
7200 raise errors.OpPrereqError("Cannot do ip check without a name check",
7203 # check nics' parameter names
7204 for nic in self.op.nics:
7205 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7207 # check disks. parameter names and consistent adopt/no-adopt strategy
7208 has_adopt = has_no_adopt = False
7209 for disk in self.op.disks:
7210 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7215 if has_adopt and has_no_adopt:
7216 raise errors.OpPrereqError("Either all disks are adopted or none is",
7219 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7220 raise errors.OpPrereqError("Disk adoption is not supported for the"
7221 " '%s' disk template" %
7222 self.op.disk_template,
7224 if self.op.iallocator is not None:
7225 raise errors.OpPrereqError("Disk adoption not allowed with an"
7226 " iallocator script", errors.ECODE_INVAL)
7227 if self.op.mode == constants.INSTANCE_IMPORT:
7228 raise errors.OpPrereqError("Disk adoption not allowed for"
7229 " instance import", errors.ECODE_INVAL)
7231 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7232 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7233 " but no 'adopt' parameter given" %
7234 self.op.disk_template,
7237 self.adopt_disks = has_adopt
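# Example (hypothetical volume name): with DT_PLAIN, a disk specification such
# as {"adopt": "existing-lv"} asks Ganeti to take over an already existing
# logical volume instead of creating a new one; as checked above, adopted and
# non-adopted disks cannot be mixed in a single request.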
7239 # instance name verification
7240 if self.op.name_check:
7241 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7242 self.op.instance_name = self.hostname1.name
7243 # used in CheckPrereq for ip ping check
7244 self.check_ip = self.hostname1.ip
7246 self.check_ip = None
7248 # file storage checks
7249 if (self.op.file_driver and
7250 not self.op.file_driver in constants.FILE_DRIVER):
7251 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7252 self.op.file_driver, errors.ECODE_INVAL)
7254 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7255 raise errors.OpPrereqError("File storage directory path not absolute",
7258 ### Node/iallocator related checks
7259 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7261 if self.op.pnode is not None:
7262 if self.op.disk_template in constants.DTS_INT_MIRROR:
7263 if self.op.snode is None:
7264 raise errors.OpPrereqError("The networked disk templates need"
7265 " a mirror node", errors.ECODE_INVAL)
7267 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7269 self.op.snode = None
7271 self._cds = _GetClusterDomainSecret()
7273 if self.op.mode == constants.INSTANCE_IMPORT:
7274 # On import force_variant must be True, because if we forced it at
7275 # initial install, our only chance when importing it back is that it
7277 self.op.force_variant = True
7279 if self.op.no_install:
7280 self.LogInfo("No-installation mode has no effect during import")
7282 elif self.op.mode == constants.INSTANCE_CREATE:
7283 if self.op.os_type is None:
7284 raise errors.OpPrereqError("No guest OS specified",
7286 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7287 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7288 " installation" % self.op.os_type,
7290 if self.op.disk_template is None:
7291 raise errors.OpPrereqError("No disk template specified",
7294 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7295 # Check handshake to ensure both clusters have the same domain secret
7296 src_handshake = self.op.source_handshake
7297 if not src_handshake:
7298 raise errors.OpPrereqError("Missing source handshake",
7301 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7304 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7307 # Load and check source CA
7308 self.source_x509_ca_pem = self.op.source_x509_ca
7309 if not self.source_x509_ca_pem:
7310 raise errors.OpPrereqError("Missing source X509 CA",
7314 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7316 except OpenSSL.crypto.Error, err:
7317 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7318 (err, ), errors.ECODE_INVAL)
7320 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7321 if errcode is not None:
7322 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7325 self.source_x509_ca = cert
7327 src_instance_name = self.op.source_instance_name
7328 if not src_instance_name:
7329 raise errors.OpPrereqError("Missing source instance name",
7332 self.source_instance_name = \
7333 netutils.GetHostname(name=src_instance_name).name
7336 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7337 self.op.mode, errors.ECODE_INVAL)
7339 def ExpandNames(self):
7340 """ExpandNames for CreateInstance.
7342 Figure out the right locks for instance creation.
7345 self.needed_locks = {}
7347 instance_name = self.op.instance_name
7348 # this is just a preventive check, but someone might still add this
7349 # instance in the meantime, and creation will fail at lock-add time
7350 if instance_name in self.cfg.GetInstanceList():
7351 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7352 instance_name, errors.ECODE_EXISTS)
7354 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7356 if self.op.iallocator:
7357 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7359 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7360 nodelist = [self.op.pnode]
7361 if self.op.snode is not None:
7362 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7363 nodelist.append(self.op.snode)
7364 self.needed_locks[locking.LEVEL_NODE] = nodelist
7366 # in case of import lock the source node too
7367 if self.op.mode == constants.INSTANCE_IMPORT:
7368 src_node = self.op.src_node
7369 src_path = self.op.src_path
7371 if src_path is None:
7372 self.op.src_path = src_path = self.op.instance_name
7374 if src_node is None:
7375 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7376 self.op.src_node = None
7377 if os.path.isabs(src_path):
7378 raise errors.OpPrereqError("Importing an instance from an absolute"
7379 " path requires a source node option.",
7382 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7383 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7384 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7385 if not os.path.isabs(src_path):
7386 self.op.src_path = src_path = \
7387 utils.PathJoin(constants.EXPORT_DIR, src_path)
7389 def _RunAllocator(self):
7390 """Run the allocator based on input opcode.
7393 nics = [n.ToDict() for n in self.nics]
7394 ial = IAllocator(self.cfg, self.rpc,
7395 mode=constants.IALLOCATOR_MODE_ALLOC,
7396 name=self.op.instance_name,
7397 disk_template=self.op.disk_template,
7400 vcpus=self.be_full[constants.BE_VCPUS],
7401 mem_size=self.be_full[constants.BE_MEMORY],
7404 hypervisor=self.op.hypervisor,
7407 ial.Run(self.op.iallocator)
7410 raise errors.OpPrereqError("Can't compute nodes using"
7411 " iallocator '%s': %s" %
7412 (self.op.iallocator, ial.info),
7414 if len(ial.result) != ial.required_nodes:
7415 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7416 " of nodes (%s), required %s" %
7417 (self.op.iallocator, len(ial.result),
7418 ial.required_nodes), errors.ECODE_FAULT)
7419 self.op.pnode = ial.result[0]
7420 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7421 self.op.instance_name, self.op.iallocator,
7422 utils.CommaJoin(ial.result))
7423 if ial.required_nodes == 2:
7424 self.op.snode = ial.result[1]
7426 def BuildHooksEnv(self):
7429 This runs on master, primary and secondary nodes of the instance.
7433 "ADD_MODE": self.op.mode,
7435 if self.op.mode == constants.INSTANCE_IMPORT:
7436 env["SRC_NODE"] = self.op.src_node
7437 env["SRC_PATH"] = self.op.src_path
7438 env["SRC_IMAGES"] = self.src_images
7440 env.update(_BuildInstanceHookEnv(
7441 name=self.op.instance_name,
7442 primary_node=self.op.pnode,
7443 secondary_nodes=self.secondaries,
7444 status=self.op.start,
7445 os_type=self.op.os_type,
7446 memory=self.be_full[constants.BE_MEMORY],
7447 vcpus=self.be_full[constants.BE_VCPUS],
7448 nics=_NICListToTuple(self, self.nics),
7449 disk_template=self.op.disk_template,
7450 disks=[(d["size"], d["mode"]) for d in self.disks],
7453 hypervisor_name=self.op.hypervisor,
7456 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7460 def _ReadExportInfo(self):
7461 """Reads the export information from disk.
7463 It will override the opcode source node and path with the actual
7464 information, if these two were not specified before.
7466 @return: the export information
7469 assert self.op.mode == constants.INSTANCE_IMPORT
7471 src_node = self.op.src_node
7472 src_path = self.op.src_path
7474 if src_node is None:
7475 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7476 exp_list = self.rpc.call_export_list(locked_nodes)
7478 for node in exp_list:
7479 if exp_list[node].fail_msg:
7481 if src_path in exp_list[node].payload:
7483 self.op.src_node = src_node = node
7484 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7488 raise errors.OpPrereqError("No export found for relative path %s" %
7489 src_path, errors.ECODE_INVAL)
7491 _CheckNodeOnline(self, src_node)
7492 result = self.rpc.call_export_info(src_node, src_path)
7493 result.Raise("No export or invalid export found in dir %s" % src_path)
7495 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7496 if not export_info.has_section(constants.INISECT_EXP):
7497 raise errors.ProgrammerError("Corrupted export config",
7498 errors.ECODE_ENVIRON)
7500 ei_version = export_info.get(constants.INISECT_EXP, "version")
7501 if (int(ei_version) != constants.EXPORT_VERSION):
7502 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7503 (ei_version, constants.EXPORT_VERSION),
7504 errors.ECODE_ENVIRON)
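# Illustrative sketch (not part of this module): the export info read above is
# an INI-style file; a minimal standalone version check could look like this.
# The section/option names are assumptions for illustration.
import ConfigParser
import StringIO

def _SketchCheckExportVersion(data, wanted_version):
  """Parse INI-formatted export data and verify its export version."""
  parser = ConfigParser.SafeConfigParser()
  parser.readfp(StringIO.StringIO(data))
  if not parser.has_section("export"):
    raise ValueError("corrupted export data: missing [export] section")
  found = parser.getint("export", "version")
  if found != wanted_version:
    raise ValueError("wrong export version %d (wanted %d)" %
                     (found, wanted_version))
  return parser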
7507 def _ReadExportParams(self, einfo):
7508 """Use export parameters as defaults.
7510 If the opcode doesn't specify (i.e. override) some instance
7511 parameters, then try to use them from the export information, if
7515 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7517 if self.op.disk_template is None:
7518 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7519 self.op.disk_template = einfo.get(constants.INISECT_INS,
7522 raise errors.OpPrereqError("No disk template specified and the export"
7523 " is missing the disk_template information",
7526 if not self.op.disks:
7527 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7529 # TODO: import the disk iv_name too
7530 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7531 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7532 disks.append({"size": disk_sz})
7533 self.op.disks = disks
7535 raise errors.OpPrereqError("No disk info specified and the export"
7536 " is missing the disk information",
7539 if (not self.op.nics and
7540 einfo.has_option(constants.INISECT_INS, "nic_count")):
7542 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7544 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7545 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7550 if (self.op.hypervisor is None and
7551 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7552 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7553 if einfo.has_section(constants.INISECT_HYP):
7554 # use the export parameters but do not override the ones
7555 # specified by the user
7556 for name, value in einfo.items(constants.INISECT_HYP):
7557 if name not in self.op.hvparams:
7558 self.op.hvparams[name] = value
7560 if einfo.has_section(constants.INISECT_BEP):
7561 # use the parameters, without overriding
7562 for name, value in einfo.items(constants.INISECT_BEP):
7563 if name not in self.op.beparams:
7564 self.op.beparams[name] = value
7566 # try to read the parameters old style, from the main section
7567 for name in constants.BES_PARAMETERS:
7568 if (name not in self.op.beparams and
7569 einfo.has_option(constants.INISECT_INS, name)):
7570 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7572 if einfo.has_section(constants.INISECT_OSP):
7573 # use the parameters, without overriding
7574 for name, value in einfo.items(constants.INISECT_OSP):
7575 if name not in self.op.osparams:
7576 self.op.osparams[name] = value
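# Illustrative sketch (not part of this module): the "take values from the
# export but never override what the user passed" pattern used repeatedly
# above, as one standalone helper (hypothetical name).
def _SketchFillMissingParams(user_params, export_params):
  """Return a copy of user_params completed with values from export_params."""
  filled = dict(export_params)
  filled.update(user_params)  # user-supplied keys always win
  return filled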
7578 def _RevertToDefaults(self, cluster):
7579 """Revert the instance parameters to the default values.
7583 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7584 for name in self.op.hvparams.keys():
7585 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7586 del self.op.hvparams[name]
7588 be_defs = cluster.SimpleFillBE({})
7589 for name in self.op.beparams.keys():
7590 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7591 del self.op.beparams[name]
7593 nic_defs = cluster.SimpleFillNIC({})
7594 for nic in self.op.nics:
7595 for name in constants.NICS_PARAMETERS:
7596 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7599 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7600 for name in self.op.osparams.keys():
7601 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7602 del self.op.osparams[name]
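# Illustrative sketch (not part of this module): _RevertToDefaults above keeps
# only the parameters that differ from the cluster defaults; the core idea in
# isolation (hypothetical helper name).
def _SketchStripDefaults(params, defaults):
  """Return a copy of params without entries that merely repeat the defaults."""
  return dict((name, value) for name, value in params.items()
              if name not in defaults or defaults[name] != value)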
7604 def CheckPrereq(self):
7605 """Check prerequisites.
7608 if self.op.mode == constants.INSTANCE_IMPORT:
7609 export_info = self._ReadExportInfo()
7610 self._ReadExportParams(export_info)
7612 if (not self.cfg.GetVGName() and
7613 self.op.disk_template not in constants.DTS_NOT_LVM):
7614 raise errors.OpPrereqError("Cluster does not support lvm-based"
7615 " instances", errors.ECODE_STATE)
7617 if self.op.hypervisor is None:
7618 self.op.hypervisor = self.cfg.GetHypervisorType()
7620 cluster = self.cfg.GetClusterInfo()
7621 enabled_hvs = cluster.enabled_hypervisors
7622 if self.op.hypervisor not in enabled_hvs:
7623 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7624 " cluster (%s)" % (self.op.hypervisor,
7625 ",".join(enabled_hvs)),
7628 # check hypervisor parameter syntax (locally)
7629 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7630 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7632 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7633 hv_type.CheckParameterSyntax(filled_hvp)
7634 self.hv_full = filled_hvp
7635 # check that we don't specify global parameters on an instance
7636 _CheckGlobalHvParams(self.op.hvparams)
7638 # fill and remember the beparams dict
7639 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7640 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7642 # build os parameters
7643 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7645 # now that hvp/bep are in final format, let's reset to defaults,
7647 if self.op.identify_defaults:
7648 self._RevertToDefaults(cluster)
7652 for idx, nic in enumerate(self.op.nics):
7653 nic_mode_req = nic.get("mode", None)
7654 nic_mode = nic_mode_req
7655 if nic_mode is None:
7656 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7658 # in routed mode, for the first nic, the default ip is 'auto'
7659 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7660 default_ip_mode = constants.VALUE_AUTO
7662 default_ip_mode = constants.VALUE_NONE
7664 # ip validity checks
7665 ip = nic.get("ip", default_ip_mode)
7666 if ip is None or ip.lower() == constants.VALUE_NONE:
7668 elif ip.lower() == constants.VALUE_AUTO:
7669 if not self.op.name_check:
7670 raise errors.OpPrereqError("IP address set to auto but name checks"
7671 " have been skipped",
7673 nic_ip = self.hostname1.ip
7675 if not netutils.IPAddress.IsValid(ip):
7676 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7680 # TODO: check the ip address for uniqueness
7681 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7682 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7685 # MAC address verification
7686 mac = nic.get("mac", constants.VALUE_AUTO)
7687 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7688 mac = utils.NormalizeAndValidateMac(mac)
7691 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7692 except errors.ReservationError:
7693 raise errors.OpPrereqError("MAC address %s already in use"
7694 " in cluster" % mac,
7695 errors.ECODE_NOTUNIQUE)
7697 # Build nic parameters
7698 link = nic.get(constants.INIC_LINK, None)
7701 nicparams[constants.NIC_MODE] = nic_mode_req
7703 nicparams[constants.NIC_LINK] = link
7705 check_params = cluster.SimpleFillNIC(nicparams)
7706 objects.NIC.CheckParameterSyntax(check_params)
7707 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7709 # disk checks/pre-build
7711 for disk in self.op.disks:
7712 mode = disk.get("mode", constants.DISK_RDWR)
7713 if mode not in constants.DISK_ACCESS_SET:
7714 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7715 mode, errors.ECODE_INVAL)
7716 size = disk.get("size", None)
7718 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7721 except (TypeError, ValueError):
7722 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7724 vg = disk.get("vg", self.cfg.GetVGName())
7725 new_disk = {"size": size, "mode": mode, "vg": vg}
7727 new_disk["adopt"] = disk["adopt"]
7728 self.disks.append(new_disk)
7730 if self.op.mode == constants.INSTANCE_IMPORT:
7732 # Check that the new instance doesn't have less disks than the export
7733 instance_disks = len(self.disks)
7734 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7735 if instance_disks < export_disks:
7736 raise errors.OpPrereqError("Not enough disks to import."
7737 " (instance: %d, export: %d)" %
7738 (instance_disks, export_disks),
7742 for idx in range(export_disks):
7743 option = 'disk%d_dump' % idx
7744 if export_info.has_option(constants.INISECT_INS, option):
7745 # FIXME: are the old OSes, disk sizes, etc. useful?
7746 export_name = export_info.get(constants.INISECT_INS, option)
7747 image = utils.PathJoin(self.op.src_path, export_name)
7748 disk_images.append(image)
7750 disk_images.append(False)
7752 self.src_images = disk_images
7754 old_name = export_info.get(constants.INISECT_INS, 'name')
7756 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7757 except (TypeError, ValueError), err:
7758 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7759 " an integer: %s" % str(err),
7761 if self.op.instance_name == old_name:
7762 for idx, nic in enumerate(self.nics):
7763 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7764 nic_mac_ini = 'nic%d_mac' % idx
7765 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7767 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7769 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7770 if self.op.ip_check:
7771 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7772 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7773 (self.check_ip, self.op.instance_name),
7774 errors.ECODE_NOTUNIQUE)
7776 #### mac address generation
7777 # By generating the MAC address here, both the allocator and the hooks get
7778 # the real, final MAC address rather than the 'auto' or 'generate' value.
7779 # There is a race condition between the generation and the instance object
7780 # creation, which means that we know the MAC is valid now, but we're not
7781 # sure it will still be valid when we actually add the instance. If things
7782 # go bad, adding the instance will abort because of a duplicate MAC, and
7783 # the creation job will fail.
7784 for nic in self.nics:
7785 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7786 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7790 if self.op.iallocator is not None:
7791 self._RunAllocator()
7793 #### node related checks
7795 # check primary node
7796 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7797 assert self.pnode is not None, \
7798 "Cannot retrieve locked node %s" % self.op.pnode
7800 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7801 pnode.name, errors.ECODE_STATE)
7803 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7804 pnode.name, errors.ECODE_STATE)
7805 if not pnode.vm_capable:
7806 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7807 " '%s'" % pnode.name, errors.ECODE_STATE)
7809 self.secondaries = []
7811 # mirror node verification
7812 if self.op.disk_template in constants.DTS_INT_MIRROR:
7813 if self.op.snode == pnode.name:
7814 raise errors.OpPrereqError("The secondary node cannot be the"
7815 " primary node.", errors.ECODE_INVAL)
7816 _CheckNodeOnline(self, self.op.snode)
7817 _CheckNodeNotDrained(self, self.op.snode)
7818 _CheckNodeVmCapable(self, self.op.snode)
7819 self.secondaries.append(self.op.snode)
7821 nodenames = [pnode.name] + self.secondaries
7823 if not self.adopt_disks:
7824 # Check lv size requirements, if not adopting
7825 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7826 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7828 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
7829 all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7830 if len(all_lvs) != len(self.disks):
7831 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7833 for lv_name in all_lvs:
7835 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
7836 # to ReserveLV use the same syntax
7837 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7838 except errors.ReservationError:
7839 raise errors.OpPrereqError("LV named %s used by another instance" %
7840 lv_name, errors.ECODE_NOTUNIQUE)
7842 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7843 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7845 node_lvs = self.rpc.call_lv_list([pnode.name],
7846 vg_names.payload.keys())[pnode.name]
7847 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7848 node_lvs = node_lvs.payload
7850 delta = all_lvs.difference(node_lvs.keys())
7852 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7853 utils.CommaJoin(delta),
7855 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7857 raise errors.OpPrereqError("Online logical volumes found, cannot"
7858 " adopt: %s" % utils.CommaJoin(online_lvs),
7860 # update the size of disk based on what is found
7861 for dsk in self.disks:
7862 dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7864 elif self.op.disk_template == constants.DT_BLOCK:
7865 # Normalize and de-duplicate device paths
7866 all_disks = set([os.path.abspath(i["adopt"]) for i in self.disks])
7867 if len(all_disks) != len(self.disks):
7868 raise errors.OpPrereqError("Duplicate disk names given for adoption",
7870 baddisks = [d for d in all_disks
7871 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
7873 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
7874 " cannot be adopted" %
7875 (", ".join(baddisks),
7876 constants.ADOPTABLE_BLOCKDEV_ROOT),
7879 node_disks = self.rpc.call_bdev_sizes([pnode.name],
7880 list(all_disks))[pnode.name]
7881 node_disks.Raise("Cannot get block device information from node %s" %
7883 node_disks = node_disks.payload
7884 delta = all_disks.difference(node_disks.keys())
7886 raise errors.OpPrereqError("Missing block device(s): %s" %
7887 utils.CommaJoin(delta),
7889 for dsk in self.disks:
7890 dsk["size"] = int(float(node_disks[dsk["adopt"]]))
7892 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7894 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7895 # check OS parameters (remotely)
7896 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7898 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7900 # memory check on primary node
7902 _CheckNodeFreeMemory(self, self.pnode.name,
7903 "creating instance %s" % self.op.instance_name,
7904 self.be_full[constants.BE_MEMORY],
7907 self.dry_run_result = list(nodenames)
7909 def Exec(self, feedback_fn):
7910 """Create and add the instance to the cluster.
7913 instance = self.op.instance_name
7914 pnode_name = self.pnode.name
7916 ht_kind = self.op.hypervisor
7917 if ht_kind in constants.HTS_REQ_PORT:
7918 network_port = self.cfg.AllocatePort()
7922 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
7923 # this is needed because os.path.join does not accept None arguments
7924 if self.op.file_storage_dir is None:
7925 string_file_storage_dir = ""
7927 string_file_storage_dir = self.op.file_storage_dir
7929 # build the full file storage dir path
7930 if self.op.disk_template == constants.DT_SHARED_FILE:
7931 get_fsd_fn = self.cfg.GetSharedFileStorageDir
7933 get_fsd_fn = self.cfg.GetFileStorageDir
7935 file_storage_dir = utils.PathJoin(get_fsd_fn(),
7936 string_file_storage_dir, instance)
7938 file_storage_dir = ""
7940 disks = _GenerateDiskTemplate(self,
7941 self.op.disk_template,
7942 instance, pnode_name,
7946 self.op.file_driver,
7950 iobj = objects.Instance(name=instance, os=self.op.os_type,
7951 primary_node=pnode_name,
7952 nics=self.nics, disks=disks,
7953 disk_template=self.op.disk_template,
7955 network_port=network_port,
7956 beparams=self.op.beparams,
7957 hvparams=self.op.hvparams,
7958 hypervisor=self.op.hypervisor,
7959 osparams=self.op.osparams,
7962 if self.adopt_disks:
7963 if self.op.disk_template == constants.DT_PLAIN:
7964 # rename LVs to the newly-generated names; we need to construct
7965 # 'fake' LV disks with the old data, plus the new unique_id
7966 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7968 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7969 rename_to.append(t_dsk.logical_id)
7970 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7971 self.cfg.SetDiskID(t_dsk, pnode_name)
7972 result = self.rpc.call_blockdev_rename(pnode_name,
7973 zip(tmp_disks, rename_to))
7974 result.Raise("Failed to rename adopted LVs")
7976 feedback_fn("* creating instance disks...")
7978 _CreateDisks(self, iobj)
7979 except errors.OpExecError:
7980 self.LogWarning("Device creation failed, reverting...")
7982 _RemoveDisks(self, iobj)
7984 self.cfg.ReleaseDRBDMinors(instance)
7987 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7988 feedback_fn("* wiping instance disks...")
7990 _WipeDisks(self, iobj)
7991 except errors.OpExecError:
7992 self.LogWarning("Device wiping failed, reverting...")
7994 _RemoveDisks(self, iobj)
7996 self.cfg.ReleaseDRBDMinors(instance)
7999 feedback_fn("adding instance %s to cluster config" % instance)
8001 self.cfg.AddInstance(iobj, self.proc.GetECId())
8003 # Declare that we don't want to remove the instance lock anymore, as we've
8004 # added the instance to the config
8005 del self.remove_locks[locking.LEVEL_INSTANCE]
8006 # Unlock all the nodes
8007 if self.op.mode == constants.INSTANCE_IMPORT:
8008 nodes_keep = [self.op.src_node]
8009 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8010 if node != self.op.src_node]
8011 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8012 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8014 self.context.glm.release(locking.LEVEL_NODE)
8015 del self.acquired_locks[locking.LEVEL_NODE]
8017 if self.op.wait_for_sync:
8018 disk_abort = not _WaitForSync(self, iobj)
8019 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8020 # make sure the disks are not degraded (still sync-ing is ok)
8022 feedback_fn("* checking mirrors status")
8023 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8028 _RemoveDisks(self, iobj)
8029 self.cfg.RemoveInstance(iobj.name)
8030 # Make sure the instance lock gets removed
8031 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8032 raise errors.OpExecError("There are some degraded disks for"
8035 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8036 if self.op.mode == constants.INSTANCE_CREATE:
8037 if not self.op.no_install:
8038 feedback_fn("* running the instance OS create scripts...")
8039 # FIXME: pass debug option from opcode to backend
8040 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8041 self.op.debug_level)
8042 result.Raise("Could not add os for instance %s"
8043 " on node %s" % (instance, pnode_name))
8045 elif self.op.mode == constants.INSTANCE_IMPORT:
8046 feedback_fn("* running the instance OS import scripts...")
8050 for idx, image in enumerate(self.src_images):
8054 # FIXME: pass debug option from opcode to backend
8055 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8056 constants.IEIO_FILE, (image, ),
8057 constants.IEIO_SCRIPT,
8058 (iobj.disks[idx], idx),
8060 transfers.append(dt)
8063 masterd.instance.TransferInstanceData(self, feedback_fn,
8064 self.op.src_node, pnode_name,
8065 self.pnode.secondary_ip,
8067 if not compat.all(import_result):
8068 self.LogWarning("Some disks for instance %s on node %s were not"
8069 " imported successfully" % (instance, pnode_name))
8071 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8072 feedback_fn("* preparing remote import...")
8073 # The source cluster will stop the instance before attempting to make a
8074 # connection. In some cases stopping an instance can take a long time,
8075 # hence the shutdown timeout is added to the connection timeout.
8076 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8077 self.op.source_shutdown_timeout)
8078 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8080 assert iobj.primary_node == self.pnode.name
8082 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8083 self.source_x509_ca,
8084 self._cds, timeouts)
8085 if not compat.all(disk_results):
8086 # TODO: Should the instance still be started, even if some disks
8087 # failed to import (valid for local imports, too)?
8088 self.LogWarning("Some disks for instance %s on node %s were not"
8089 " imported successfully" % (instance, pnode_name))
8091 # Run rename script on newly imported instance
8092 assert iobj.name == instance
8093 feedback_fn("Running rename script for %s" % instance)
8094 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8095 self.source_instance_name,
8096 self.op.debug_level)
8098 self.LogWarning("Failed to run rename script for %s on node"
8099 " %s: %s" % (instance, pnode_name, result.fail_msg))
8102 # also checked in the prereq part
8103 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8107 iobj.admin_up = True
8108 self.cfg.Update(iobj, feedback_fn)
8109 logging.info("Starting instance %s on node %s", instance, pnode_name)
8110 feedback_fn("* starting instance...")
8111 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8112 result.Raise("Could not start instance")
8114 return list(iobj.all_nodes)
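# Illustrative sketch (not part of this module): Exec above follows a
# create-then-rollback pattern (create the disks and, on failure, remove them
# and release the reserved DRBD minors before re-raising). A generic,
# standalone form of that pattern, with hypothetical names:
def _SketchCreateWithRollback(create_fn, cleanup_fns):
  """Run create_fn; on failure run the cleanup functions and re-raise."""
  try:
    return create_fn()
  except Exception, err:  # pylint: disable-msg=W0703
    for cleanup_fn in cleanup_fns:
      try:
        cleanup_fn()
      except Exception:  # pylint: disable-msg=W0703
        pass  # best-effort cleanup, keep the original error
    raise err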
8117 class LUInstanceConsole(NoHooksLU):
8118 """Connect to an instance's console.
8120 This is somewhat special in that it returns the command line that
8121 you need to run on the master node in order to connect to the
8127 def ExpandNames(self):
8128 self._ExpandAndLockInstance()
8130 def CheckPrereq(self):
8131 """Check prerequisites.
8133 This checks that the instance is in the cluster.
8136 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8137 assert self.instance is not None, \
8138 "Cannot retrieve locked instance %s" % self.op.instance_name
8139 _CheckNodeOnline(self, self.instance.primary_node)
8141 def Exec(self, feedback_fn):
8142 """Connect to the console of an instance
8145 instance = self.instance
8146 node = instance.primary_node
8148 node_insts = self.rpc.call_instance_list([node],
8149 [instance.hypervisor])[node]
8150 node_insts.Raise("Can't get node information from %s" % node)
8152 if instance.name not in node_insts.payload:
8153 if instance.admin_up:
8154 state = constants.INSTST_ERRORDOWN
8156 state = constants.INSTST_ADMINDOWN
8157 raise errors.OpExecError("Instance %s is not running (state %s)" %
8158 (instance.name, state))
8160 logging.debug("Connecting to console of %s on %s", instance.name, node)
8162 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8165 def _GetInstanceConsole(cluster, instance):
8166 """Returns console information for an instance.
8168 @type cluster: L{objects.Cluster}
8169 @type instance: L{objects.Instance}
8173 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8174 # beparams and hvparams are passed separately, to avoid editing the
8175 # instance and then saving the defaults in the instance itself.
8176 hvparams = cluster.FillHV(instance)
8177 beparams = cluster.FillBE(instance)
8178 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8180 assert console.instance == instance.name
8181 assert console.Validate()
8183 return console.ToDict()
8186 class LUInstanceReplaceDisks(LogicalUnit):
8187 """Replace the disks of an instance.
8190 HPATH = "mirrors-replace"
8191 HTYPE = constants.HTYPE_INSTANCE
8194 def CheckArguments(self):
8195 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8198 def ExpandNames(self):
8199 self._ExpandAndLockInstance()
8201 if self.op.iallocator is not None:
8202 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8204 elif self.op.remote_node is not None:
8205 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8206 self.op.remote_node = remote_node
8208 # Warning: do not remove the locking of the new secondary here
8209 # unless DRBD8.AddChildren is changed to work in parallel;
8210 # currently it doesn't since parallel invocations of
8211 # FindUnusedMinor will conflict
8212 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8213 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8216 self.needed_locks[locking.LEVEL_NODE] = []
8217 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8219 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8220 self.op.iallocator, self.op.remote_node,
8221 self.op.disks, False, self.op.early_release)
8223 self.tasklets = [self.replacer]
8225 def DeclareLocks(self, level):
8226 # If we're not already locking all nodes in the set we have to declare the
8227 # instance's primary/secondary nodes.
8228 if (level == locking.LEVEL_NODE and
8229 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8230 self._LockInstancesNodes()
8232 def BuildHooksEnv(self):
8235 This runs on the master, the primary and all the secondaries.
8238 instance = self.replacer.instance
8240 "MODE": self.op.mode,
8241 "NEW_SECONDARY": self.op.remote_node,
8242 "OLD_SECONDARY": instance.secondary_nodes[0],
8244 env.update(_BuildInstanceHookEnvByObject(self, instance))
8246 self.cfg.GetMasterNode(),
8247 instance.primary_node,
8249 if self.op.remote_node is not None:
8250 nl.append(self.op.remote_node)
8254 class TLReplaceDisks(Tasklet):
8255 """Replaces disks for an instance.
8257 Note: Locking is not within the scope of this class.
8260 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8261 disks, delay_iallocator, early_release):
8262 """Initializes this class.
8265 Tasklet.__init__(self, lu)
8268 self.instance_name = instance_name
8270 self.iallocator_name = iallocator_name
8271 self.remote_node = remote_node
8273 self.delay_iallocator = delay_iallocator
8274 self.early_release = early_release
8277 self.instance = None
8278 self.new_node = None
8279 self.target_node = None
8280 self.other_node = None
8281 self.remote_node_info = None
8282 self.node_secondary_ip = None
8285 def CheckArguments(mode, remote_node, iallocator):
8286 """Helper function for users of this class.
8289 # check for valid parameter combination
8290 if mode == constants.REPLACE_DISK_CHG:
8291 if remote_node is None and iallocator is None:
8292 raise errors.OpPrereqError("When changing the secondary either an"
8293 " iallocator script must be used or the"
8294 " new node given", errors.ECODE_INVAL)
8296 if remote_node is not None and iallocator is not None:
8297 raise errors.OpPrereqError("Give either the iallocator or the new"
8298 " secondary, not both", errors.ECODE_INVAL)
8300 elif remote_node is not None or iallocator is not None:
8301 # Not replacing the secondary
8302 raise errors.OpPrereqError("The iallocator and new node options can"
8303 " only be used when changing the"
8304 " secondary node", errors.ECODE_INVAL)
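# Illustrative sketch (not part of this module): the argument-combination rule
# enforced by CheckArguments above, as a standalone check over plain values
# (hypothetical name; change_secondary stands in for the REPLACE_DISK_CHG mode).
def _SketchCheckReplaceArguments(change_secondary, remote_node, iallocator):
  """Validate the remote_node/iallocator combination for a disk replacement."""
  if change_secondary:
    if remote_node is None and iallocator is None:
      raise ValueError("changing the secondary needs a new node or an"
                       " iallocator script")
    if remote_node is not None and iallocator is not None:
      raise ValueError("give either the iallocator or the new node, not both")
  elif remote_node is not None or iallocator is not None:
    raise ValueError("the iallocator and new node options only apply when"
                     " changing the secondary")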
8307 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8308 """Compute a new secondary node using an IAllocator.
8311 ial = IAllocator(lu.cfg, lu.rpc,
8312 mode=constants.IALLOCATOR_MODE_RELOC,
8314 relocate_from=relocate_from)
8316 ial.Run(iallocator_name)
8319 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8320 " %s" % (iallocator_name, ial.info),
8323 if len(ial.result) != ial.required_nodes:
8324 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8325 " of nodes (%s), required %s" %
8327 len(ial.result), ial.required_nodes),
8330 remote_node_name = ial.result[0]
8332 lu.LogInfo("Selected new secondary for instance '%s': %s",
8333 instance_name, remote_node_name)
8335 return remote_node_name
8337 def _FindFaultyDisks(self, node_name):
8338 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8341 def CheckPrereq(self):
8342 """Check prerequisites.
8344 This checks that the instance is in the cluster.
8347 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8348 assert instance is not None, \
8349 "Cannot retrieve locked instance %s" % self.instance_name
8351 if instance.disk_template != constants.DT_DRBD8:
8352 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8353 " instances", errors.ECODE_INVAL)
8355 if len(instance.secondary_nodes) != 1:
8356 raise errors.OpPrereqError("The instance has a strange layout,"
8357 " expected one secondary but found %d" %
8358 len(instance.secondary_nodes),
8361 if not self.delay_iallocator:
8362 self._CheckPrereq2()
8364 def _CheckPrereq2(self):
8365 """Check prerequisites, second part.
8367 This function should always be part of CheckPrereq. It was separated and is
8368 now called from Exec because during node evacuation iallocator was only
8369 called with an unmodified cluster model, not taking planned changes into
8373 instance = self.instance
8374 secondary_node = instance.secondary_nodes[0]
8376 if self.iallocator_name is None:
8377 remote_node = self.remote_node
8379 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8380 instance.name, instance.secondary_nodes)
8382 if remote_node is not None:
8383 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8384 assert self.remote_node_info is not None, \
8385 "Cannot retrieve locked node %s" % remote_node
8387 self.remote_node_info = None
8389 if remote_node == self.instance.primary_node:
8390 raise errors.OpPrereqError("The specified node is the primary node of"
8391 " the instance.", errors.ECODE_INVAL)
8393 if remote_node == secondary_node:
8394 raise errors.OpPrereqError("The specified node is already the"
8395 " secondary node of the instance.",
8398 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8399 constants.REPLACE_DISK_CHG):
8400 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8403 if self.mode == constants.REPLACE_DISK_AUTO:
8404 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8405 faulty_secondary = self._FindFaultyDisks(secondary_node)
8407 if faulty_primary and faulty_secondary:
8408 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8409 " one node and can not be repaired"
8410 " automatically" % self.instance_name,
8414 self.disks = faulty_primary
8415 self.target_node = instance.primary_node
8416 self.other_node = secondary_node
8417 check_nodes = [self.target_node, self.other_node]
8418 elif faulty_secondary:
8419 self.disks = faulty_secondary
8420 self.target_node = secondary_node
8421 self.other_node = instance.primary_node
8422 check_nodes = [self.target_node, self.other_node]
8428 # Non-automatic modes
8429 if self.mode == constants.REPLACE_DISK_PRI:
8430 self.target_node = instance.primary_node
8431 self.other_node = secondary_node
8432 check_nodes = [self.target_node, self.other_node]
8434 elif self.mode == constants.REPLACE_DISK_SEC:
8435 self.target_node = secondary_node
8436 self.other_node = instance.primary_node
8437 check_nodes = [self.target_node, self.other_node]
8439 elif self.mode == constants.REPLACE_DISK_CHG:
8440 self.new_node = remote_node
8441 self.other_node = instance.primary_node
8442 self.target_node = secondary_node
8443 check_nodes = [self.new_node, self.other_node]
8445 _CheckNodeNotDrained(self.lu, remote_node)
8446 _CheckNodeVmCapable(self.lu, remote_node)
8448 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8449 assert old_node_info is not None
8450 if old_node_info.offline and not self.early_release:
8451 # doesn't make sense to delay the release
8452 self.early_release = True
8453 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8454 " early-release mode", secondary_node)
8457 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8460 # If not specified all disks should be replaced
8462 self.disks = range(len(self.instance.disks))
8464 for node in check_nodes:
8465 _CheckNodeOnline(self.lu, node)
8467 # Check whether disks are valid
8468 for disk_idx in self.disks:
8469 instance.FindDisk(disk_idx)
8471 # Get secondary node IP addresses
8474 for node_name in [self.target_node, self.other_node, self.new_node]:
8475 if node_name is not None:
8476 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8478 self.node_secondary_ip = node_2nd_ip
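# Illustrative sketch (not part of this module): the mode-to-node-role mapping
# computed in _CheckPrereq2 above, as a pure function over plain strings
# ("pri"/"sec"/"chg" stand in for the REPLACE_DISK_* constants).
def _SketchReplaceRoles(mode, primary, secondary, new_node=None):
  """Return (target_node, other_node, check_nodes) for a replacement mode."""
  if mode == "pri":
    return (primary, secondary, [primary, secondary])
  elif mode == "sec":
    return (secondary, primary, [secondary, primary])
  elif mode == "chg":
    if new_node is None:
      raise ValueError("changing the secondary requires a new node")
    return (secondary, primary, [new_node, primary])
  raise ValueError("unhandled disk replace mode %r" % mode)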
8480 def Exec(self, feedback_fn):
8481 """Execute disk replacement.
8483 This dispatches the disk replacement to the appropriate handler.
8486 if self.delay_iallocator:
8487 self._CheckPrereq2()
8490 feedback_fn("No disks need replacement")
8493 feedback_fn("Replacing disk(s) %s for %s" %
8494 (utils.CommaJoin(self.disks), self.instance.name))
8496 activate_disks = (not self.instance.admin_up)
8498 # Activate the instance disks if we're replacing them on a down instance
8500 _StartInstanceDisks(self.lu, self.instance, True)
8503 # Should we replace the secondary node?
8504 if self.new_node is not None:
8505 fn = self._ExecDrbd8Secondary
8507 fn = self._ExecDrbd8DiskOnly
8509 return fn(feedback_fn)
8512 # Deactivate the instance disks if we're replacing them on a
8515 _SafeShutdownInstanceDisks(self.lu, self.instance)
8517 def _CheckVolumeGroup(self, nodes):
8518 self.lu.LogInfo("Checking volume groups")
8520 vgname = self.cfg.GetVGName()
8522 # Make sure volume group exists on all involved nodes
8523 results = self.rpc.call_vg_list(nodes)
8525 raise errors.OpExecError("Can't list volume groups on the nodes")
8529 res.Raise("Error checking node %s" % node)
8530 if vgname not in res.payload:
8531 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8534 def _CheckDisksExistence(self, nodes):
8535 # Check disk existence
8536 for idx, dev in enumerate(self.instance.disks):
8537 if idx not in self.disks:
8541 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8542 self.cfg.SetDiskID(dev, node)
8544 result = self.rpc.call_blockdev_find(node, dev)
8546 msg = result.fail_msg
8547 if msg or not result.payload:
8549 msg = "disk not found"
8550 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8553 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8554 for idx, dev in enumerate(self.instance.disks):
8555 if idx not in self.disks:
8558 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8561 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8563 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8564 " replace disks for instance %s" %
8565 (node_name, self.instance.name))
8567 def _CreateNewStorage(self, node_name):
8568 vgname = self.cfg.GetVGName()
8571 for idx, dev in enumerate(self.instance.disks):
8572 if idx not in self.disks:
8575 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8577 self.cfg.SetDiskID(dev, node_name)
8579 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8580 names = _GenerateUniqueNames(self.lu, lv_names)
8582 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8583 logical_id=(vgname, names[0]))
8584 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8585 logical_id=(vgname, names[1]))
8587 new_lvs = [lv_data, lv_meta]
8588 old_lvs = dev.children
8589 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8591 # we pass force_create=True to force the LVM creation
8592 for new_lv in new_lvs:
8593 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8594 _GetInstanceInfoText(self.instance), False)
8598 def _CheckDevices(self, node_name, iv_names):
8599 for name, (dev, _, _) in iv_names.iteritems():
8600 self.cfg.SetDiskID(dev, node_name)
8602 result = self.rpc.call_blockdev_find(node_name, dev)
8604 msg = result.fail_msg
8605 if msg or not result.payload:
8607 msg = "disk not found"
8608 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8611 if result.payload.is_degraded:
8612 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8614 def _RemoveOldStorage(self, node_name, iv_names):
8615 for name, (_, old_lvs, _) in iv_names.iteritems():
8616 self.lu.LogInfo("Remove logical volumes for %s" % name)
8619 self.cfg.SetDiskID(lv, node_name)
8621 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8623 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8624 hint="remove unused LVs manually")
8626 def _ReleaseNodeLock(self, node_name):
8627 """Releases the lock for a given node."""
8628 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8630 def _ExecDrbd8DiskOnly(self, feedback_fn):
8631 """Replace a disk on the primary or secondary for DRBD 8.
8633 The algorithm for replace is quite complicated:
8635 1. for each disk to be replaced:
8637 1. create new LVs on the target node with unique names
8638 1. detach old LVs from the drbd device
8639 1. rename old LVs to name_replaced.<time_t>
8640 1. rename new LVs to old LVs
8641 1. attach the new LVs (with the old names now) to the drbd device
8643 1. wait for sync across all devices
8645 1. for each modified disk:
8647 1. remove old LVs (which have the name name_replaced.<time_t>)
8649 Failures are not very well handled.
8654 # Step: check device activation
8655 self.lu.LogStep(1, steps_total, "Check device existence")
8656 self._CheckDisksExistence([self.other_node, self.target_node])
8657 self._CheckVolumeGroup([self.target_node, self.other_node])
8659 # Step: check other node consistency
8660 self.lu.LogStep(2, steps_total, "Check peer consistency")
8661 self._CheckDisksConsistency(self.other_node,
8662 self.other_node == self.instance.primary_node,
8665 # Step: create new storage
8666 self.lu.LogStep(3, steps_total, "Allocate new storage")
8667 iv_names = self._CreateNewStorage(self.target_node)
8669 # Step: for each lv, detach+rename*2+attach
8670 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8671 for dev, old_lvs, new_lvs in iv_names.itervalues():
8672 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8674 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8676 result.Raise("Can't detach drbd from local storage on node"
8677 " %s for device %s" % (self.target_node, dev.iv_name))
8679 #cfg.Update(instance)
8681 # ok, we created the new LVs, so now we know we have the needed
8682 # storage; as such, we proceed on the target node to rename
8683 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8684 # using the assumption that logical_id == physical_id (which in
8685 # turn is the unique_id on that node)
8687 # FIXME(iustin): use a better name for the replaced LVs
8688 temp_suffix = int(time.time())
8689 ren_fn = lambda d, suff: (d.physical_id[0],
8690 d.physical_id[1] + "_replaced-%s" % suff)
8692 # Build the rename list based on what LVs exist on the node
8693 rename_old_to_new = []
8694 for to_ren in old_lvs:
8695 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8696 if not result.fail_msg and result.payload:
8698 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8700 self.lu.LogInfo("Renaming the old LVs on the target node")
8701 result = self.rpc.call_blockdev_rename(self.target_node,
8703 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8705 # Now we rename the new LVs to the old LVs
8706 self.lu.LogInfo("Renaming the new LVs on the target node")
8707 rename_new_to_old = [(new, old.physical_id)
8708 for old, new in zip(old_lvs, new_lvs)]
8709 result = self.rpc.call_blockdev_rename(self.target_node,
8711 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8713 for old, new in zip(old_lvs, new_lvs):
8714 new.logical_id = old.logical_id
8715 self.cfg.SetDiskID(new, self.target_node)
8717 for disk in old_lvs:
8718 disk.logical_id = ren_fn(disk, temp_suffix)
8719 self.cfg.SetDiskID(disk, self.target_node)
8721 # Now that the new lvs have the old name, we can add them to the device
8722 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8723 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8725 msg = result.fail_msg
8727 for new_lv in new_lvs:
8728 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8731 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8732 hint=("cleanup manually the unused logical"
8734 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8736 dev.children = new_lvs
8738 self.cfg.Update(self.instance, feedback_fn)
8741 if self.early_release:
8742 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8744 self._RemoveOldStorage(self.target_node, iv_names)
8745 # WARNING: we release both node locks here, do not do other RPCs
8746 # than WaitForSync to the primary node
8747 self._ReleaseNodeLock([self.target_node, self.other_node])
8750 # This can fail as the old devices are degraded and _WaitForSync
8751 # does a combined result over all disks, so we don't check its return value
8752 self.lu.LogStep(cstep, steps_total, "Sync devices")
8754 _WaitForSync(self.lu, self.instance)
8756 # Check all devices manually
8757 self._CheckDevices(self.instance.primary_node, iv_names)
8759 # Step: remove old storage
8760 if not self.early_release:
8761 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8763 self._RemoveOldStorage(self.target_node, iv_names)
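# Illustrative sketch (not part of this module): the core rename dance of
# _ExecDrbd8DiskOnly above -- move the old LVs out of the way under a
# timestamped suffix, then give the new LVs the old names -- shown on plain
# name strings (hypothetical helper).
import time

def _SketchLvSwapNames(old_names, new_names):
  """Return the two rename lists used to swap the new LVs into the old names."""
  suffix = "_replaced-%d" % int(time.time())
  rename_old_to_temp = [(old, old + suffix) for old in old_names]
  rename_new_to_old = [(new, old) for old, new in zip(old_names, new_names)]
  return (rename_old_to_temp, rename_new_to_old)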
8765 def _ExecDrbd8Secondary(self, feedback_fn):
8766 """Replace the secondary node for DRBD 8.
8768 The algorithm for replace is quite complicated:
8769 - for all disks of the instance:
8770 - create new LVs on the new node with same names
8771 - shutdown the drbd device on the old secondary
8772 - disconnect the drbd network on the primary
8773 - create the drbd device on the new secondary
8774 - network attach the drbd on the primary, using an artifice:
8775 the drbd code for Attach() will connect to the network if it
8776 finds a device which is connected to the good local disks but
8778 - wait for sync across all devices
8779 - remove all disks from the old secondary
8781 Failures are not very well handled.
8786 # Step: check device activation
8787 self.lu.LogStep(1, steps_total, "Check device existence")
8788 self._CheckDisksExistence([self.instance.primary_node])
8789 self._CheckVolumeGroup([self.instance.primary_node])
8791 # Step: check other node consistency
8792 self.lu.LogStep(2, steps_total, "Check peer consistency")
8793 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8795 # Step: create new storage
8796 self.lu.LogStep(3, steps_total, "Allocate new storage")
8797 for idx, dev in enumerate(self.instance.disks):
8798 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8799 (self.new_node, idx))
8800 # we pass force_create=True to force LVM creation
8801 for new_lv in dev.children:
8802 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8803 _GetInstanceInfoText(self.instance), False)
8805 # Step 4: drbd minors and drbd setup changes
8806 # after this, we must manually remove the drbd minors on both the
8807 # error and the success paths
8808 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8809 minors = self.cfg.AllocateDRBDMinor([self.new_node
8810 for dev in self.instance.disks],
8812 logging.debug("Allocated minors %r", minors)
8815 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8816 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8817 (self.new_node, idx))
8818 # create new devices on new_node; note that we create two IDs:
8819 # one without port, so the drbd will be activated without
8820 # networking information on the new node at this stage, and one
8821 # with network, for the later activation in step 4
8822 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8823 if self.instance.primary_node == o_node1:
8826 assert self.instance.primary_node == o_node2, "Three-node instance?"
8829 new_alone_id = (self.instance.primary_node, self.new_node, None,
8830 p_minor, new_minor, o_secret)
8831 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8832 p_minor, new_minor, o_secret)
8834 iv_names[idx] = (dev, dev.children, new_net_id)
8835 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8837 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8838 logical_id=new_alone_id,
8839 children=dev.children,
8842 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8843 _GetInstanceInfoText(self.instance), False)
8844 except errors.GenericError:
8845 self.cfg.ReleaseDRBDMinors(self.instance.name)
8848 # We have new devices, shutdown the drbd on the old secondary
8849 for idx, dev in enumerate(self.instance.disks):
8850 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8851 self.cfg.SetDiskID(dev, self.target_node)
8852 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8854 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8855 " node: %s" % (idx, msg),
8856 hint=("Please cleanup this device manually as"
8857 " soon as possible"))
8859 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8860 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8861 self.node_secondary_ip,
8862 self.instance.disks)\
8863 [self.instance.primary_node]
8865 msg = result.fail_msg
8867 # detaches didn't succeed (unlikely)
8868 self.cfg.ReleaseDRBDMinors(self.instance.name)
8869 raise errors.OpExecError("Can't detach the disks from the network on"
8870 " old node: %s" % (msg,))
8872 # if we managed to detach at least one, we update all the disks of
8873 # the instance to point to the new secondary
8874 self.lu.LogInfo("Updating instance configuration")
8875 for dev, _, new_logical_id in iv_names.itervalues():
8876 dev.logical_id = new_logical_id
8877 self.cfg.SetDiskID(dev, self.instance.primary_node)
8879 self.cfg.Update(self.instance, feedback_fn)
8881 # and now perform the drbd attach
8882 self.lu.LogInfo("Attaching primary drbds to new secondary"
8883 " (standalone => connected)")
8884 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8886 self.node_secondary_ip,
8887 self.instance.disks,
8890 for to_node, to_result in result.items():
8891 msg = to_result.fail_msg
8893 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8895 hint=("please do a gnt-instance info to see the"
8896 " status of disks"))
8898 if self.early_release:
8899 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8901 self._RemoveOldStorage(self.target_node, iv_names)
8902 # WARNING: we release all node locks here, do not do other RPCs
8903 # than WaitForSync to the primary node
8904 self._ReleaseNodeLock([self.instance.primary_node,
8909 # This can fail as the old devices are degraded and _WaitForSync
8910 # does a combined result over all disks, so we don't check its return value
8911 self.lu.LogStep(cstep, steps_total, "Sync devices")
8913 _WaitForSync(self.lu, self.instance)
8915 # Check all devices manually
8916 self._CheckDevices(self.instance.primary_node, iv_names)
8918 # Step: remove old storage
8919 if not self.early_release:
8920 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8921 self._RemoveOldStorage(self.target_node, iv_names)
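# Illustrative sketch (not part of this module): _ExecDrbd8Secondary above
# builds two DRBD logical IDs per disk -- one without the port for the first,
# standalone activation on the new node, and one with the port for the later
# network attach. The tuple layout mirrors the one used above; the helper name
# is hypothetical.
def _SketchDrbdLogicalIds(primary, new_secondary, port, p_minor, new_minor,
                          secret):
  """Return (standalone_id, networked_id) for a re-homed DRBD disk."""
  standalone_id = (primary, new_secondary, None, p_minor, new_minor, secret)
  networked_id = (primary, new_secondary, port, p_minor, new_minor, secret)
  return (standalone_id, networked_id)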
8924 class LURepairNodeStorage(NoHooksLU):
8925 """Repairs the volume group on a node.
8930 def CheckArguments(self):
8931 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8933 storage_type = self.op.storage_type
8935 if (constants.SO_FIX_CONSISTENCY not in
8936 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8937 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8938 " repaired" % storage_type,
8941 def ExpandNames(self):
8942 self.needed_locks = {
8943 locking.LEVEL_NODE: [self.op.node_name],
8946 def _CheckFaultyDisks(self, instance, node_name):
8947 """Ensure faulty disks abort the opcode or at least warn."""
8949 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8951 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8952 " node '%s'" % (instance.name, node_name),
8954 except errors.OpPrereqError, err:
8955 if self.op.ignore_consistency:
8956 self.proc.LogWarning(str(err.args[0]))
8960 def CheckPrereq(self):
8961 """Check prerequisites.
8964 # Check whether any instance on this node has faulty disks
8965 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8966 if not inst.admin_up:
8968 check_nodes = set(inst.all_nodes)
8969 check_nodes.discard(self.op.node_name)
8970 for inst_node_name in check_nodes:
8971 self._CheckFaultyDisks(inst, inst_node_name)
8973 def Exec(self, feedback_fn):
8974 feedback_fn("Repairing storage unit '%s' on %s ..." %
8975 (self.op.name, self.op.node_name))
8977 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8978 result = self.rpc.call_storage_execute(self.op.node_name,
8979 self.op.storage_type, st_args,
8981 constants.SO_FIX_CONSISTENCY)
8982 result.Raise("Failed to repair storage unit '%s' on %s" %
8983 (self.op.name, self.op.node_name))
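# Illustrative sketch (not part of this module): the "abort, or merely warn
# when ignore_consistency is set" pattern of _CheckFaultyDisks above, using
# plain callables instead of the LU machinery (hypothetical names).
def _SketchCheckOrWarn(check_fn, ignore_errors, warn_fn):
  """Run check_fn; if it raises and ignore_errors is set, only warn."""
  try:
    check_fn()
  except Exception, err:  # pylint: disable-msg=W0703
    if not ignore_errors:
      raise
    warn_fn(str(err))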
8986 class LUNodeEvacStrategy(NoHooksLU):
8987 """Computes the node evacuation strategy.
8992 def CheckArguments(self):
8993 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8995 def ExpandNames(self):
8996 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8997 self.needed_locks = locks = {}
8998 if self.op.remote_node is None:
8999 locks[locking.LEVEL_NODE] = locking.ALL_SET
9001 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9002 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9004 def Exec(self, feedback_fn):
9005 if self.op.remote_node is not None:
9007 for node in self.op.nodes:
9008 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9011 if i.primary_node == self.op.remote_node:
9012 raise errors.OpPrereqError("Node %s is the primary node of"
9013 " instance %s, cannot use it as"
9015 (self.op.remote_node, i.name),
9017 result.append([i.name, self.op.remote_node])
9019 ial = IAllocator(self.cfg, self.rpc,
9020 mode=constants.IALLOCATOR_MODE_MEVAC,
9021 evac_nodes=self.op.nodes)
9022 ial.Run(self.op.iallocator, validate=True)
9024 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
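# Illustrative sketch (not part of this module): the fixed-remote-node branch
# of Exec above -- pair every affected instance with the chosen target node,
# refusing targets that are an instance's primary. Instances are modelled here
# as plain (name, primary_node) tuples; the helper name is hypothetical.
def _SketchEvacuationPairs(instances, remote_node):
  """Return [[instance_name, remote_node], ...] or raise on a bad target."""
  result = []
  for (name, primary_node) in instances:
    if primary_node == remote_node:
      raise ValueError("node %s is the primary node of instance %s" %
                       (remote_node, name))
    result.append([name, remote_node])
  return result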
9030 class LUInstanceGrowDisk(LogicalUnit):
9031 """Grow a disk of an instance.
9035 HTYPE = constants.HTYPE_INSTANCE
9038 def ExpandNames(self):
9039 self._ExpandAndLockInstance()
9040 self.needed_locks[locking.LEVEL_NODE] = []
9041 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9043 def DeclareLocks(self, level):
9044 if level == locking.LEVEL_NODE:
9045 self._LockInstancesNodes()
9047 def BuildHooksEnv(self):
9050 This runs on the master, the primary and all the secondaries.
9054 "DISK": self.op.disk,
9055 "AMOUNT": self.op.amount,
9057 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9058 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9061 def CheckPrereq(self):
9062 """Check prerequisites.
9064 This checks that the instance is in the cluster.
9067 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9068 assert instance is not None, \
9069 "Cannot retrieve locked instance %s" % self.op.instance_name
9070 nodenames = list(instance.all_nodes)
9071 for node in nodenames:
9072 _CheckNodeOnline(self, node)
9074 self.instance = instance
9076 if instance.disk_template not in constants.DTS_GROWABLE:
9077 raise errors.OpPrereqError("Instance's disk layout does not support"
9078 " growing.", errors.ECODE_INVAL)
9080 self.disk = instance.FindDisk(self.op.disk)
9082 if instance.disk_template not in (constants.DT_FILE,
9083 constants.DT_SHARED_FILE):
9084 # TODO: check the free disk space for file, when that feature will be
9086 _CheckNodesFreeDiskPerVG(self, nodenames,
9087 self.disk.ComputeGrowth(self.op.amount))
9089 def Exec(self, feedback_fn):
9090 """Execute disk grow.
9093 instance = self.instance
9096 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9098 raise errors.OpExecError("Cannot activate block device to grow")
9100 for node in instance.all_nodes:
9101 self.cfg.SetDiskID(disk, node)
9102 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9103 result.Raise("Grow request failed to node %s" % node)
9105 # TODO: Rewrite code to work properly
9106 # DRBD goes into sync mode for a short amount of time after executing the
9107 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9108 # calling "resize" in sync mode fails. Sleeping for a short amount of
9109 # time is a work-around.
9112 disk.RecordGrow(self.op.amount)
9113 self.cfg.Update(instance, feedback_fn)
9114 if self.op.wait_for_sync:
9115 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9117 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9118 " status.\nPlease check the instance.")
9119 if not instance.admin_up:
9120 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9121 elif not instance.admin_up:
9122 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9123 " not supposed to be running, because wait for"
9124 " sync mode was not requested.")
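# Illustrative sketch (not part of this module): the grow loop in Exec above
# must succeed on every node holding the disk before the new size is recorded
# in the configuration. The same ordering, with hypothetical callables:
def _SketchGrowEverywhere(nodes, grow_fn, record_fn, amount):
  """Grow a disk by `amount` on all nodes, then record the new size."""
  for node in nodes:
    grow_fn(node, amount)  # let the first failing node abort the operation
  record_fn(amount)  # only reached if every node succeeded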
9127 class LUInstanceQueryData(NoHooksLU):
9128 """Query runtime instance data.
9133 def ExpandNames(self):
9134 self.needed_locks = {}
9135 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9137 if self.op.instances:
9138 self.wanted_names = []
9139 for name in self.op.instances:
9140 full_name = _ExpandInstanceName(self.cfg, name)
9141 self.wanted_names.append(full_name)
9142 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9144 self.wanted_names = None
9145 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9147 self.needed_locks[locking.LEVEL_NODE] = []
9148 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9150 def DeclareLocks(self, level):
9151 if level == locking.LEVEL_NODE:
9152 self._LockInstancesNodes()
9154 def CheckPrereq(self):
9155 """Check prerequisites.
9157 This only checks the optional instance list against the existing names.
9160 if self.wanted_names is None:
9161 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9163 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
9164 in self.wanted_names]
9166 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9167 """Returns the status of a block device
9170 if self.op.static or not node:
9173 self.cfg.SetDiskID(dev, node)
9175 result = self.rpc.call_blockdev_find(node, dev)
9179 result.Raise("Can't compute disk status for %s" % instance_name)
9181 status = result.payload
9185 return (status.dev_path, status.major, status.minor,
9186 status.sync_percent, status.estimated_time,
9187 status.is_degraded, status.ldisk_status)
9189 def _ComputeDiskStatus(self, instance, snode, dev):
9190 """Compute block device status.
9193 if dev.dev_type in constants.LDS_DRBD:
9194 # we change the snode then (otherwise we use the one passed in)
9195 if dev.logical_id[0] == instance.primary_node:
9196 snode = dev.logical_id[1]
9198 snode = dev.logical_id[0]
9200 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9202 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9205 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9206 for child in dev.children]
9211 "iv_name": dev.iv_name,
9212 "dev_type": dev.dev_type,
9213 "logical_id": dev.logical_id,
9214 "physical_id": dev.physical_id,
9215 "pstatus": dev_pstatus,
9216 "sstatus": dev_sstatus,
9217 "children": dev_children,
9224 def Exec(self, feedback_fn):
9225 """Gather and return data"""
9228 cluster = self.cfg.GetClusterInfo()
9230 for instance in self.wanted_instances:
9231 if not self.op.static:
9232 remote_info = self.rpc.call_instance_info(instance.primary_node,
9234 instance.hypervisor)
9235 remote_info.Raise("Error checking node %s" % instance.primary_node)
9236 remote_info = remote_info.payload
9237 if remote_info and "state" in remote_info:
9240 remote_state = "down"
9243 if instance.admin_up:
9246 config_state = "down"
9248 disks = [self._ComputeDiskStatus(instance, None, device)
9249 for device in instance.disks]
9252 "name": instance.name,
9253 "config_state": config_state,
9254 "run_state": remote_state,
9255 "pnode": instance.primary_node,
9256 "snodes": instance.secondary_nodes,
9258 # this happens to be the same format used for hooks
9259 "nics": _NICListToTuple(self, instance.nics),
9260 "disk_template": instance.disk_template,
9262 "hypervisor": instance.hypervisor,
9263 "network_port": instance.network_port,
9264 "hv_instance": instance.hvparams,
9265 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9266 "be_instance": instance.beparams,
9267 "be_actual": cluster.FillBE(instance),
9268 "os_instance": instance.osparams,
9269 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9270 "serial_no": instance.serial_no,
9271 "mtime": instance.mtime,
9272 "ctime": instance.ctime,
9273 "uuid": instance.uuid,
9276 result[instance.name] = idict
9281 class LUInstanceSetParams(LogicalUnit):
9282 """Modifies an instances's parameters.
9285 HPATH = "instance-modify"
9286 HTYPE = constants.HTYPE_INSTANCE
9289 def CheckArguments(self):
9290 if not (self.op.nics or self.op.disks or self.op.disk_template or
9291 self.op.hvparams or self.op.beparams or self.op.os_name):
9292 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9294 if self.op.hvparams:
9295 _CheckGlobalHvParams(self.op.hvparams)
9299 for disk_op, disk_dict in self.op.disks:
9300 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9301 if disk_op == constants.DDM_REMOVE:
9304 elif disk_op == constants.DDM_ADD:
9307 if not isinstance(disk_op, int):
9308 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9309 if not isinstance(disk_dict, dict):
9310 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9311 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9313 if disk_op == constants.DDM_ADD:
9314 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9315 if mode not in constants.DISK_ACCESS_SET:
9316 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9318 size = disk_dict.get('size', None)
9320 raise errors.OpPrereqError("Required disk parameter size missing",
9324 except (TypeError, ValueError), err:
9325 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9326 str(err), errors.ECODE_INVAL)
9327 disk_dict['size'] = size
9329 # modification of disk
9330 if 'size' in disk_dict:
9331 raise errors.OpPrereqError("Disk size change not possible, use"
9332 " grow-disk", errors.ECODE_INVAL)
9334 if disk_addremove > 1:
9335 raise errors.OpPrereqError("Only one disk add or remove operation"
9336 " supported at a time", errors.ECODE_INVAL)
9338 if self.op.disks and self.op.disk_template is not None:
9339 raise errors.OpPrereqError("Disk template conversion and other disk"
9340 " changes not supported at the same time",
9343 if (self.op.disk_template and
9344 self.op.disk_template in constants.DTS_INT_MIRROR and
9345 self.op.remote_node is None):
9346 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9347 " one requires specifying a secondary node",
9352 for nic_op, nic_dict in self.op.nics:
9353 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9354 if nic_op == constants.DDM_REMOVE:
9357 elif nic_op == constants.DDM_ADD:
9360 if not isinstance(nic_op, int):
9361 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9362 if not isinstance(nic_dict, dict):
9363 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9364 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9366 # nic_dict should be a dict
9367 nic_ip = nic_dict.get('ip', None)
9368 if nic_ip is not None:
9369 if nic_ip.lower() == constants.VALUE_NONE:
9370 nic_dict['ip'] = None
9372 if not netutils.IPAddress.IsValid(nic_ip):
9373 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9376 nic_bridge = nic_dict.get('bridge', None)
9377 nic_link = nic_dict.get('link', None)
9378 if nic_bridge and nic_link:
9379 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9380 " at the same time", errors.ECODE_INVAL)
9381 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9382 nic_dict['bridge'] = None
9383 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9384 nic_dict['link'] = None
9386 if nic_op == constants.DDM_ADD:
9387 nic_mac = nic_dict.get('mac', None)
9389 nic_dict['mac'] = constants.VALUE_AUTO
9391 if 'mac' in nic_dict:
9392 nic_mac = nic_dict['mac']
9393 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9394 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9396 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9397 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9398 " modifying an existing nic",
9401 if nic_addremove > 1:
9402 raise errors.OpPrereqError("Only one NIC add or remove operation"
9403 " supported at a time", errors.ECODE_INVAL)
9405 def ExpandNames(self):
9406 self._ExpandAndLockInstance()
9407 self.needed_locks[locking.LEVEL_NODE] = []
9408 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9410 def DeclareLocks(self, level):
9411 if level == locking.LEVEL_NODE:
9412 self._LockInstancesNodes()
9413 if self.op.disk_template and self.op.remote_node:
9414 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9415 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9417 def BuildHooksEnv(self):
9420 This runs on the master, primary and secondaries.
9424 if constants.BE_MEMORY in self.be_new:
9425 args['memory'] = self.be_new[constants.BE_MEMORY]
9426 if constants.BE_VCPUS in self.be_new:
9427 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9428 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9429 # information at all.
9432 nic_override = dict(self.op.nics)
9433 for idx, nic in enumerate(self.instance.nics):
9434 if idx in nic_override:
9435 this_nic_override = nic_override[idx]
9437 this_nic_override = {}
9438 if 'ip' in this_nic_override:
9439 ip = this_nic_override['ip']
9442 if 'mac' in this_nic_override:
9443 mac = this_nic_override['mac']
9446 if idx in self.nic_pnew:
9447 nicparams = self.nic_pnew[idx]
9449 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9450 mode = nicparams[constants.NIC_MODE]
9451 link = nicparams[constants.NIC_LINK]
9452 args['nics'].append((ip, mac, mode, link))
9453 if constants.DDM_ADD in nic_override:
9454 ip = nic_override[constants.DDM_ADD].get('ip', None)
9455 mac = nic_override[constants.DDM_ADD]['mac']
9456 nicparams = self.nic_pnew[constants.DDM_ADD]
9457 mode = nicparams[constants.NIC_MODE]
9458 link = nicparams[constants.NIC_LINK]
9459 args['nics'].append((ip, mac, mode, link))
9460 elif constants.DDM_REMOVE in nic_override:
9461 del args['nics'][-1]
9463 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9464 if self.op.disk_template:
9465 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9466 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9469 def CheckPrereq(self):
9470 """Check prerequisites.
9472 This only checks the instance list against the existing names.
9475 # checking the new params on the primary/secondary nodes
9477 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9478 cluster = self.cluster = self.cfg.GetClusterInfo()
9479 assert self.instance is not None, \
9480 "Cannot retrieve locked instance %s" % self.op.instance_name
9481 pnode = instance.primary_node
9482 nodelist = list(instance.all_nodes)
9485 if self.op.os_name and not self.op.force:
9486 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9487 self.op.force_variant)
9488 instance_os = self.op.os_name
9490 instance_os = instance.os
9492 if self.op.disk_template:
9493 if instance.disk_template == self.op.disk_template:
9494 raise errors.OpPrereqError("Instance already has disk template %s" %
9495 instance.disk_template, errors.ECODE_INVAL)
9497 if (instance.disk_template,
9498 self.op.disk_template) not in self._DISK_CONVERSIONS:
9499 raise errors.OpPrereqError("Unsupported disk template conversion from"
9500 " %s to %s" % (instance.disk_template,
9501 self.op.disk_template),
9503 _CheckInstanceDown(self, instance, "cannot change disk template")
9504 if self.op.disk_template in constants.DTS_INT_MIRROR:
9505 if self.op.remote_node == pnode:
9506 raise errors.OpPrereqError("Given new secondary node %s is the same"
9507 " as the primary node of the instance" %
9508 self.op.remote_node, errors.ECODE_STATE)
9509 _CheckNodeOnline(self, self.op.remote_node)
9510 _CheckNodeNotDrained(self, self.op.remote_node)
9511 # FIXME: here we assume that the old instance type is DT_PLAIN
9512 assert instance.disk_template == constants.DT_PLAIN
9513 disks = [{"size": d.size, "vg": d.logical_id[0]}
9514 for d in instance.disks]
9515 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9516 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9518 # hvparams processing
9519 if self.op.hvparams:
9520 hv_type = instance.hypervisor
9521 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9522 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9523 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9526 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9527 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9528 self.hv_new = hv_new # the new actual values
9529 self.hv_inst = i_hvdict # the new dict (without defaults)
9531 self.hv_new = self.hv_inst = {}
9533 # beparams processing
9534 if self.op.beparams:
9535 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9537 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9538 be_new = cluster.SimpleFillBE(i_bedict)
9539 self.be_new = be_new # the new actual values
9540 self.be_inst = i_bedict # the new dict (without defaults)
9542 self.be_new = self.be_inst = {}
9544 # osparams processing
9545 if self.op.osparams:
9546 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9547 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9548 self.os_inst = i_osdict # the new dict (without defaults)
9554 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9555 mem_check_list = [pnode]
9556 if be_new[constants.BE_AUTO_BALANCE]:
9557 # either we changed auto_balance to yes or it was from before
9558 mem_check_list.extend(instance.secondary_nodes)
9559 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9560 instance.hypervisor)
9561 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9562 instance.hypervisor)
9563 pninfo = nodeinfo[pnode]
9564 msg = pninfo.fail_msg
9566 # Assume the primary node is unreachable and go ahead
9567 self.warn.append("Can't get info from primary node %s: %s" %
9569 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9570 self.warn.append("Node data from primary node %s doesn't contain"
9571 " free memory information" % pnode)
9572 elif instance_info.fail_msg:
9573 self.warn.append("Can't get instance runtime information: %s" %
9574 instance_info.fail_msg)
9576 if instance_info.payload:
9577 current_mem = int(instance_info.payload['memory'])
9579 # Assume instance not running
9580 # (there is a slight race condition here, but it's not very probable,
9581 # and we have no other way to check)
9583 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9584 pninfo.payload['memory_free'])
9586 raise errors.OpPrereqError("This change will prevent the instance"
9587 " from starting, due to %d MB of memory"
9588 " missing on its primary node" % miss_mem,
9591 if be_new[constants.BE_AUTO_BALANCE]:
9592 for node, nres in nodeinfo.items():
9593 if node not in instance.secondary_nodes:
9597 self.warn.append("Can't get info from secondary node %s: %s" %
9599 elif not isinstance(nres.payload.get('memory_free', None), int):
9600 self.warn.append("Secondary node %s didn't return free"
9601 " memory information" % node)
9602 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9603 self.warn.append("Not enough memory to failover instance to"
9604 " secondary node %s" % node)
9609 for nic_op, nic_dict in self.op.nics:
9610 if nic_op == constants.DDM_REMOVE:
9611 if not instance.nics:
9612 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9615 if nic_op != constants.DDM_ADD:
9617 if not instance.nics:
9618 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9619 " no NICs" % nic_op,
9621 if nic_op < 0 or nic_op >= len(instance.nics):
9622 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9624 (nic_op, len(instance.nics) - 1),
9626 old_nic_params = instance.nics[nic_op].nicparams
9627 old_nic_ip = instance.nics[nic_op].ip
9632 update_params_dict = dict([(key, nic_dict[key])
9633 for key in constants.NICS_PARAMETERS
9634 if key in nic_dict])
9636 if 'bridge' in nic_dict:
9637 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9639 new_nic_params = _GetUpdatedParams(old_nic_params,
9641 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9642 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9643 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9644 self.nic_pinst[nic_op] = new_nic_params
9645 self.nic_pnew[nic_op] = new_filled_nic_params
9646 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9648 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9649 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9650 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9652 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9654 self.warn.append(msg)
9656 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9657 if new_nic_mode == constants.NIC_MODE_ROUTED:
9658 if 'ip' in nic_dict:
9659 nic_ip = nic_dict['ip']
9663 raise errors.OpPrereqError('Cannot set the nic ip to None'
9664 ' on a routed nic', errors.ECODE_INVAL)
9665 if 'mac' in nic_dict:
9666 nic_mac = nic_dict['mac']
9668 raise errors.OpPrereqError('Cannot set the nic mac to None',
9670 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9671 # otherwise generate the mac
9672 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9674 # or validate/reserve the current one
9676 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9677 except errors.ReservationError:
9678 raise errors.OpPrereqError("MAC address %s already in use"
9679 " in cluster" % nic_mac,
9680 errors.ECODE_NOTUNIQUE)
9683 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9684 raise errors.OpPrereqError("Disk operations not supported for"
9685 " diskless instances",
9687 for disk_op, _ in self.op.disks:
9688 if disk_op == constants.DDM_REMOVE:
9689 if len(instance.disks) == 1:
9690 raise errors.OpPrereqError("Cannot remove the last disk of"
9691 " an instance", errors.ECODE_INVAL)
9692 _CheckInstanceDown(self, instance, "cannot remove disks")
9694 if (disk_op == constants.DDM_ADD and
9695 len(instance.disks) >= constants.MAX_DISKS):
9696 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9697 " add more" % constants.MAX_DISKS,
9699 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9701 if disk_op < 0 or disk_op >= len(instance.disks):
9702 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9704 (disk_op, len(instance.disks)),
9709 def _ConvertPlainToDrbd(self, feedback_fn):
9710 """Converts an instance from plain to drbd.
9713 feedback_fn("Converting template to drbd")
9714 instance = self.instance
9715 pnode = instance.primary_node
9716 snode = self.op.remote_node
9718 # create a fake disk info for _GenerateDiskTemplate
9719 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9720 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9721 instance.name, pnode, [snode],
9722 disk_info, None, None, 0, feedback_fn)
9723 info = _GetInstanceInfoText(instance)
9724 feedback_fn("Creating aditional volumes...")
9725 # first, create the missing data and meta devices
9726 for disk in new_disks:
9727 # unfortunately this is... not too nice
9728 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9730 for child in disk.children:
9731 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9732 # at this stage, all new LVs have been created; we can now rename the old ones
9734 feedback_fn("Renaming original volumes...")
9735 rename_list = [(o, n.children[0].logical_id)
9736 for (o, n) in zip(instance.disks, new_disks)]
9737 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9738 result.Raise("Failed to rename original LVs")
9740 feedback_fn("Initializing DRBD devices...")
9741 # all child devices are in place, we can now create the DRBD devices
9742 for disk in new_disks:
9743 for node in [pnode, snode]:
9744 f_create = node == pnode
9745 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9747 # at this point, the instance has been modified
9748 instance.disk_template = constants.DT_DRBD8
9749 instance.disks = new_disks
9750 self.cfg.Update(instance, feedback_fn)
9752 # disks are created, waiting for sync
9753 disk_abort = not _WaitForSync(self, instance)
9755 raise errors.OpExecError("There are some degraded disks for"
9756 " this instance, please cleanup manually")
9758 def _ConvertDrbdToPlain(self, feedback_fn):
9759 """Converts an instance from drbd to plain.
9762 instance = self.instance
9763 assert len(instance.secondary_nodes) == 1
9764 pnode = instance.primary_node
9765 snode = instance.secondary_nodes[0]
9766 feedback_fn("Converting template to plain")
9768 old_disks = instance.disks
9769 new_disks = [d.children[0] for d in old_disks]
9771 # copy over size and mode
9772 for parent, child in zip(old_disks, new_disks):
9773 child.size = parent.size
9774 child.mode = parent.mode
9776 # update instance structure
9777 instance.disks = new_disks
9778 instance.disk_template = constants.DT_PLAIN
9779 self.cfg.Update(instance, feedback_fn)
9781 feedback_fn("Removing volumes on the secondary node...")
9782 for disk in old_disks:
9783 self.cfg.SetDiskID(disk, snode)
9784 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9786 self.LogWarning("Could not remove block device %s on node %s,"
9787 " continuing anyway: %s", disk.iv_name, snode, msg)
9789 feedback_fn("Removing unneeded volumes on the primary node...")
9790 for idx, disk in enumerate(old_disks):
9791 meta = disk.children[1]
9792 self.cfg.SetDiskID(meta, pnode)
9793 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9795 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9796 " continuing anyway: %s", idx, pnode, msg)
9798 def Exec(self, feedback_fn):
9799 """Modifies an instance.
9801 All parameters take effect only at the next restart of the instance.
9804 # Process here the warnings from CheckPrereq, as we don't have a
9805 # feedback_fn there.
9806 for warn in self.warn:
9807 feedback_fn("WARNING: %s" % warn)
9810 instance = self.instance
9812 for disk_op, disk_dict in self.op.disks:
9813 if disk_op == constants.DDM_REMOVE:
9814 # remove the last disk
9815 device = instance.disks.pop()
9816 device_idx = len(instance.disks)
9817 for node, disk in device.ComputeNodeTree(instance.primary_node):
9818 self.cfg.SetDiskID(disk, node)
9819 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9821 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9822 " continuing anyway", device_idx, node, msg)
9823 result.append(("disk/%d" % device_idx, "remove"))
9824 elif disk_op == constants.DDM_ADD:
9826 if instance.disk_template in (constants.DT_FILE,
9827 constants.DT_SHARED_FILE):
9828 file_driver, file_path = instance.disks[0].logical_id
9829 file_path = os.path.dirname(file_path)
9831 file_driver = file_path = None
9832 disk_idx_base = len(instance.disks)
9833 new_disk = _GenerateDiskTemplate(self,
9834 instance.disk_template,
9835 instance.name, instance.primary_node,
9836 instance.secondary_nodes,
9840 disk_idx_base, feedback_fn)[0]
9841 instance.disks.append(new_disk)
9842 info = _GetInstanceInfoText(instance)
9844 logging.info("Creating volume %s for instance %s",
9845 new_disk.iv_name, instance.name)
9846 # Note: this needs to be kept in sync with _CreateDisks
9848 for node in instance.all_nodes:
9849 f_create = node == instance.primary_node
9851 _CreateBlockDev(self, node, instance, new_disk,
9852 f_create, info, f_create)
9853 except errors.OpExecError, err:
9854 self.LogWarning("Failed to create volume %s (%s) on"
9856 new_disk.iv_name, new_disk, node, err)
9857 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9858 (new_disk.size, new_disk.mode)))
9860 # change a given disk
9861 instance.disks[disk_op].mode = disk_dict['mode']
9862 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9864 if self.op.disk_template:
9865 r_shut = _ShutdownInstanceDisks(self, instance)
9867 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9868 " proceed with disk template conversion")
9869 mode = (instance.disk_template, self.op.disk_template)
9871 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9873 self.cfg.ReleaseDRBDMinors(instance.name)
9875 result.append(("disk_template", self.op.disk_template))
9878 for nic_op, nic_dict in self.op.nics:
9879 if nic_op == constants.DDM_REMOVE:
9880 # remove the last nic
9881 del instance.nics[-1]
9882 result.append(("nic.%d" % len(instance.nics), "remove"))
9883 elif nic_op == constants.DDM_ADD:
9884 # mac and bridge should be set by now
9885 mac = nic_dict['mac']
9886 ip = nic_dict.get('ip', None)
9887 nicparams = self.nic_pinst[constants.DDM_ADD]
9888 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9889 instance.nics.append(new_nic)
9890 result.append(("nic.%d" % (len(instance.nics) - 1),
9891 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9892 (new_nic.mac, new_nic.ip,
9893 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9894 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9897 for key in 'mac', 'ip':
9899 setattr(instance.nics[nic_op], key, nic_dict[key])
9900 if nic_op in self.nic_pinst:
9901 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9902 for key, val in nic_dict.iteritems():
9903 result.append(("nic.%s/%d" % (key, nic_op), val))
9906 if self.op.hvparams:
9907 instance.hvparams = self.hv_inst
9908 for key, val in self.op.hvparams.iteritems():
9909 result.append(("hv/%s" % key, val))
9912 if self.op.beparams:
9913 instance.beparams = self.be_inst
9914 for key, val in self.op.beparams.iteritems():
9915 result.append(("be/%s" % key, val))
9919 instance.os = self.op.os_name
9922 if self.op.osparams:
9923 instance.osparams = self.os_inst
9924 for key, val in self.op.osparams.iteritems():
9925 result.append(("os/%s" % key, val))
9927 self.cfg.Update(instance, feedback_fn)
9931 _DISK_CONVERSIONS = {
9932 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9933 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9937 class LUBackupQuery(NoHooksLU):
9938 """Query the exports list
9943 def ExpandNames(self):
9944 self.needed_locks = {}
9945 self.share_locks[locking.LEVEL_NODE] = 1
9946 if not self.op.nodes:
9947 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9949 self.needed_locks[locking.LEVEL_NODE] = \
9950 _GetWantedNodes(self, self.op.nodes)
9952 def Exec(self, feedback_fn):
9953 """Compute the list of all the exported system images.
9956 @return: a dictionary with the structure node->(export-list)
9957 where export-list is a list of the instances exported on that node.
9961 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9962 rpcresult = self.rpc.call_export_list(self.nodes)
9964 for node in rpcresult:
9965 if rpcresult[node].fail_msg:
9966 result[node] = False
9968 result[node] = rpcresult[node].payload
9973 class LUBackupPrepare(NoHooksLU):
9974 """Prepares an instance for an export and returns useful information.
9979 def ExpandNames(self):
9980 self._ExpandAndLockInstance()
9982 def CheckPrereq(self):
9983 """Check prerequisites.
9986 instance_name = self.op.instance_name
9988 self.instance = self.cfg.GetInstanceInfo(instance_name)
9989 assert self.instance is not None, \
9990 "Cannot retrieve locked instance %s" % self.op.instance_name
9991 _CheckNodeOnline(self, self.instance.primary_node)
9993 self._cds = _GetClusterDomainSecret()
9995 def Exec(self, feedback_fn):
9996 """Prepares an instance for an export.
9999 instance = self.instance
10001 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10002 salt = utils.GenerateSecret(8)
10004 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10005 result = self.rpc.call_x509_cert_create(instance.primary_node,
10006 constants.RIE_CERT_VALIDITY)
10007 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10009 (name, cert_pem) = result.payload
10011 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10015 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10016 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10018 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10024 class LUBackupExport(LogicalUnit):
10025 """Export an instance to an image in the cluster.
10028 HPATH = "instance-export"
10029 HTYPE = constants.HTYPE_INSTANCE
10032 def CheckArguments(self):
10033 """Check the arguments.
10036 self.x509_key_name = self.op.x509_key_name
10037 self.dest_x509_ca_pem = self.op.destination_x509_ca
10039 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10040 if not self.x509_key_name:
10041 raise errors.OpPrereqError("Missing X509 key name for encryption",
10042 errors.ECODE_INVAL)
10044 if not self.dest_x509_ca_pem:
10045 raise errors.OpPrereqError("Missing destination X509 CA",
10046 errors.ECODE_INVAL)
10048 def ExpandNames(self):
10049 self._ExpandAndLockInstance()
10051 # Lock all nodes for local exports
10052 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10053 # FIXME: lock only instance primary and destination node
10055 # Sad but true: for now we have to lock all nodes, as we don't know where
10056 # the previous export might be, and in this LU we search for it and
10057 # remove it from its current node. In the future we could fix this by:
10058 # - making a tasklet to search (share-lock all), then create the
10059 # new one, then one to remove, after
10060 # - removing the removal operation altogether
10061 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10063 def DeclareLocks(self, level):
10064 """Last minute lock declaration."""
10065 # All nodes are locked anyway, so nothing to do here.
10067 def BuildHooksEnv(self):
10068 """Build hooks env.
10070 This will run on the master, primary node and target node.
10074 "EXPORT_MODE": self.op.mode,
10075 "EXPORT_NODE": self.op.target_node,
10076 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10077 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10078 # TODO: Generic function for boolean env variables
10079 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10082 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10084 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10086 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10087 nl.append(self.op.target_node)
10091 def CheckPrereq(self):
10092 """Check prerequisites.
10094 This checks that the instance and node names are valid.
10097 instance_name = self.op.instance_name
10099 self.instance = self.cfg.GetInstanceInfo(instance_name)
10100 assert self.instance is not None, \
10101 "Cannot retrieve locked instance %s" % self.op.instance_name
10102 _CheckNodeOnline(self, self.instance.primary_node)
10104 if (self.op.remove_instance and self.instance.admin_up and
10105 not self.op.shutdown):
10106 raise errors.OpPrereqError("Can not remove instance without shutting it"
10109 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10110 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10111 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10112 assert self.dst_node is not None
10114 _CheckNodeOnline(self, self.dst_node.name)
10115 _CheckNodeNotDrained(self, self.dst_node.name)
10118 self.dest_disk_info = None
10119 self.dest_x509_ca = None
10121 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10122 self.dst_node = None
10124 if len(self.op.target_node) != len(self.instance.disks):
10125 raise errors.OpPrereqError(("Received destination information for %s"
10126 " disks, but instance %s has %s disks") %
10127 (len(self.op.target_node), instance_name,
10128 len(self.instance.disks)),
10129 errors.ECODE_INVAL)
10131 cds = _GetClusterDomainSecret()
10133 # Check X509 key name
10135 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10136 except (TypeError, ValueError), err:
10137 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10139 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10140 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10141 errors.ECODE_INVAL)
10143 # Load and verify CA
10145 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10146 except OpenSSL.crypto.Error, err:
10147 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10148 (err, ), errors.ECODE_INVAL)
10150 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10151 if errcode is not None:
10152 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10153 (msg, ), errors.ECODE_INVAL)
10155 self.dest_x509_ca = cert
10157 # Verify target information
10159 for idx, disk_data in enumerate(self.op.target_node):
10161 (host, port, magic) = \
10162 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10163 except errors.GenericError, err:
10164 raise errors.OpPrereqError("Target info for disk %s: %s" %
10165 (idx, err), errors.ECODE_INVAL)
10167 disk_info.append((host, port, magic))
10169 assert len(disk_info) == len(self.op.target_node)
10170 self.dest_disk_info = disk_info
10173 raise errors.ProgrammerError("Unhandled export mode %r" %
10176 # instance disk type verification
10177 # TODO: Implement export support for file-based disks
10178 for disk in self.instance.disks:
10179 if disk.dev_type == constants.LD_FILE:
10180 raise errors.OpPrereqError("Export not supported for instances with"
10181 " file-based disks", errors.ECODE_INVAL)
10183 def _CleanupExports(self, feedback_fn):
10184 """Removes exports of current instance from all other nodes.
10186 If an instance in a cluster with nodes A..D was exported to node C, its
10187 exports will be removed from the nodes A, B and D.
10190 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10192 nodelist = self.cfg.GetNodeList()
10193 nodelist.remove(self.dst_node.name)
10195 # on one-node clusters nodelist will be empty after the removal;
10196 # if we proceeded, the backup would be removed because OpBackupQuery
10197 # substitutes an empty list with the full cluster node list.
10198 iname = self.instance.name
10200 feedback_fn("Removing old exports for instance %s" % iname)
10201 exportlist = self.rpc.call_export_list(nodelist)
10202 for node in exportlist:
10203 if exportlist[node].fail_msg:
10205 if iname in exportlist[node].payload:
10206 msg = self.rpc.call_export_remove(node, iname).fail_msg
10208 self.LogWarning("Could not remove older export for instance %s"
10209 " on node %s: %s", iname, node, msg)
10211 def Exec(self, feedback_fn):
10212 """Export an instance to an image in the cluster.
10215 assert self.op.mode in constants.EXPORT_MODES
10217 instance = self.instance
10218 src_node = instance.primary_node
10220 if self.op.shutdown:
10221 # shutdown the instance, but not the disks
10222 feedback_fn("Shutting down instance %s" % instance.name)
10223 result = self.rpc.call_instance_shutdown(src_node, instance,
10224 self.op.shutdown_timeout)
10225 # TODO: Maybe ignore failures if ignore_remove_failures is set
10226 result.Raise("Could not shutdown instance %s on"
10227 " node %s" % (instance.name, src_node))
10229 # set the disks ID correctly since call_instance_start needs the
10230 # correct drbd minor to create the symlinks
10231 for disk in instance.disks:
10232 self.cfg.SetDiskID(disk, src_node)
10234 activate_disks = (not instance.admin_up)
10237 # Activate the instance disks if we're exporting a stopped instance
10238 feedback_fn("Activating disks for %s" % instance.name)
10239 _StartInstanceDisks(self, instance, None)
10242 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10245 helper.CreateSnapshots()
10247 if (self.op.shutdown and instance.admin_up and
10248 not self.op.remove_instance):
10249 assert not activate_disks
10250 feedback_fn("Starting instance %s" % instance.name)
10251 result = self.rpc.call_instance_start(src_node, instance, None, None)
10252 msg = result.fail_msg
10254 feedback_fn("Failed to start instance: %s" % msg)
10255 _ShutdownInstanceDisks(self, instance)
10256 raise errors.OpExecError("Could not start instance: %s" % msg)
10258 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10259 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10260 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10261 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10262 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10264 (key_name, _, _) = self.x509_key_name
10267 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10270 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10271 key_name, dest_ca_pem,
10276 # Check for backwards compatibility
10277 assert len(dresults) == len(instance.disks)
10278 assert compat.all(isinstance(i, bool) for i in dresults), \
10279 "Not all results are boolean: %r" % dresults
10283 feedback_fn("Deactivating disks for %s" % instance.name)
10284 _ShutdownInstanceDisks(self, instance)
10286 if not (compat.all(dresults) and fin_resu):
10289 failures.append("export finalization")
10290 if not compat.all(dresults):
10291 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10293 failures.append("disk export: disk(s) %s" % fdsk)
10295 raise errors.OpExecError("Export failed, errors in %s" %
10296 utils.CommaJoin(failures))
10298 # At this point, the export was successful, we can cleanup/finish
10300 # Remove instance if requested
10301 if self.op.remove_instance:
10302 feedback_fn("Removing instance %s" % instance.name)
10303 _RemoveInstance(self, feedback_fn, instance,
10304 self.op.ignore_remove_failures)
10306 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10307 self._CleanupExports(feedback_fn)
10309 return fin_resu, dresults
10312 class LUBackupRemove(NoHooksLU):
10313 """Remove exports related to the named instance.
10318 def ExpandNames(self):
10319 self.needed_locks = {}
10320 # We need all nodes to be locked in order for RemoveExport to work, but we
10321 # don't need to lock the instance itself, as nothing will happen to it (and
10322 # we can remove exports also for a removed instance)
10323 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10325 def Exec(self, feedback_fn):
10326 """Remove any export.
10329 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10330 # If the instance was not found we'll try with the name that was passed in.
10331 # This will only work if it was an FQDN, though.
10333 if not instance_name:
10335 instance_name = self.op.instance_name
10337 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10338 exportlist = self.rpc.call_export_list(locked_nodes)
10340 for node in exportlist:
10341 msg = exportlist[node].fail_msg
10343 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10345 if instance_name in exportlist[node].payload:
10347 result = self.rpc.call_export_remove(node, instance_name)
10348 msg = result.fail_msg
10350 logging.error("Could not remove export for instance %s"
10351 " on node %s: %s", instance_name, node, msg)
10353 if fqdn_warn and not found:
10354 feedback_fn("Export not found. If trying to remove an export belonging"
10355 " to a deleted instance please use its Fully Qualified"
10359 class LUGroupAdd(LogicalUnit):
10360 """Logical unit for creating node groups.
10363 HPATH = "group-add"
10364 HTYPE = constants.HTYPE_GROUP
10367 def ExpandNames(self):
10368 # We need the new group's UUID here so that we can create and acquire the
10369 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10370 # that it should not check whether the UUID exists in the configuration.
10371 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10372 self.needed_locks = {}
10373 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10375 def CheckPrereq(self):
10376 """Check prerequisites.
10378 This checks that the given group name is not an existing node group
10383 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10384 except errors.OpPrereqError:
10387 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10388 " node group (UUID: %s)" %
10389 (self.op.group_name, existing_uuid),
10390 errors.ECODE_EXISTS)
10392 if self.op.ndparams:
10393 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10395 def BuildHooksEnv(self):
10396 """Build hooks env.
10400 "GROUP_NAME": self.op.group_name,
10402 mn = self.cfg.GetMasterNode()
10403 return env, [mn], [mn]
10405 def Exec(self, feedback_fn):
10406 """Add the node group to the cluster.
10409 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10410 uuid=self.group_uuid,
10411 alloc_policy=self.op.alloc_policy,
10412 ndparams=self.op.ndparams)
10414 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10415 del self.remove_locks[locking.LEVEL_NODEGROUP]
10418 class LUGroupAssignNodes(NoHooksLU):
10419 """Logical unit for assigning nodes to groups.
10424 def ExpandNames(self):
10425 # These raise errors.OpPrereqError on their own:
10426 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10427 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10429 # We want to lock all the affected nodes and groups. We have readily
10430 # available the list of nodes, and the *destination* group. To gather the
10431 # list of "source" groups, we need to fetch node information.
10432 self.node_data = self.cfg.GetAllNodesInfo()
10433 affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10434 affected_groups.add(self.group_uuid)
10436 self.needed_locks = {
10437 locking.LEVEL_NODEGROUP: list(affected_groups),
10438 locking.LEVEL_NODE: self.op.nodes,
10441 def CheckPrereq(self):
10442 """Check prerequisites.
10445 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10446 instance_data = self.cfg.GetAllInstancesInfo()
10448 if self.group is None:
10449 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10450 (self.op.group_name, self.group_uuid))
10452 (new_splits, previous_splits) = \
10453 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10454 for node in self.op.nodes],
10455 self.node_data, instance_data)
10458 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10460 if not self.op.force:
10461 raise errors.OpExecError("The following instances get split by this"
10462 " change and --force was not given: %s" %
10465 self.LogWarning("This operation will split the following instances: %s",
10468 if previous_splits:
10469 self.LogWarning("In addition, these already-split instances continue"
10470 " to be spit across groups: %s",
10471 utils.CommaJoin(utils.NiceSort(previous_splits)))
10473 def Exec(self, feedback_fn):
10474 """Assign nodes to a new group.
10477 for node in self.op.nodes:
10478 self.node_data[node].group = self.group_uuid
10480 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10483 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10484 """Check for split instances after a node assignment.
10486 This method considers a series of node assignments as an atomic operation,
10487 and returns information about split instances after applying the set of changes.
10490 In particular, it returns information about newly split instances, and
10491 instances that were already split, and remain so after the change.
10493 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
10496 @type changes: list of (node_name, new_group_uuid) pairs.
10497 @param changes: list of node assignments to consider.
10498 @param node_data: a dict with data for all nodes
10499 @param instance_data: a dict with all instances to consider
10500 @rtype: a two-tuple
10501 @return: a list of instances that were previously okay and become split as a
10502 consequence of this change, and a list of instances that were previously
10503 split and that this change does not fix.
10506 changed_nodes = dict((node, group) for node, group in changes
10507 if node_data[node].group != group)
10509 all_split_instances = set()
10510 previously_split_instances = set()
10512 def InstanceNodes(instance):
10513 return [instance.primary_node] + list(instance.secondary_nodes)
10515 for inst in instance_data.values():
10516 if inst.disk_template not in constants.DTS_INT_MIRROR:
10519 instance_nodes = InstanceNodes(inst)
10521 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10522 previously_split_instances.add(inst.name)
10524 if len(set(changed_nodes.get(node, node_data[node].group)
10525 for node in instance_nodes)) > 1:
10526 all_split_instances.add(inst.name)
10528 return (list(all_split_instances - previously_split_instances),
10529 list(previously_split_instances & all_split_instances))
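# A small worked example (hypothetical data): with
#   changes = [("node2", "group-B")]
# a DRBD instance on node1/node2 that currently lives entirely in group-A ends
# up spanning two groups and is returned in the first list, while an instance
# that already spanned group-A and group-B before the change (and still does
# afterwards) is returned in the second list.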
10532 class _GroupQuery(_QueryBase):
10533 FIELDS = query.GROUP_FIELDS
10535 def ExpandNames(self, lu):
10536 lu.needed_locks = {}
10538 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10539 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10542 self.wanted = [name_to_uuid[name]
10543 for name in utils.NiceSort(name_to_uuid.keys())]
10545 # Accept names to be either names or UUIDs.
10548 all_uuid = frozenset(self._all_groups.keys())
10550 for name in self.names:
10551 if name in all_uuid:
10552 self.wanted.append(name)
10553 elif name in name_to_uuid:
10554 self.wanted.append(name_to_uuid[name])
10556 missing.append(name)
10559 raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10560 errors.ECODE_NOENT)
10562 def DeclareLocks(self, lu, level):
10565 def _GetQueryData(self, lu):
10566 """Computes the list of node groups and their attributes.
10569 do_nodes = query.GQ_NODE in self.requested_data
10570 do_instances = query.GQ_INST in self.requested_data
10572 group_to_nodes = None
10573 group_to_instances = None
10575 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10576 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10577 # latter GetAllInstancesInfo() is not enough, for we have to go through
10578 # instance->node. Hence, we will need to process nodes even if we only need
10579 # instance information.
10580 if do_nodes or do_instances:
10581 all_nodes = lu.cfg.GetAllNodesInfo()
10582 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10585 for node in all_nodes.values():
10586 if node.group in group_to_nodes:
10587 group_to_nodes[node.group].append(node.name)
10588 node_to_group[node.name] = node.group
10591 all_instances = lu.cfg.GetAllInstancesInfo()
10592 group_to_instances = dict((uuid, []) for uuid in self.wanted)
10594 for instance in all_instances.values():
10595 node = instance.primary_node
10596 if node in node_to_group:
10597 group_to_instances[node_to_group[node]].append(instance.name)
10600 # Do not pass on node information if it was not requested.
10601 group_to_nodes = None
10603 return query.GroupQueryData([self._all_groups[uuid]
10604 for uuid in self.wanted],
10605 group_to_nodes, group_to_instances)
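# Sketch of the intermediate mappings built above (names are hypothetical):
#   group_to_nodes     = {"uuid-A": ["node1", "node2"], "uuid-B": []}
#   group_to_instances = {"uuid-A": ["inst1"], "uuid-B": []}
# either mapping stays None when the corresponding data was not requested.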
10608 class LUGroupQuery(NoHooksLU):
10609 """Logical unit for querying node groups.
10614 def CheckArguments(self):
10615 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10616 self.op.output_fields, False)
10618 def ExpandNames(self):
10619 self.gq.ExpandNames(self)
10621 def Exec(self, feedback_fn):
10622 return self.gq.OldStyleQuery(self)
10625 class LUGroupSetParams(LogicalUnit):
10626 """Modifies the parameters of a node group.
10629 HPATH = "group-modify"
10630 HTYPE = constants.HTYPE_GROUP
10633 def CheckArguments(self):
10636 self.op.alloc_policy,
10639 if all_changes.count(None) == len(all_changes):
10640 raise errors.OpPrereqError("Please pass at least one modification",
10641 errors.ECODE_INVAL)
10643 def ExpandNames(self):
10644 # This raises errors.OpPrereqError on its own:
10645 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10647 self.needed_locks = {
10648 locking.LEVEL_NODEGROUP: [self.group_uuid],
10651 def CheckPrereq(self):
10652 """Check prerequisites.
10655 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10657 if self.group is None:
10658 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10659 (self.op.group_name, self.group_uuid))
10661 if self.op.ndparams:
10662 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10663 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10664 self.new_ndparams = new_ndparams
10666 def BuildHooksEnv(self):
10667 """Build hooks env.
10671 "GROUP_NAME": self.op.group_name,
10672 "NEW_ALLOC_POLICY": self.op.alloc_policy,
10674 mn = self.cfg.GetMasterNode()
10675 return env, [mn], [mn]
10677 def Exec(self, feedback_fn):
10678 """Modifies the node group.
10683 if self.op.ndparams:
10684 self.group.ndparams = self.new_ndparams
10685 result.append(("ndparams", str(self.group.ndparams)))
10687 if self.op.alloc_policy:
10688 self.group.alloc_policy = self.op.alloc_policy
10690 self.cfg.Update(self.group, feedback_fn)
10695 class LUGroupRemove(LogicalUnit):
10696 HPATH = "group-remove"
10697 HTYPE = constants.HTYPE_GROUP
10700 def ExpandNames(self):
10701 # This raises errors.OpPrereqError on its own:
10702 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10703 self.needed_locks = {
10704 locking.LEVEL_NODEGROUP: [self.group_uuid],
10707 def CheckPrereq(self):
10708 """Check prerequisites.
10710 This checks that the given group name exists as a node group, that it is
10711 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
10715 # Verify that the group is empty.
10716 group_nodes = [node.name
10717 for node in self.cfg.GetAllNodesInfo().values()
10718 if node.group == self.group_uuid]
10721 raise errors.OpPrereqError("Group '%s' not empty, has the following"
10723 (self.op.group_name,
10724 utils.CommaJoin(utils.NiceSort(group_nodes))),
10725 errors.ECODE_STATE)
10727 # Verify the cluster would not be left group-less.
10728 if len(self.cfg.GetNodeGroupList()) == 1:
10729 raise errors.OpPrereqError("Group '%s' is the only group,"
10730 " cannot be removed" %
10731 self.op.group_name,
10732 errors.ECODE_STATE)
10734 def BuildHooksEnv(self):
10735 """Build hooks env.
10739 "GROUP_NAME": self.op.group_name,
10741 mn = self.cfg.GetMasterNode()
10742 return env, [mn], [mn]
10744 def Exec(self, feedback_fn):
10745 """Remove the node group.
10749 self.cfg.RemoveNodeGroup(self.group_uuid)
10750 except errors.ConfigurationError:
10751 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10752 (self.op.group_name, self.group_uuid))
10754 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10757 class LUGroupRename(LogicalUnit):
10758 HPATH = "group-rename"
10759 HTYPE = constants.HTYPE_GROUP
10762 def ExpandNames(self):
10763 # This raises errors.OpPrereqError on its own:
10764 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10766 self.needed_locks = {
10767 locking.LEVEL_NODEGROUP: [self.group_uuid],
10770 def CheckPrereq(self):
10771 """Check prerequisites.
10773 Ensures requested new name is not yet used.
10777 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10778 except errors.OpPrereqError:
10781 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10782 " node group (UUID: %s)" %
10783 (self.op.new_name, new_name_uuid),
10784 errors.ECODE_EXISTS)
10786 def BuildHooksEnv(self):
10787 """Build hooks env.
10791 "OLD_NAME": self.op.group_name,
10792 "NEW_NAME": self.op.new_name,
10795 mn = self.cfg.GetMasterNode()
10796 all_nodes = self.cfg.GetAllNodesInfo()
10798 all_nodes.pop(mn, None)
10800 for node in all_nodes.values():
10801 if node.group == self.group_uuid:
10802 run_nodes.append(node.name)
10804 return env, run_nodes, run_nodes
10806 def Exec(self, feedback_fn):
10807 """Rename the node group.
10810 group = self.cfg.GetNodeGroup(self.group_uuid)
10813 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10814 (self.op.group_name, self.group_uuid))
10816 group.name = self.op.new_name
10817 self.cfg.Update(group, feedback_fn)
10819 return self.op.new_name
10822 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10823 """Generic tags LU.
10825 This is an abstract class which is the parent of all the other tags LUs.
10829 def ExpandNames(self):
10830 self.needed_locks = {}
10831 if self.op.kind == constants.TAG_NODE:
10832 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10833 self.needed_locks[locking.LEVEL_NODE] = self.op.name
10834 elif self.op.kind == constants.TAG_INSTANCE:
10835 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10836 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10838 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10839 # not possible to acquire the BGL based on opcode parameters)
10841 def CheckPrereq(self):
10842 """Check prerequisites.
10845 if self.op.kind == constants.TAG_CLUSTER:
10846 self.target = self.cfg.GetClusterInfo()
10847 elif self.op.kind == constants.TAG_NODE:
10848 self.target = self.cfg.GetNodeInfo(self.op.name)
10849 elif self.op.kind == constants.TAG_INSTANCE:
10850 self.target = self.cfg.GetInstanceInfo(self.op.name)
10852 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10853 str(self.op.kind), errors.ECODE_INVAL)
10856 class LUTagsGet(TagsLU):
10857 """Returns the tags of a given object.
10862 def ExpandNames(self):
10863 TagsLU.ExpandNames(self)
10865 # Share locks as this is only a read operation
10866 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10868 def Exec(self, feedback_fn):
10869 """Returns the tag list.
10872 return list(self.target.GetTags())
10875 class LUTagsSearch(NoHooksLU):
10876 """Searches the tags for a given pattern.
10881 def ExpandNames(self):
10882 self.needed_locks = {}
10884 def CheckPrereq(self):
10885 """Check prerequisites.
10887 This checks the pattern passed for validity by compiling it.
10891 self.re = re.compile(self.op.pattern)
10892 except re.error, err:
10893 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10894 (self.op.pattern, err), errors.ECODE_INVAL)
10896 def Exec(self, feedback_fn):
10897 """Returns the tag list.
10901 tgts = [("/cluster", cfg.GetClusterInfo())]
10902 ilist = cfg.GetAllInstancesInfo().values()
10903 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10904 nlist = cfg.GetAllNodesInfo().values()
10905 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10907 for path, target in tgts:
10908 for tag in target.GetTags():
10909 if self.re.search(tag):
10910 results.append((path, tag))
10914 class LUTagsSet(TagsLU):
10915 """Sets a tag on a given object.
10920 def CheckPrereq(self):
10921 """Check prerequisites.
10923 This checks the type and length of the tag name and value.
10926 TagsLU.CheckPrereq(self)
10927 for tag in self.op.tags:
10928 objects.TaggableObject.ValidateTag(tag)
10930 def Exec(self, feedback_fn):
10935 for tag in self.op.tags:
10936 self.target.AddTag(tag)
10937 except errors.TagError, err:
10938 raise errors.OpExecError("Error while setting tag: %s" % str(err))
10939 self.cfg.Update(self.target, feedback_fn)
10942 class LUTagsDel(TagsLU):
10943 """Delete a list of tags from a given object.
10948 def CheckPrereq(self):
10949 """Check prerequisites.
10951 This checks that we have the given tag.
10954 TagsLU.CheckPrereq(self)
10955 for tag in self.op.tags:
10956 objects.TaggableObject.ValidateTag(tag)
10957 del_tags = frozenset(self.op.tags)
10958 cur_tags = self.target.GetTags()
10960 diff_tags = del_tags - cur_tags
10962 diff_names = ("'%s'" % i for i in sorted(diff_tags))
10963 raise errors.OpPrereqError("Tag(s) %s not found" %
10964 (utils.CommaJoin(diff_names), ),
10965 errors.ECODE_NOENT)
10967 def Exec(self, feedback_fn):
10968 """Remove the tag from the object.
10971 for tag in self.op.tags:
10972 self.target.RemoveTag(tag)
10973 self.cfg.Update(self.target, feedback_fn)
10976 class LUTestDelay(NoHooksLU):
10977 """Sleep for a specified amount of time.
10979 This LU sleeps on the master and/or nodes for a specified amount of time.
10985 def ExpandNames(self):
10986 """Expand names and set required locks.
10988 This expands the node list, if any.
10991 self.needed_locks = {}
10992 if self.op.on_nodes:
10993 # _GetWantedNodes can be used here, but is not always appropriate to use
10994 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10995 # more information.
10996 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10997 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10999 def _TestDelay(self):
11000 """Do the actual sleep.
11003 if self.op.on_master:
11004 if not utils.TestDelay(self.op.duration):
11005 raise errors.OpExecError("Error during master delay test")
11006 if self.op.on_nodes:
11007 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11008 for node, node_result in result.items():
11009 node_result.Raise("Failure during rpc call to node %s" % node)
11011 def Exec(self, feedback_fn):
11012 """Execute the test delay opcode, with the wanted repetitions.
11015 if self.op.repeat == 0:
11018 top_value = self.op.repeat - 1
11019 for i in range(self.op.repeat):
11020 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11024 class LUTestJqueue(NoHooksLU):
11025 """Utility LU to test some aspects of the job queue.
11030 # Must be lower than default timeout for WaitForJobChange to see whether it
11031 # notices changed jobs
11032 _CLIENT_CONNECT_TIMEOUT = 20.0
11033 _CLIENT_CONFIRM_TIMEOUT = 60.0
11036 def _NotifyUsingSocket(cls, cb, errcls):
11037 """Opens a Unix socket and waits for another program to connect.
11040 @param cb: Callback to send socket name to client
11041 @type errcls: class
11042 @param errcls: Exception class to use for errors
11045 # Using a temporary directory as there's no easy way to create temporary
11046 # sockets without writing a custom loop around tempfile.mktemp and
11048 tmpdir = tempfile.mkdtemp()
11050 tmpsock = utils.PathJoin(tmpdir, "sock")
11052 logging.debug("Creating temporary socket at %s", tmpsock)
11053 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11058 # Send details to client
11061 # Wait for client to connect before continuing
11062 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11064 (conn, _) = sock.accept()
11065 except socket.error, err:
11066 raise errcls("Client didn't connect in time (%s)" % err)
11070 # Remove as soon as client is connected
11071 shutil.rmtree(tmpdir)
11073 # Wait for client to close
11076 # pylint: disable-msg=E1101
11077 # Instance of '_socketobject' has no ... member
11078 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11080 except socket.error, err:
11081 raise errcls("Client failed to confirm notification (%s)" % err)
11085 def _SendNotification(self, test, arg, sockname):
11086 """Sends a notification to the client.
11089 @param test: Test name
11090 @param arg: Test argument (depends on test)
11091 @type sockname: string
11092 @param sockname: Socket path
11095 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11097 def _Notify(self, prereq, test, arg):
11098 """Notifies the client of a test.
11101 @param prereq: Whether this is a prereq-phase test
11103 @param test: Test name
11104 @param arg: Test argument (depends on test)
11108 errcls = errors.OpPrereqError
11110 errcls = errors.OpExecError
11112 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11116 def CheckArguments(self):
11117 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11118 self.expandnames_calls = 0
11120 def ExpandNames(self):
11121 checkargs_calls = getattr(self, "checkargs_calls", 0)
11122 if checkargs_calls < 1:
11123 raise errors.ProgrammerError("CheckArguments was not called")
11125 self.expandnames_calls += 1
11127 if self.op.notify_waitlock:
11128 self._Notify(True, constants.JQT_EXPANDNAMES, None)
11130 self.LogInfo("Expanding names")
11132 # Get lock on master node (just to get a lock, not for a particular reason)
11133 self.needed_locks = {
11134 locking.LEVEL_NODE: self.cfg.GetMasterNode(),

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in_data, in_text, out_data, out_text), that
      represent the input (to the external script) in text and data structure
      format, and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
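
  # Illustrative use (a sketch mirroring LUTestAllocator.Exec further below;
  # the field values are made up):
  #
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node2.example.com"])
  #   ial.Run("hail")  # name of an installed iallocator script (example)
  #   if not ial.success:
  #     raise errors.OpExecError("iallocator failed: %s" % ial.info)
  #   new_nodes = ial.result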

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data
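
  # For reference (derived from the code above, values illustrative): before
  # _BuildInputData() adds the per-request section, self.in_data has roughly
  # this shape:
  #
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "cluster.example.com",
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodegroups": {uuid: {"name": ..., "alloc_policy": ...}, ...},
  #     "nodes": {node_name: {static and dynamic node data}, ...},
  #     "instances": {instance_name: {instance data}, ...},
  #   }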

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @returns: a dict mapping node names to static (config-derived) node data

    """
    node_results = {}
    for ninfo in node_cfg.values():
      # fill in static (config-based) values
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      node_results[ninfo.name] = pnr

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes,
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
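
  # For reference (illustrative, field values made up): after _BuildInputData
  # the serialized self.in_text contains the cluster data computed above plus
  # a "request" section, e.g. for a relocation:
  #
  #   "request": {
  #     "type": ...,  # self.mode
  #     "name": "inst1.example.com",
  #     "disk_space_total": 1024,
  #     "required_nodes": 1,
  #     "relocate_from": ["node2.example.com"]
  #   }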

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other output attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
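
  # Illustrative example of iallocator output that passes the checks above
  # (the keys are the ones verified; the values are made up):
  #
  #   {
  #     "success": true,
  #     "info": "allocation successful",
  #     "result": ["node1.example.com", "node3.example.com"]
  #   }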


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  }


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_OP_QUERY}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
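

# Illustrative use of the mapping above (a sketch; the query opcodes elsewhere
# in this module are the real callers):
#
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)  # -> _NodeQuery
#   # unknown resource names raise OpPrereqError with errors.ECODE_INVAL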