# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module

import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node to check
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
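
# Example use of _SupportsOob (a sketch; ``cfg`` and ``node`` stand for a
# config.ConfigWriter and an objects.Node instance, respectively):
#
#   oob_program = _SupportsOob(cfg, node)
#   if oob_program:
#     # the node can be managed out-of-band through this helper script
#     ...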


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waiting times should be kept minimal)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS
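
    A typical implementation, for an LU that locked instances in
    ExpandNames and needs their nodes at this level (a sketch; see also
    L{_LockInstancesNodes} below)::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()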

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this
    is handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the 'unused argument' and
    # 'could be a function' warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been
    done before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
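
  # With the helper above, a typical instance LU needs only (a sketch):
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()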

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
404 """Tasklet base class.
406 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
407 they can mix legacy code with tasklets. Locking needs to be done in the LU,
408 tasklets know nothing about locks.
410 Subclasses must follow these rules:
411 - Implement CheckPrereq
415 def __init__(self, lu):
422 def CheckPrereq(self):
423 """Check prerequisites for this tasklets.
425 This method should check whether the prerequisites for the execution of
426 this tasklet are fulfilled. It can do internode communication, but it
427 should be idempotent - no cluster or system changes are allowed.
429 The method should raise errors.OpPrereqError in case something is not
430 fulfilled. Its return value is ignored.
432 This method should also update all parameters to their canonical form if it
433 hasn't been done before.
438 def Exec(self, feedback_fn):
439 """Execute the tasklet.
441 This method should implement the actual work. It should raise
442 errors.OpExecError for failures that are somewhat dealt with in code, or
446 raise NotImplementedError
450 """Base for query utility classes.
453 #: Attribute holding field definitions
456 def __init__(self, filter_, fields, use_locking):
457 """Initializes this class.
460 self.use_locking = use_locking
462 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
464 self.requested_data = self.query.RequestedData()
465 self.names = self.query.RequestedNames()
467 # Sort only if no names were requested
468 self.sort_by_name = not self.names
470 self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.acquired_locks[lock_level]
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.acquired_locks[lock_level]

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
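
# Example (a sketch; ``lu`` is a LogicalUnit and the names are illustrative):
#
#   _GetWantedNodes(lu, ["node1", "node2"])  # expands to full node names
#   _GetWantedNodes(lu, None)                # all nodes, nicely sorted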


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
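
# Worked example of the update semantics above (illustrative values):
#
#   old = {"kernel_path": "/vmlinuz", "root_path": "/dev/sda1"}
#   upd = {"root_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   # -> {"kernel_path": "/vmlinuz", "serial_console": True}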


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
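
# Worked example: with candidate_pool_size = 10, 3 current candidates and
# 4 desired, mc_should becomes min(4 + 1, 10) = 5 and 3 < 5 holds, so the
# new node should promote itself.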


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
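
# Example (a sketch): for an OS that declares variants, the user-supplied
# name must carry a variant suffix, e.g.:
#
#   _CheckOSVariant(os_obj, "debootstrap+default")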


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a ghost node, i.e. not known to the
        configuration (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
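
  # Typical use from a verification helper (a sketch):
  #
  #   test = remote_data is None
  #   self._ErrorIf(test, self.ENODERPC, node, "no data returned from %s", node)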

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to, should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough anyway
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1868 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1869 """Verifies and updates the node volume data.
1871 This function will update a L{NodeImage}'s internal structures
1872 with data from the remote call.
1874 @type ninfo: L{objects.Node}
1875 @param ninfo: the node to check
1876 @param nresult: the remote results for the node
1877 @param nimg: the node image object
1878 @param vg_name: the configured VG name
1881 node = ninfo.name
1882 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1884 nimg.lvm_fail = True
1885 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1886 if vg_name is None:
1887 pass
1888 elif isinstance(lvdata, basestring):
1889 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1890 utils.SafeEncode(lvdata))
1891 elif not isinstance(lvdata, dict):
1892 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1893 else:
1894 nimg.volumes = lvdata
1895 nimg.lvm_fail = False
1897 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1898 """Verifies and updates the node instance list.
1900 If the listing was successful, then updates this node's instance
1901 list. Otherwise, it marks the RPC call as failed for the instance
1902 list key.
1904 @type ninfo: L{objects.Node}
1905 @param ninfo: the node to check
1906 @param nresult: the remote results for the node
1907 @param nimg: the node image object
1910 idata = nresult.get(constants.NV_INSTANCELIST, None)
1911 test = not isinstance(idata, list)
1912 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1913 " (instancelist): %s", utils.SafeEncode(str(idata)))
1914 if test:
1915 nimg.hyp_fail = True
1916 else:
1917 nimg.instances = idata
1919 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1920 """Verifies and computes a node information map
1922 @type ninfo: L{objects.Node}
1923 @param ninfo: the node to check
1924 @param nresult: the remote results for the node
1925 @param nimg: the node image object
1926 @param vg_name: the configured VG name
1929 node = ninfo.name
1930 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1932 # try to read free memory (from the hypervisor)
1933 hv_info = nresult.get(constants.NV_HVINFO, None)
1934 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1935 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1936 if not test:
1937 try:
1938 nimg.mfree = int(hv_info["memory_free"])
1939 except (ValueError, TypeError):
1940 _ErrorIf(True, self.ENODERPC, node,
1941 "node returned invalid nodeinfo, check hypervisor")
1943 # FIXME: devise a free space model for file based instances as well
1944 if vg_name is not None:
1945 test = (constants.NV_VGLIST not in nresult or
1946 vg_name not in nresult[constants.NV_VGLIST])
1947 _ErrorIf(test, self.ENODELVM, node,
1948 "node didn't return data for the volume group '%s'"
1949 " - it is either missing or broken", vg_name)
1950 if not test:
1951 try:
1952 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1953 except (ValueError, TypeError):
1954 _ErrorIf(True, self.ENODERPC, node,
1955 "node returned invalid LVM info, check LVM status")
1957 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1958 """Gets per-disk status information for all instances.
1960 @type nodelist: list of strings
1961 @param nodelist: Node names
1962 @type node_image: dict of (name, L{objects.Node})
1963 @param node_image: Node objects
1964 @type instanceinfo: dict of (name, L{objects.Instance})
1965 @param instanceinfo: Instance objects
1966 @rtype: {instance: {node: [(success, payload)]}}
1967 @return: a dictionary of per-instance dictionaries with nodes as
1968 keys and disk information as values; the disk information is a
1969 list of tuples (success, payload)
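A hypothetical (illustrative only, not from a live cluster) result shape::

    {"inst1": {"node1": [(True, payload)],
               "node2": [(True, payload)]}}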
1972 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1974 node_disks = {}
1975 node_disks_devonly = {}
1976 diskless_instances = set()
1977 diskless = constants.DT_DISKLESS
1979 for nname in nodelist:
1980 node_instances = list(itertools.chain(node_image[nname].pinst,
1981 node_image[nname].sinst))
1982 diskless_instances.update(inst for inst in node_instances
1983 if instanceinfo[inst].disk_template == diskless)
1984 disks = [(inst, disk)
1985 for inst in node_instances
1986 for disk in instanceinfo[inst].disks]
1988 if not disks:
1989 # No need to collect data
1990 continue
1992 node_disks[nname] = disks
1994 # Creating copies as SetDiskID below will modify the objects and that can
1995 # lead to incorrect data returned from nodes
1996 devonly = [dev.Copy() for (_, dev) in disks]
1998 for dev in devonly:
1999 self.cfg.SetDiskID(dev, nname)
2001 node_disks_devonly[nname] = devonly
2003 assert len(node_disks) == len(node_disks_devonly)
2005 # Collect data from all nodes with disks
2006 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2007 node_disks_devonly)
2009 assert len(result) == len(node_disks)
2011 instdisk = {}
2013 for (nname, nres) in result.items():
2014 disks = node_disks[nname]
2016 if nres.offline:
2017 # No data from this node
2018 data = len(disks) * [(False, "node offline")]
2019 else:
2020 msg = nres.fail_msg
2021 _ErrorIf(msg, self.ENODERPC, nname,
2022 "while getting disk information: %s", msg)
2023 if msg:
2024 # No data from this node
2025 data = len(disks) * [(False, msg)]
2026 else:
2027 data = []
2028 for idx, i in enumerate(nres.payload):
2029 if isinstance(i, (tuple, list)) and len(i) == 2:
2030 data.append(i)
2031 else:
2032 logging.warning("Invalid result from node %s, entry %d: %s",
2033 nname, idx, i)
2034 data.append((False, "Invalid result from the remote node"))
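# Note: zip() pairs each (instance, disk) tuple collected above with its
# status entry in order, so instdisk maps instance -> node -> list of
# (success, payload) statuses.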
2036 for ((inst, _), status) in zip(disks, data):
2037 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2039 # Add empty entries for diskless instances.
2040 for inst in diskless_instances:
2041 assert inst not in instdisk
2042 instdisk[inst] = {}
2044 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2045 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2046 compat.all(isinstance(s, (tuple, list)) and
2047 len(s) == 2 for s in statuses)
2048 for inst, nnames in instdisk.items()
2049 for nname, statuses in nnames.items())
2050 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2052 return instdisk
2054 def _VerifyHVP(self, hvp_data):
2055 """Verifies locally the syntax of the hypervisor parameters.
2058 for item, hv_name, hv_params in hvp_data:
2059 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2060 (hv_name, item))
2061 try:
2062 hv_class = hypervisor.GetHypervisor(hv_name)
2063 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2064 hv_class.CheckParameterSyntax(hv_params)
2065 except errors.GenericError, err:
2066 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2069 def BuildHooksEnv(self):
2070 """Build hooks env.
2072 Cluster-Verify hooks run in the post phase only; a hook failure is
2073 logged in the verify output and makes the verification fail.
2076 all_nodes = self.cfg.GetNodeList()
2078 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2080 for node in self.cfg.GetAllNodesInfo().values():
2081 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2083 return env, [], all_nodes
2085 def Exec(self, feedback_fn):
2086 """Verify integrity of cluster, performing various test on nodes.
2089 # This method has too many local variables. pylint: disable-msg=R0914
2091 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2092 verbose = self.op.verbose
2093 self._feedback_fn = feedback_fn
2094 feedback_fn("* Verifying global settings")
2095 for msg in self.cfg.VerifyConfig():
2096 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2098 # Check the cluster certificates
2099 for cert_filename in constants.ALL_CERT_FILES:
2100 (errcode, msg) = _VerifyCertificate(cert_filename)
2101 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2103 vg_name = self.cfg.GetVGName()
2104 drbd_helper = self.cfg.GetDRBDHelper()
2105 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2106 cluster = self.cfg.GetClusterInfo()
2107 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2108 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2109 nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2110 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2111 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2112 for iname in instancelist)
2113 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2114 i_non_redundant = [] # Non redundant instances
2115 i_non_a_balanced = [] # Non auto-balanced instances
2116 n_offline = 0 # Count of offline nodes
2117 n_drained = 0 # Count of nodes being drained
2118 node_vol_should = {}
2120 # FIXME: verify OS list
2121 # do local checksums
2122 master_files = [constants.CLUSTER_CONF_FILE]
2123 master_node = self.master_node = self.cfg.GetMasterNode()
2124 master_ip = self.cfg.GetMasterIP()
2126 file_names = ssconf.SimpleStore().GetFileList()
2127 file_names.extend(constants.ALL_CERT_FILES)
2128 file_names.extend(master_files)
2129 if cluster.modify_etc_hosts:
2130 file_names.append(constants.ETC_HOSTS)
2132 local_checksums = utils.FingerprintFiles(file_names)
2134 # Compute the set of hypervisor parameters
2135 hvp_data = []
2136 for hv_name in hypervisors:
2137 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2138 for os_name, os_hvp in cluster.os_hvp.items():
2139 for hv_name, hv_params in os_hvp.items():
2140 if not hv_params:
2141 continue
2142 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2143 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2144 # TODO: collapse identical parameter values in a single one
2145 for instance in instanceinfo.values():
2146 if not instance.hvparams:
2147 continue
2148 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2149 cluster.FillHV(instance)))
2150 # and verify them locally
2151 self._VerifyHVP(hvp_data)
2153 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2154 node_verify_param = {
2155 constants.NV_FILELIST: file_names,
2156 constants.NV_NODELIST: [node.name for node in nodeinfo
2157 if not node.offline],
2158 constants.NV_HYPERVISOR: hypervisors,
2159 constants.NV_HVPARAMS: hvp_data,
2160 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2161 node.secondary_ip) for node in nodeinfo
2162 if not node.offline],
2163 constants.NV_INSTANCELIST: hypervisors,
2164 constants.NV_VERSION: None,
2165 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2166 constants.NV_NODESETUP: None,
2167 constants.NV_TIME: None,
2168 constants.NV_MASTERIP: (master_node, master_ip),
2169 constants.NV_OSLIST: None,
2170 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2171 }
2173 if vg_name is not None:
2174 node_verify_param[constants.NV_VGLIST] = None
2175 node_verify_param[constants.NV_LVLIST] = vg_name
2176 node_verify_param[constants.NV_PVLIST] = [vg_name]
2177 node_verify_param[constants.NV_DRBDLIST] = None
2179 if drbd_helper:
2180 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2182 # Build our expected cluster state
2183 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2184 name=node.name,
2185 vm_capable=node.vm_capable))
2186 for node in nodeinfo)
2188 oob_paths = []
2190 for node in nodeinfo:
2191 path = _SupportsOob(self.cfg, node)
2192 if path and path not in oob_paths:
2193 oob_paths.append(path)
2195 if oob_paths:
2196 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2198 for instance in instancelist:
2199 inst_config = instanceinfo[instance]
2201 for nname in inst_config.all_nodes:
2202 if nname not in node_image:
2203 # ghost node
2204 gnode = self.NodeImage(name=nname)
2205 gnode.ghost = True
2206 node_image[nname] = gnode
2208 inst_config.MapLVsByNode(node_vol_should)
2210 pnode = inst_config.primary_node
2211 node_image[pnode].pinst.append(instance)
2213 for snode in inst_config.secondary_nodes:
2214 nimg = node_image[snode]
2215 nimg.sinst.append(instance)
2216 if pnode not in nimg.sbp:
2217 nimg.sbp[pnode] = []
2218 nimg.sbp[pnode].append(instance)
2220 # At this point, we have the in-memory data structures complete,
2221 # except for the runtime information, which we'll gather next
2223 # Due to the way our RPC system works, exact response times cannot be
2224 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2225 # time before and after executing the request, we can at least have a time
2226 # window.
2227 nvinfo_starttime = time.time()
2228 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2229 self.cfg.GetClusterName())
2230 nvinfo_endtime = time.time()
2232 all_drbd_map = self.cfg.ComputeDRBDMap()
2234 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2235 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2237 feedback_fn("* Verifying node status")
2239 refos_img = None
2241 for node_i in nodeinfo:
2242 node = node_i.name
2243 nimg = node_image[node]
2247 feedback_fn("* Skipping offline node %s" % (node,))
2251 if node == master_node:
2252 ntype = "master"
2253 elif node_i.master_candidate:
2254 ntype = "master candidate"
2255 elif node_i.drained:
2256 ntype = "drained"
2257 n_drained += 1
2258 else:
2259 ntype = "regular"
2260 if verbose:
2261 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2263 msg = all_nvinfo[node].fail_msg
2264 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2265 if msg:
2266 nimg.rpc_fail = True
2267 continue
2269 nresult = all_nvinfo[node].payload
2271 nimg.call_ok = self._VerifyNode(node_i, nresult)
2272 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2273 self._VerifyNodeNetwork(node_i, nresult)
2274 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2275 master_files)
2277 self._VerifyOob(node_i, nresult)
2279 if nimg.vm_capable:
2280 self._VerifyNodeLVM(node_i, nresult, vg_name)
2281 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2282 all_drbd_map)
2284 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2285 self._UpdateNodeInstances(node_i, nresult, nimg)
2286 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2287 self._UpdateNodeOS(node_i, nresult, nimg)
2288 if not nimg.os_fail:
2289 if refos_img is None:
2290 refos_img = nimg
2291 self._VerifyNodeOS(node_i, nimg, refos_img)
2293 feedback_fn("* Verifying instance status")
2294 for instance in instancelist:
2296 feedback_fn("* Verifying instance %s" % instance)
2297 inst_config = instanceinfo[instance]
2298 self._VerifyInstance(instance, inst_config, node_image,
2299 instdisk[instance])
2300 inst_nodes_offline = []
2302 pnode = inst_config.primary_node
2303 pnode_img = node_image[pnode]
2304 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2305 self.ENODERPC, pnode, "instance %s, connection to"
2306 " primary node failed", instance)
2308 _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2309 "instance lives on offline node %s", inst_config.primary_node)
2311 # If the instance is non-redundant we cannot survive losing its primary
2312 # node, so we are not N+1 compliant. On the other hand we have no disk
2313 # templates with more than one secondary so that situation is not well
2314 # supported either.
2315 # FIXME: does not support file-backed instances
2316 if not inst_config.secondary_nodes:
2317 i_non_redundant.append(instance)
2319 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2320 instance, "instance has multiple secondary nodes: %s",
2321 utils.CommaJoin(inst_config.secondary_nodes),
2322 code=self.ETYPE_WARNING)
2324 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2325 pnode = inst_config.primary_node
2326 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2327 instance_groups = {}
2329 for node in instance_nodes:
2330 instance_groups.setdefault(nodeinfo_byname[node].group,
2331 []).append(node)
2333 pretty_list = [
2334 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2335 # Sort so that we always list the primary node first.
2336 for group, nodes in sorted(instance_groups.items(),
2337 key=lambda (_, nodes): pnode in nodes,
2338 reverse=True)]
2340 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2341 instance, "instance has primary and secondary nodes in"
2342 " different groups: %s", utils.CommaJoin(pretty_list),
2343 code=self.ETYPE_WARNING)
2345 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2346 i_non_a_balanced.append(instance)
2348 for snode in inst_config.secondary_nodes:
2349 s_img = node_image[snode]
2350 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2351 "instance %s, connection to secondary node failed", instance)
2353 if s_img.offline:
2354 inst_nodes_offline.append(snode)
2356 # warn that the instance lives on offline nodes
2357 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2358 "instance has offline secondary node(s) %s",
2359 utils.CommaJoin(inst_nodes_offline))
2360 # ... or ghost/non-vm_capable nodes
2361 for node in inst_config.all_nodes:
2362 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2363 "instance lives on ghost node %s", node)
2364 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2365 instance, "instance lives on non-vm_capable node %s", node)
2367 feedback_fn("* Verifying orphan volumes")
2368 reserved = utils.FieldSet(*cluster.reserved_lvs)
2369 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2371 feedback_fn("* Verifying orphan instances")
2372 self._VerifyOrphanInstances(instancelist, node_image)
2374 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2375 feedback_fn("* Verifying N+1 Memory redundancy")
2376 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2378 feedback_fn("* Other Notes")
2380 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2381 % len(i_non_redundant))
2383 if i_non_a_balanced:
2384 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2385 % len(i_non_a_balanced))
2388 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2391 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2395 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2396 """Analyze the post-hooks' result
2398 This method analyses the hook result, handles it, and sends some
2399 nicely-formatted feedback back to the user.
2401 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2402 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2403 @param hooks_results: the results of the multi-node hooks rpc call
2404 @param feedback_fn: function used to send feedback back to the caller
2405 @param lu_result: previous Exec result
2406 @return: the new Exec result, based on the previous result
2410 # We only really run POST phase hooks, and are only interested in
2411 # their results
2412 if phase == constants.HOOKS_PHASE_POST:
2413 # Used to change hooks' output to proper indentation
2414 feedback_fn("* Hooks Results")
2415 assert hooks_results, "invalid result from hooks"
2417 for node_name in hooks_results:
2418 res = hooks_results[node_name]
2419 msg = res.fail_msg
2420 test = msg and not res.offline
2421 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2422 "Communication failure in hooks execution: %s", msg)
2423 if res.offline or msg:
2424 # No need to investigate payload if node is offline or gave an error.
2425 # override manually lu_result here as _ErrorIf only
2426 # overrides self.bad
2427 lu_result = 1
2428 continue
2429 for script, hkr, output in res.payload:
2430 test = hkr == constants.HKR_FAIL
2431 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2432 "Script %s failed, output:", script)
2433 if test:
2434 output = self._HOOKS_INDENT_RE.sub(' ', output)
2435 feedback_fn("%s" % output)
2441 class LUClusterVerifyDisks(NoHooksLU):
2442 """Verifies the cluster disks status.
2447 def ExpandNames(self):
2448 self.needed_locks = {
2449 locking.LEVEL_NODE: locking.ALL_SET,
2450 locking.LEVEL_INSTANCE: locking.ALL_SET,
2451 }
2452 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2454 def Exec(self, feedback_fn):
2455 """Verify integrity of cluster disks.
2457 @rtype: tuple of three items
2458 @return: a tuple of (dict of node-to-node_error, list of instances
2459 which need activate-disks, dict of instance: (node, volume) for
2460 missing volumes
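A hypothetical (illustrative only) return value::

    ({"node3": "rpc failure"}, ["inst2"], {"inst5": [("node1", "xenvg/lv1")]})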
2463 result = res_nodes, res_instances, res_missing = {}, [], {}
2465 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2466 instances = self.cfg.GetAllInstancesInfo().values()
2468 nv_dict = {}
2469 for inst in instances:
2470 inst_lvs = {}
2471 if not inst.admin_up:
2472 continue
2473 inst.MapLVsByNode(inst_lvs)
2474 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2475 for node, vol_list in inst_lvs.iteritems():
2476 for vol in vol_list:
2477 nv_dict[(node, vol)] = inst
2479 if not nv_dict:
2480 return result
2482 node_lvs = self.rpc.call_lv_list(nodes, [])
2483 for node, node_res in node_lvs.items():
2484 if node_res.offline:
2485 continue
2486 msg = node_res.fail_msg
2488 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2489 res_nodes[node] = msg
2492 lvs = node_res.payload
2493 for lv_name, (_, _, lv_online) in lvs.items():
2494 inst = nv_dict.pop((node, lv_name), None)
2495 if (not lv_online and inst is not None
2496 and inst.name not in res_instances):
2497 res_instances.append(inst.name)
2499 # any leftover items in nv_dict are missing LVs, let's arrange the
2500 # data better
2501 for key, inst in nv_dict.iteritems():
2502 if inst.name not in res_missing:
2503 res_missing[inst.name] = []
2504 res_missing[inst.name].append(key)
2506 return result
2509 class LUClusterRepairDiskSizes(NoHooksLU):
2510 """Verifies the cluster disks sizes.
2515 def ExpandNames(self):
2516 if self.op.instances:
2517 self.wanted_names = []
2518 for name in self.op.instances:
2519 full_name = _ExpandInstanceName(self.cfg, name)
2520 self.wanted_names.append(full_name)
2521 self.needed_locks = {
2522 locking.LEVEL_NODE: [],
2523 locking.LEVEL_INSTANCE: self.wanted_names,
2524 }
2525 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2526 else:
2527 self.wanted_names = None
2528 self.needed_locks = {
2529 locking.LEVEL_NODE: locking.ALL_SET,
2530 locking.LEVEL_INSTANCE: locking.ALL_SET,
2531 }
2532 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2534 def DeclareLocks(self, level):
2535 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2536 self._LockInstancesNodes(primary_only=True)
2538 def CheckPrereq(self):
2539 """Check prerequisites.
2541 This only checks the optional instance list against the existing names.
2544 if self.wanted_names is None:
2545 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2547 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2548 in self.wanted_names]
2550 def _EnsureChildSizes(self, disk):
2551 """Ensure children of the disk have the needed disk size.
2553 This is valid mainly for DRBD8 and fixes an issue where the
2554 children have smaller disk size.
2556 @param disk: an L{ganeti.objects.Disk} object
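Illustrative (hypothetical sizes): a DRBD8 disk recorded at 2048 MiB whose
data child reports only 1024 MiB gets that child grown to 2048 MiB; the
check then recurses into the data child, but never into the metadata device.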
2559 if disk.dev_type == constants.LD_DRBD8:
2560 assert disk.children, "Empty children for DRBD8?"
2561 fchild = disk.children[0]
2562 mismatch = fchild.size < disk.size
2564 self.LogInfo("Child disk has size %d, parent %d, fixing",
2565 fchild.size, disk.size)
2566 fchild.size = disk.size
2568 # and we recurse on this child only, not on the metadev
2569 return self._EnsureChildSizes(fchild) or mismatch
2570 else:
2571 return False
2573 def Exec(self, feedback_fn):
2574 """Verify the size of cluster disks.
2577 # TODO: check child disks too
2578 # TODO: check differences in size between primary/secondary nodes
2579 per_node_disks = {}
2580 for instance in self.wanted_instances:
2581 pnode = instance.primary_node
2582 if pnode not in per_node_disks:
2583 per_node_disks[pnode] = []
2584 for idx, disk in enumerate(instance.disks):
2585 per_node_disks[pnode].append((instance, idx, disk))
2587 changed = []
2588 for node, dskl in per_node_disks.items():
2589 newl = [v[2].Copy() for v in dskl]
2590 for dsk in newl:
2591 self.cfg.SetDiskID(dsk, node)
2592 result = self.rpc.call_blockdev_getsize(node, newl)
2594 self.LogWarning("Failure in blockdev_getsize call to node"
2595 " %s, ignoring", node)
2597 if len(result.payload) != len(dskl):
2598 logging.warning("Invalid result from node %s: len(dksl)=%d,"
2599 " result.payload=%s", node, len(dskl), result.payload)
2600 self.LogWarning("Invalid result from node %s, ignoring node results",
2603 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2605 self.LogWarning("Disk %d of instance %s did not return size"
2606 " information, ignoring", idx, instance.name)
2608 if not isinstance(size, (int, long)):
2609 self.LogWarning("Disk %d of instance %s did not return valid"
2610 " size information, ignoring", idx, instance.name)
2613 if size != disk.size:
2614 self.LogInfo("Disk %d of instance %s has mismatched size,"
2615 " correcting: recorded %d, actual %d", idx,
2616 instance.name, disk.size, size)
2617 disk.size = size
2618 self.cfg.Update(instance, feedback_fn)
2619 changed.append((instance.name, idx, size))
2620 if self._EnsureChildSizes(disk):
2621 self.cfg.Update(instance, feedback_fn)
2622 changed.append((instance.name, idx, disk.size))
2624 return changed
2626 class LUClusterRename(LogicalUnit):
2627 """Rename the cluster.
2630 HPATH = "cluster-rename"
2631 HTYPE = constants.HTYPE_CLUSTER
2633 def BuildHooksEnv(self):
2638 "OP_TARGET": self.cfg.GetClusterName(),
2639 "NEW_NAME": self.op.name,
2640 }
2641 mn = self.cfg.GetMasterNode()
2642 all_nodes = self.cfg.GetNodeList()
2643 return env, [mn], all_nodes
2645 def CheckPrereq(self):
2646 """Verify that the passed name is a valid one.
2649 hostname = netutils.GetHostname(name=self.op.name,
2650 family=self.cfg.GetPrimaryIPFamily())
2652 new_name = hostname.name
2653 self.ip = new_ip = hostname.ip
2654 old_name = self.cfg.GetClusterName()
2655 old_ip = self.cfg.GetMasterIP()
2656 if new_name == old_name and new_ip == old_ip:
2657 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2658 " cluster has changed",
2659 errors.ECODE_INVAL)
2660 if new_ip != old_ip:
2661 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2662 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2663 " reachable on the network" %
2664 new_ip, errors.ECODE_NOTUNIQUE)
2666 self.op.name = new_name
2668 def Exec(self, feedback_fn):
2669 """Rename the cluster.
2672 clustername = self.op.name
2673 ip = self.ip
2675 # shutdown the master IP
2676 master = self.cfg.GetMasterNode()
2677 result = self.rpc.call_node_stop_master(master, False)
2678 result.Raise("Could not disable the master role")
2680 try:
2681 cluster = self.cfg.GetClusterInfo()
2682 cluster.cluster_name = clustername
2683 cluster.master_ip = ip
2684 self.cfg.Update(cluster, feedback_fn)
2686 # update the known hosts file
2687 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2688 node_list = self.cfg.GetOnlineNodeList()
2689 try:
2690 node_list.remove(master)
2691 except ValueError:
2692 pass
2693 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2694 finally:
2695 result = self.rpc.call_node_start_master(master, False, False)
2696 msg = result.fail_msg
2698 self.LogWarning("Could not re-enable the master role on"
2699 " the master, please restart manually: %s", msg)
2704 class LUClusterSetParams(LogicalUnit):
2705 """Change the parameters of the cluster.
2708 HPATH = "cluster-modify"
2709 HTYPE = constants.HTYPE_CLUSTER
2712 def CheckArguments(self):
2716 if self.op.uid_pool:
2717 uidpool.CheckUidPool(self.op.uid_pool)
2719 if self.op.add_uids:
2720 uidpool.CheckUidPool(self.op.add_uids)
2722 if self.op.remove_uids:
2723 uidpool.CheckUidPool(self.op.remove_uids)
2725 def ExpandNames(self):
2726 # FIXME: in the future maybe other cluster params won't require checking on
2727 # all nodes to be modified.
2728 self.needed_locks = {
2729 locking.LEVEL_NODE: locking.ALL_SET,
2730 }
2731 self.share_locks[locking.LEVEL_NODE] = 1
2733 def BuildHooksEnv(self):
2738 "OP_TARGET": self.cfg.GetClusterName(),
2739 "NEW_VG_NAME": self.op.vg_name,
2740 }
2741 mn = self.cfg.GetMasterNode()
2742 return env, [mn], [mn]
2744 def CheckPrereq(self):
2745 """Check prerequisites.
2747 This checks whether the given params don't conflict and
2748 if the given volume group is valid.
2751 if self.op.vg_name is not None and not self.op.vg_name:
2752 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2753 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2754 " instances exist", errors.ECODE_INVAL)
2756 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2757 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2758 raise errors.OpPrereqError("Cannot disable drbd helper while"
2759 " drbd-based instances exist",
2760 errors.ECODE_INVAL)
2762 node_list = self.acquired_locks[locking.LEVEL_NODE]
2764 # if vg_name not None, checks given volume group on all nodes
2765 if self.op.vg_name:
2766 vglist = self.rpc.call_vg_list(node_list)
2767 for node in node_list:
2768 msg = vglist[node].fail_msg
2769 if msg:
2770 # ignoring down node
2771 self.LogWarning("Error while gathering data on node %s"
2772 " (ignoring node): %s", node, msg)
2773 continue
2774 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2775 self.op.vg_name,
2776 constants.MIN_VG_SIZE)
2777 if vgstatus:
2778 raise errors.OpPrereqError("Error on node '%s': %s" %
2779 (node, vgstatus), errors.ECODE_ENVIRON)
2781 if self.op.drbd_helper:
2782 # checks given drbd helper on all nodes
2783 helpers = self.rpc.call_drbd_helper(node_list)
2784 for node in node_list:
2785 ninfo = self.cfg.GetNodeInfo(node)
2787 self.LogInfo("Not checking drbd helper on offline node %s", node)
2789 msg = helpers[node].fail_msg
2791 raise errors.OpPrereqError("Error checking drbd helper on node"
2792 " '%s': %s" % (node, msg),
2793 errors.ECODE_ENVIRON)
2794 node_helper = helpers[node].payload
2795 if node_helper != self.op.drbd_helper:
2796 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2797 (node, node_helper), errors.ECODE_ENVIRON)
2799 self.cluster = cluster = self.cfg.GetClusterInfo()
2800 # validate params changes
2801 if self.op.beparams:
2802 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2803 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2805 if self.op.ndparams:
2806 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2807 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2809 if self.op.nicparams:
2810 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2811 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2812 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2813 nic_errors = []
2815 # check all instances for consistency
2816 for instance in self.cfg.GetAllInstancesInfo().values():
2817 for nic_idx, nic in enumerate(instance.nics):
2818 params_copy = copy.deepcopy(nic.nicparams)
2819 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2821 # check parameter syntax
2823 objects.NIC.CheckParameterSyntax(params_filled)
2824 except errors.ConfigurationError, err:
2825 nic_errors.append("Instance %s, nic/%d: %s" %
2826 (instance.name, nic_idx, err))
2828 # if we're moving instances to routed, check that they have an ip
2829 target_mode = params_filled[constants.NIC_MODE]
2830 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2831 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2832 (instance.name, nic_idx))
2834 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2835 "\n".join(nic_errors))
2837 # hypervisor list/parameters
2838 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2839 if self.op.hvparams:
2840 for hv_name, hv_dict in self.op.hvparams.items():
2841 if hv_name not in self.new_hvparams:
2842 self.new_hvparams[hv_name] = hv_dict
2843 else:
2844 self.new_hvparams[hv_name].update(hv_dict)
2846 # os hypervisor parameters
2847 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2848 if self.op.os_hvp:
2849 for os_name, hvs in self.op.os_hvp.items():
2850 if os_name not in self.new_os_hvp:
2851 self.new_os_hvp[os_name] = hvs
2852 else:
2853 for hv_name, hv_dict in hvs.items():
2854 if hv_name not in self.new_os_hvp[os_name]:
2855 self.new_os_hvp[os_name][hv_name] = hv_dict
2856 else:
2857 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2860 self.new_osp = objects.FillDict(cluster.osparams, {})
2861 if self.op.osparams:
2862 for os_name, osp in self.op.osparams.items():
2863 if os_name not in self.new_osp:
2864 self.new_osp[os_name] = {}
2866 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2867 use_none=True)
2869 if not self.new_osp[os_name]:
2870 # we removed all parameters
2871 del self.new_osp[os_name]
2872 else:
2873 # check the parameter validity (remote check)
2874 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2875 os_name, self.new_osp[os_name])
2877 # changes to the hypervisor list
2878 if self.op.enabled_hypervisors is not None:
2879 self.hv_list = self.op.enabled_hypervisors
2880 for hv in self.hv_list:
2881 # if the hypervisor doesn't already exist in the cluster
2882 # hvparams, we initialize it to empty, and then (in both
2883 # cases) we make sure to fill the defaults, as we might not
2884 # have a complete defaults list if the hypervisor wasn't
2885 # enabled before
2886 if hv not in new_hvp:
2887 new_hvp[hv] = {}
2888 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2889 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2890 else:
2891 self.hv_list = cluster.enabled_hypervisors
2893 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2894 # either the enabled list has changed, or the parameters have, validate
2895 for hv_name, hv_params in self.new_hvparams.items():
2896 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2897 (self.op.enabled_hypervisors and
2898 hv_name in self.op.enabled_hypervisors)):
2899 # either this is a new hypervisor, or its parameters have changed
2900 hv_class = hypervisor.GetHypervisor(hv_name)
2901 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2902 hv_class.CheckParameterSyntax(hv_params)
2903 _CheckHVParams(self, node_list, hv_name, hv_params)
2905 if self.op.os_hvp:
2906 # no need to check any newly-enabled hypervisors, since the
2907 # defaults have already been checked in the above code-block
2908 for os_name, os_hvp in self.new_os_hvp.items():
2909 for hv_name, hv_params in os_hvp.items():
2910 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2911 # we need to fill in the new os_hvp on top of the actual hv_p
2912 cluster_defaults = self.new_hvparams.get(hv_name, {})
2913 new_osp = objects.FillDict(cluster_defaults, hv_params)
2914 hv_class = hypervisor.GetHypervisor(hv_name)
2915 hv_class.CheckParameterSyntax(new_osp)
2916 _CheckHVParams(self, node_list, hv_name, new_osp)
2918 if self.op.default_iallocator:
2919 alloc_script = utils.FindFile(self.op.default_iallocator,
2920 constants.IALLOCATOR_SEARCH_PATH,
2921 os.X_OK)
2922 if alloc_script is None:
2923 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2924 " specified" % self.op.default_iallocator,
2925 errors.ECODE_INVAL)
2927 def Exec(self, feedback_fn):
2928 """Change the parameters of the cluster.
2931 if self.op.vg_name is not None:
2932 new_volume = self.op.vg_name
2933 if not new_volume:
2934 new_volume = None
2935 if new_volume != self.cfg.GetVGName():
2936 self.cfg.SetVGName(new_volume)
2938 feedback_fn("Cluster LVM configuration already in desired"
2939 " state, not changing")
2940 if self.op.drbd_helper is not None:
2941 new_helper = self.op.drbd_helper
2942 if not new_helper:
2943 new_helper = None
2944 if new_helper != self.cfg.GetDRBDHelper():
2945 self.cfg.SetDRBDHelper(new_helper)
2947 feedback_fn("Cluster DRBD helper already in desired state,"
2949 if self.op.hvparams:
2950 self.cluster.hvparams = self.new_hvparams
2951 if self.op.os_hvp:
2952 self.cluster.os_hvp = self.new_os_hvp
2953 if self.op.enabled_hypervisors is not None:
2954 self.cluster.hvparams = self.new_hvparams
2955 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2956 if self.op.beparams:
2957 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2958 if self.op.nicparams:
2959 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2960 if self.op.osparams:
2961 self.cluster.osparams = self.new_osp
2962 if self.op.ndparams:
2963 self.cluster.ndparams = self.new_ndparams
2965 if self.op.candidate_pool_size is not None:
2966 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2967 # we need to update the pool size here, otherwise the save will fail
2968 _AdjustCandidatePool(self, [])
2970 if self.op.maintain_node_health is not None:
2971 self.cluster.maintain_node_health = self.op.maintain_node_health
2973 if self.op.prealloc_wipe_disks is not None:
2974 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2976 if self.op.add_uids is not None:
2977 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2979 if self.op.remove_uids is not None:
2980 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2982 if self.op.uid_pool is not None:
2983 self.cluster.uid_pool = self.op.uid_pool
2985 if self.op.default_iallocator is not None:
2986 self.cluster.default_iallocator = self.op.default_iallocator
2988 if self.op.reserved_lvs is not None:
2989 self.cluster.reserved_lvs = self.op.reserved_lvs
2991 def helper_os(aname, mods, desc):
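# Illustrative note: 'mods' is a list of (action, os_name) pairs, e.g.
# (hypothetical values) [(constants.DDM_ADD, "lenny-image"),
# (constants.DDM_REMOVE, "etch-image")]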
2993 lst = getattr(self.cluster, aname)
2994 for key, val in mods:
2995 if key == constants.DDM_ADD:
2997 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3000 elif key == constants.DDM_REMOVE:
3004 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3005 else:
3006 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3008 if self.op.hidden_os:
3009 helper_os("hidden_os", self.op.hidden_os, "hidden")
3011 if self.op.blacklisted_os:
3012 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3014 if self.op.master_netdev:
3015 master = self.cfg.GetMasterNode()
3016 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3017 self.cluster.master_netdev)
3018 result = self.rpc.call_node_stop_master(master, False)
3019 result.Raise("Could not disable the master ip")
3020 feedback_fn("Changing master_netdev from %s to %s" %
3021 (self.cluster.master_netdev, self.op.master_netdev))
3022 self.cluster.master_netdev = self.op.master_netdev
3024 self.cfg.Update(self.cluster, feedback_fn)
3026 if self.op.master_netdev:
3027 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3028 self.op.master_netdev)
3029 result = self.rpc.call_node_start_master(master, False, False)
3031 self.LogWarning("Could not re-enable the master ip on"
3032 " the master, please restart manually: %s",
3036 def _UploadHelper(lu, nodes, fname):
3037 """Helper for uploading a file and showing warnings.
3040 if os.path.exists(fname):
3041 result = lu.rpc.call_upload_file(nodes, fname)
3042 for to_node, to_result in result.items():
3043 msg = to_result.fail_msg
3045 msg = ("Copy of file %s to node %s failed: %s" %
3046 (fname, to_node, msg))
3047 lu.proc.LogWarning(msg)
3050 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3051 """Distribute additional files which are part of the cluster configuration.
3053 ConfigWriter takes care of distributing the config and ssconf files, but
3054 there are more files which should be distributed to all nodes. This function
3055 makes sure those are copied.
3057 @param lu: calling logical unit
3058 @param additional_nodes: list of nodes not in the config to distribute to
3059 @type additional_vm: boolean
3060 @param additional_vm: whether the additional nodes are vm-capable or not
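Hypothetical usage (the node name is illustrative)::

    _RedistributeAncillaryFiles(lu, additional_nodes=["node4.example.com"])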
3063 # 1. Gather target nodes
3064 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3065 dist_nodes = lu.cfg.GetOnlineNodeList()
3066 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3067 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3068 if additional_nodes is not None:
3069 dist_nodes.extend(additional_nodes)
3070 if additional_vm:
3071 vm_nodes.extend(additional_nodes)
3072 if myself.name in dist_nodes:
3073 dist_nodes.remove(myself.name)
3074 if myself.name in vm_nodes:
3075 vm_nodes.remove(myself.name)
3077 # 2. Gather files to distribute
3078 dist_files = set([constants.ETC_HOSTS,
3079 constants.SSH_KNOWN_HOSTS_FILE,
3080 constants.RAPI_CERT_FILE,
3081 constants.RAPI_USERS_FILE,
3082 constants.CONFD_HMAC_KEY,
3083 constants.CLUSTER_DOMAIN_SECRET_FILE,
3084 ])
3086 vm_files = set()
3087 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3088 for hv_name in enabled_hypervisors:
3089 hv_class = hypervisor.GetHypervisor(hv_name)
3090 vm_files.update(hv_class.GetAncillaryFiles())
3092 # 3. Perform the files upload
3093 for fname in dist_files:
3094 _UploadHelper(lu, dist_nodes, fname)
3095 for fname in vm_files:
3096 _UploadHelper(lu, vm_nodes, fname)
3099 class LUClusterRedistConf(NoHooksLU):
3100 """Force the redistribution of cluster configuration.
3102 This is a very simple LU.
3107 def ExpandNames(self):
3108 self.needed_locks = {
3109 locking.LEVEL_NODE: locking.ALL_SET,
3110 }
3111 self.share_locks[locking.LEVEL_NODE] = 1
3113 def Exec(self, feedback_fn):
3114 """Redistribute the configuration.
3117 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3118 _RedistributeAncillaryFiles(self)
3121 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3122 """Sleep and poll for an instance's disk to sync.
3125 if not instance.disks or disks is not None and not disks:
3126 return True
3128 disks = _ExpandCheckDisks(instance, disks)
3131 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3133 node = instance.primary_node
3135 for dev in disks:
3136 lu.cfg.SetDiskID(dev, node)
3138 # TODO: Convert to utils.Retry
3140 retries = 0
3141 degr_retries = 10 # in seconds, as we sleep 1 second each time
3142 while True:
3143 max_time = 0
3144 done = True
3145 cumul_degraded = False
3146 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3147 msg = rstats.fail_msg
3149 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3150 retries += 1
3151 if retries >= 10:
3152 raise errors.RemoteError("Can't contact node %s for mirror data,"
3153 " aborting." % node)
3154 time.sleep(6)
3155 continue
3156 rstats = rstats.payload
3158 for i, mstat in enumerate(rstats):
3160 lu.LogWarning("Can't compute data for node %s/%s",
3161 node, disks[i].iv_name)
3162 continue
3164 cumul_degraded = (cumul_degraded or
3165 (mstat.is_degraded and mstat.sync_percent is None))
3166 if mstat.sync_percent is not None:
3167 done = False
3168 if mstat.estimated_time is not None:
3169 rem_time = ("%s remaining (estimated)" %
3170 utils.FormatSeconds(mstat.estimated_time))
3171 max_time = mstat.estimated_time
3173 rem_time = "no time estimate"
3174 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3175 (disks[i].iv_name, mstat.sync_percent, rem_time))
3177 # if we're done but degraded, let's do a few small retries, to
3178 # make sure we see a stable and not transient situation; therefore
3179 # we force restart of the loop
3180 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3181 logging.info("Degraded disks found, %d retries left", degr_retries)
3182 degr_retries -= 1
3183 time.sleep(1)
3184 continue
3186 if done or oneshot:
3187 break
3189 time.sleep(min(60, max_time))
3192 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3193 return not cumul_degraded
3196 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3197 """Check that mirrors are not degraded.
3199 The ldisk parameter, if True, will change the test from the
3200 is_degraded attribute (which represents overall non-ok status for
3201 the device(s)) to the ldisk (representing the local storage status).
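Hypothetical call: C{_CheckDiskConsistency(lu, dev, node, True, ldisk=True)}
returns True only if the local storage of C{dev} reports C{LDS_OKAY}.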
3204 lu.cfg.SetDiskID(dev, node)
3206 result = True
3208 if on_primary or dev.AssembleOnSecondary():
3209 rstats = lu.rpc.call_blockdev_find(node, dev)
3210 msg = rstats.fail_msg
3212 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3214 elif not rstats.payload:
3215 lu.LogWarning("Can't find disk on node %s", node)
3219 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3220 else:
3221 result = result and not rstats.payload.is_degraded
3223 if dev.children:
3224 for child in dev.children:
3225 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3227 return result
3230 class LUOobCommand(NoHooksLU):
3231 """Logical unit for OOB handling.
3235 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3237 def CheckPrereq(self):
3238 """Check prerequisites.
3240 This checks:
3241 - the node exists in the configuration
3242 - OOB is supported
3244 Any errors are signaled by raising errors.OpPrereqError.
3247 self.nodes = []
3248 self.master_node = self.cfg.GetMasterNode()
3250 assert self.op.power_delay >= 0.0
3252 if self.op.node_names:
3253 if self.op.command in self._SKIP_MASTER:
3254 if self.master_node in self.op.node_names:
3255 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3256 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3258 if master_oob_handler:
3259 additional_text = ("Run '%s %s %s' if you want to operate on the"
3260 " master regardless") % (master_oob_handler,
3264 additional_text = "The master node does not support out-of-band"
3266 raise errors.OpPrereqError(("Operating on the master node %s is not"
3267 " allowed for %s\n%s") %
3268 (self.master_node, self.op.command,
3269 additional_text), errors.ECODE_INVAL)
3270 else:
3271 self.op.node_names = self.cfg.GetNodeList()
3272 if self.op.command in self._SKIP_MASTER:
3273 self.op.node_names.remove(self.master_node)
3275 if self.op.command in self._SKIP_MASTER:
3276 assert self.master_node not in self.op.node_names
3278 for node_name in self.op.node_names:
3279 node = self.cfg.GetNodeInfo(node_name)
3282 raise errors.OpPrereqError("Node %s not found" % node_name,
3285 self.nodes.append(node)
3287 if (not self.op.ignore_status and
3288 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3289 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3290 " not marked offline") % node_name,
3293 def ExpandNames(self):
3294 """Gather locks we need.
3297 if self.op.node_names:
3298 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3299 for name in self.op.node_names]
3300 lock_names = self.op.node_names
3301 else:
3302 lock_names = locking.ALL_SET
3304 self.needed_locks = {
3305 locking.LEVEL_NODE: lock_names,
3306 }
3308 def Exec(self, feedback_fn):
3309 """Execute OOB and return result if we expect any.
3312 master_node = self.master_node
3313 ret = []
3315 for idx, node in enumerate(self.nodes):
3316 node_entry = [(constants.RS_NORMAL, node.name)]
3317 ret.append(node_entry)
3319 oob_program = _SupportsOob(self.cfg, node)
3321 if not oob_program:
3322 node_entry.append((constants.RS_UNAVAIL, None))
3323 continue
3325 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3326 self.op.command, oob_program, node.name)
3327 result = self.rpc.call_run_oob(master_node, oob_program,
3328 self.op.command, node.name,
3332 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3333 node.name, result.fail_msg)
3334 node_entry.append((constants.RS_NODATA, None))
3335 else:
3336 try:
3337 self._CheckPayload(result)
3338 except errors.OpExecError, err:
3339 self.LogWarning("The payload returned by '%s' is not valid: %s",
3340 node.name, err)
3341 node_entry.append((constants.RS_NODATA, None))
3342 else:
3343 if self.op.command == constants.OOB_HEALTH:
3344 # For health we should log important events
3345 for item, status in result.payload:
3346 if status in [constants.OOB_STATUS_WARNING,
3347 constants.OOB_STATUS_CRITICAL]:
3348 self.LogWarning("On node '%s' item '%s' has status '%s'",
3349 node.name, item, status)
3351 if self.op.command == constants.OOB_POWER_ON:
3352 node.powered = True
3353 elif self.op.command == constants.OOB_POWER_OFF:
3354 node.powered = False
3355 elif self.op.command == constants.OOB_POWER_STATUS:
3356 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3357 if powered != node.powered:
3358 logging.warning(("Recorded power state (%s) of node '%s' does not"
3359 " match actual power state (%s)"), node.powered,
3360 node.name, powered)
3362 # For configuration changing commands we should update the node
3363 if self.op.command in (constants.OOB_POWER_ON,
3364 constants.OOB_POWER_OFF):
3365 self.cfg.Update(node, feedback_fn)
3367 node_entry.append((constants.RS_NORMAL, result.payload))
3369 if (self.op.command == constants.OOB_POWER_ON and
3370 idx < len(self.nodes) - 1):
3371 time.sleep(self.op.power_delay)
3373 return ret
3375 def _CheckPayload(self, result):
3376 """Checks if the payload is valid.
3378 @param result: RPC result
3379 @raises errors.OpExecError: If payload is not valid
3382 errs = []
3383 if self.op.command == constants.OOB_HEALTH:
3384 if not isinstance(result.payload, list):
3385 errs.append("command 'health' is expected to return a list but got %s" %
3386 type(result.payload))
3387 else:
3388 for item, status in result.payload:
3389 if status not in constants.OOB_STATUSES:
3390 errs.append("health item '%s' has invalid status '%s'" %
3391 (item, status))
3393 if self.op.command == constants.OOB_POWER_STATUS:
3394 if not isinstance(result.payload, dict):
3395 errs.append("power-status is expected to return a dict but got %s" %
3396 type(result.payload))
3398 if self.op.command in [
3399 constants.OOB_POWER_ON,
3400 constants.OOB_POWER_OFF,
3401 constants.OOB_POWER_CYCLE,
3402 ]:
3403 if result.payload is not None:
3404 errs.append("%s is expected to not return payload but got '%s'" %
3405 (self.op.command, result.payload))
3407 if errs:
3408 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3409 utils.CommaJoin(errs))
3411 class _OsQuery(_QueryBase):
3412 FIELDS = query.OS_FIELDS
3414 def ExpandNames(self, lu):
3415 # Lock all nodes in shared mode
3416 # Temporary removal of locks, should be reverted later
3417 # TODO: reintroduce locks when they are lighter-weight
3418 lu.needed_locks = {}
3419 #self.share_locks[locking.LEVEL_NODE] = 1
3420 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3422 # The following variables interact with _QueryBase._GetNames
3423 if self.names:
3424 self.wanted = self.names
3425 else:
3426 self.wanted = locking.ALL_SET
3428 self.do_locking = self.use_locking
3430 def DeclareLocks(self, lu, level):
3431 pass
3433 @staticmethod
3434 def _DiagnoseByOS(rlist):
3435 """Remaps a per-node return list into an a per-os per-node dictionary
3437 @param rlist: a map with node names as keys and OS objects as values
3440 @return: a dictionary with osnames as keys and as value another
3441 map, with nodes as keys and tuples of (path, status, diagnose,
3442 variants, parameters, api_versions) as values, eg::
3444 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3445 (/srv/..., False, "invalid api")],
3446 "node2": [(/srv/..., True, "", [], [])]}
3450 all_os = {}
3451 # we build here the list of nodes that didn't fail the RPC (at RPC
3452 # level), so that nodes with a non-responding node daemon don't
3453 # make all OSes invalid
3454 good_nodes = [node_name for node_name in rlist
3455 if not rlist[node_name].fail_msg]
3456 for node_name, nr in rlist.items():
3457 if nr.fail_msg or not nr.payload:
3458 continue
3459 for (name, path, status, diagnose, variants,
3460 params, api_versions) in nr.payload:
3461 if name not in all_os:
3462 # build a list of nodes for this os containing empty lists
3463 # for each node in node_list
3464 all_os[name] = {}
3465 for nname in good_nodes:
3466 all_os[name][nname] = []
3467 # convert params from [name, help] to (name, help)
3468 params = [tuple(v) for v in params]
3469 all_os[name][node_name].append((path, status, diagnose,
3470 variants, params, api_versions))
3472 return all_os
3473 def _GetQueryData(self, lu):
3474 """Computes the list of nodes and their attributes.
3477 # Locking is not used
3478 assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3480 valid_nodes = [node.name
3481 for node in lu.cfg.GetAllNodesInfo().values()
3482 if not node.offline and node.vm_capable]
3483 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3484 cluster = lu.cfg.GetClusterInfo()
3486 data = {}
3488 for (os_name, os_data) in pol.items():
3489 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3490 hidden=(os_name in cluster.hidden_os),
3491 blacklisted=(os_name in cluster.blacklisted_os))
3493 variants = set()
3494 parameters = set()
3495 api_versions = set()
3497 for idx, osl in enumerate(os_data.values()):
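# Illustrative note: an OS stays valid only if its first entry is valid on
# every node reporting it; the first invalid entry ends the aggregation.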
3498 info.valid = bool(info.valid and osl and osl[0][1])
3499 if not info.valid:
3500 break
3502 (node_variants, node_params, node_api) = osl[0][3:6]
3503 if idx == 0:
3504 # First entry
3505 variants.update(node_variants)
3506 parameters.update(node_params)
3507 api_versions.update(node_api)
3508 else:
3509 # Filter out inconsistent values
3510 variants.intersection_update(node_variants)
3511 parameters.intersection_update(node_params)
3512 api_versions.intersection_update(node_api)
3514 info.variants = list(variants)
3515 info.parameters = list(parameters)
3516 info.api_versions = list(api_versions)
3518 data[os_name] = info
3520 # Prepare data in requested order
3521 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3522 if name in data]
3525 class LUOsDiagnose(NoHooksLU):
3526 """Logical unit for OS diagnose/query.
3531 @staticmethod
3532 def _BuildFilter(fields, names):
3533 """Builds a filter for querying OSes.
3536 name_filter = qlang.MakeSimpleFilter("name", names)
3538 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3539 # respective field is not requested
3540 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3541 for fname in ["hidden", "blacklisted"]
3542 if fname not in fields]
3543 if "valid" not in fields:
3544 status_filter.append([qlang.OP_TRUE, "valid"])
3546 if status_filter:
3547 status_filter.insert(0, qlang.OP_AND)
3548 else:
3549 status_filter = None
3551 if name_filter and status_filter:
3552 return [qlang.OP_AND, name_filter, status_filter]
3553 elif name_filter:
3554 return name_filter
3555 else:
3556 return status_filter
3558 def CheckArguments(self):
3559 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3560 self.op.output_fields, False)
3562 def ExpandNames(self):
3563 self.oq.ExpandNames(self)
3565 def Exec(self, feedback_fn):
3566 return self.oq.OldStyleQuery(self)
3569 class LUNodeRemove(LogicalUnit):
3570 """Logical unit for removing a node.
3573 HPATH = "node-remove"
3574 HTYPE = constants.HTYPE_NODE
3576 def BuildHooksEnv(self):
3579 This doesn't run on the target node in the pre phase as a failed
3580 node would then be impossible to remove.
3584 "OP_TARGET": self.op.node_name,
3585 "NODE_NAME": self.op.node_name,
3587 all_nodes = self.cfg.GetNodeList()
3588 try:
3589 all_nodes.remove(self.op.node_name)
3590 except ValueError:
3591 logging.warning("Node %s which is about to be removed not found"
3592 " in the all nodes list", self.op.node_name)
3593 return env, all_nodes, all_nodes
3595 def CheckPrereq(self):
3596 """Check prerequisites.
3599 - the node exists in the configuration
3600 - it does not have primary or secondary instances
3601 - it's not the master
3603 Any errors are signaled by raising errors.OpPrereqError.
3606 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3607 node = self.cfg.GetNodeInfo(self.op.node_name)
3608 assert node is not None
3610 instance_list = self.cfg.GetInstanceList()
3612 masternode = self.cfg.GetMasterNode()
3613 if node.name == masternode:
3614 raise errors.OpPrereqError("Node is the master node,"
3615 " you need to failover first.",
3616 errors.ECODE_INVAL)
3618 for instance_name in instance_list:
3619 instance = self.cfg.GetInstanceInfo(instance_name)
3620 if node.name in instance.all_nodes:
3621 raise errors.OpPrereqError("Instance %s is still running on the node,"
3622 " please remove first." % instance_name,
3623 errors.ECODE_INVAL)
3624 self.op.node_name = node.name
3625 self.node = node
3627 def Exec(self, feedback_fn):
3628 """Removes the node from the cluster.
3632 logging.info("Stopping the node daemon and removing configs from node %s",
3635 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3637 # Promote nodes to master candidate as needed
3638 _AdjustCandidatePool(self, exceptions=[node.name])
3639 self.context.RemoveNode(node.name)
3641 # Run post hooks on the node before it's removed
3642 _RunPostHook(self, node.name)
3644 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3645 msg = result.fail_msg
3647 self.LogWarning("Errors encountered on the remote node while leaving"
3648 " the cluster: %s", msg)
3650 # Remove node from our /etc/hosts
3651 if self.cfg.GetClusterInfo().modify_etc_hosts:
3652 master_node = self.cfg.GetMasterNode()
3653 result = self.rpc.call_etc_hosts_modify(master_node,
3654 constants.ETC_HOSTS_REMOVE,
3655 node.name, None)
3656 result.Raise("Can't update hosts file with new host data")
3657 _RedistributeAncillaryFiles(self)
3660 class _NodeQuery(_QueryBase):
3661 FIELDS = query.NODE_FIELDS
3663 def ExpandNames(self, lu):
3664 lu.needed_locks = {}
3665 lu.share_locks[locking.LEVEL_NODE] = 1
3667 if self.names:
3668 self.wanted = _GetWantedNodes(lu, self.names)
3669 else:
3670 self.wanted = locking.ALL_SET
3672 self.do_locking = (self.use_locking and
3673 query.NQ_LIVE in self.requested_data)
3675 if self.do_locking:
3676 # if we don't request only static fields, we need to lock the nodes
3677 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3679 def DeclareLocks(self, lu, level):
3680 pass
3682 def _GetQueryData(self, lu):
3683 """Computes the list of nodes and their attributes.
3686 all_info = lu.cfg.GetAllNodesInfo()
3688 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3690 # Gather data as requested
3691 if query.NQ_LIVE in self.requested_data:
3692 # filter out non-vm_capable nodes
3693 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3695 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3696 lu.cfg.GetHypervisorType())
3697 live_data = dict((name, nresult.payload)
3698 for (name, nresult) in node_data.items()
3699 if not nresult.fail_msg and nresult.payload)
3700 else:
3701 live_data = None
3703 if query.NQ_INST in self.requested_data:
3704 node_to_primary = dict([(name, set()) for name in nodenames])
3705 node_to_secondary = dict([(name, set()) for name in nodenames])
3707 inst_data = lu.cfg.GetAllInstancesInfo()
3709 for inst in inst_data.values():
3710 if inst.primary_node in node_to_primary:
3711 node_to_primary[inst.primary_node].add(inst.name)
3712 for secnode in inst.secondary_nodes:
3713 if secnode in node_to_secondary:
3714 node_to_secondary[secnode].add(inst.name)
3715 else:
3716 node_to_primary = None
3717 node_to_secondary = None
3719 if query.NQ_OOB in self.requested_data:
3720 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3721 for name, node in all_info.iteritems())
3722 else:
3723 oob_support = None
3725 if query.NQ_GROUP in self.requested_data:
3726 groups = lu.cfg.GetAllNodeGroupsInfo()
3727 else:
3728 groups = {}
3730 return query.NodeQueryData([all_info[name] for name in nodenames],
3731 live_data, lu.cfg.GetMasterNode(),
3732 node_to_primary, node_to_secondary, groups,
3733 oob_support, lu.cfg.GetClusterInfo())
3736 class LUNodeQuery(NoHooksLU):
3737 """Logical unit for querying nodes.
3740 # pylint: disable-msg=W0142
3743 def CheckArguments(self):
3744 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3745 self.op.output_fields, self.op.use_locking)
3747 def ExpandNames(self):
3748 self.nq.ExpandNames(self)
3750 def Exec(self, feedback_fn):
3751 return self.nq.OldStyleQuery(self)
3754 class LUNodeQueryvols(NoHooksLU):
3755 """Logical unit for getting volumes on node(s).
3759 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3760 _FIELDS_STATIC = utils.FieldSet("node")
3762 def CheckArguments(self):
3763 _CheckOutputFields(static=self._FIELDS_STATIC,
3764 dynamic=self._FIELDS_DYNAMIC,
3765 selected=self.op.output_fields)
  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
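
  # Illustrative sketch (hypothetical data): with output_fields of
  # ["node", "name", "size"], each logical volume becomes one row of
  # strings, e.g.:
  #
  #   [["node1.example.com", "disk0", "10240"],
  #    ["node1.example.com", "disk1", "2048"]]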
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False
3839 def CheckArguments(self):
3840 _CheckOutputFields(static=self._FIELDS_STATIC,
3841 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3842 selected=self.op.output_fields)
  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields
3866 # Never ask for node or type as it's only known to the LU
3867 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3868 while extra in fields:
3869 fields.remove(extra)
3871 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3872 name_idx = field_idx[constants.SF_NAME]
3874 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3875 data = self.rpc.call_storage_list(self.nodes,
3876 self.op.storage_type, st_args,
3877 self.op.name, fields)
    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
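
  # Illustrative sketch (hypothetical values): SF_NAME is always fetched so
  # the rows can be keyed and sorted by name even when the caller did not
  # ask for it, while node and type are filled in locally. For
  # output_fields == [SF_NODE, SF_SIZE] the RPC requests [SF_NAME, SF_SIZE]
  # and field_idx would map {"name": 0, "size": 1}.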
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3935 def DeclareLocks(self, lu, level):
3936 if level == locking.LEVEL_NODE and self.do_locking:
3937 lu._LockInstancesNodes() # pylint: disable-msg=W0212
  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
3954 wrongnode_inst = set()
    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if all_info[inst].primary_node == name:
              live_data.update(result.payload)
            else:
              wrongnode_inst.add(inst)
        # else no instance is alive
    else:
      live_data = {}
    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{"size": disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None
    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)
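
# Illustrative note on the gathering above (hypothetical situation): an
# instance reported as running by a node that is not its configured primary
# lands in wrongnode_inst instead of live_data; e.g. after a half-finished
# failover, "inst1" reported alive on node2 while the configuration still
# names node1 would be flagged rather than listed as cleanly running.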
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, False)
4016 def ExpandNames(self):
4017 self.impl.ExpandNames(self)
4019 def DeclareLocks(self, level):
4020 self.impl.DeclareLocks(self, level)
4022 def Exec(self, feedback_fn):
4023 return self.impl.NewStyleQuery(self)
class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)
4036 def ExpandNames(self):
4037 self.needed_locks = {}
4039 def Exec(self, feedback_fn):
4040 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
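
  # Illustrative sketch: field descriptions come from the same per-resource
  # implementation classes as the queries themselves; e.g. an opcode with
  # what == constants.QR_NODE resolves via _GetQueryImplementation to
  # _NodeQuery, whose FIELDS table is what QueryFields describes here.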
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)
  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }
  def Exec(self, feedback_fn):
    """Modifies a storage volume on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4078 result = self.rpc.call_storage_modify(self.op.node_name,
4079 self.op.storage_type, st_args,
4080 self.op.name, self.op.changes)
4081 result.Raise("Failed to modify storage unit '%s' on %s" %
4082 (self.op.name, self.op.node_name))
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]
4093 def CheckArguments(self):
4094 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4095 # validate/normalize the node name
4096 self.hostname = netutils.GetHostname(name=self.op.node_name,
4097 family=self.primary_ip_family)
4098 self.op.node_name = self.hostname.name
4099 if self.op.readd and self.op.group:
4100 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4101 " being readded", errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1
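
  # Illustrative sketch (hypothetical names/addresses): for
  # "gnt-node add node4.example.com" the hook scripts would see, among
  # others,
  #
  #   GANETI_OP_TARGET=node4.example.com
  #   GANETI_NODE_PIP=192.0.2.14
  #
  # with the pre-hooks running on the current node list (nodes_0) and the
  # post-hooks also on the new node itself (nodes_1). The GANETI_ prefix is
  # applied by the hooks runner, not by this method.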
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a"
                                   " valid IPv4 address must be given as"
                                   " secondary", errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip
4143 secondary_ip = self.op.secondary_ip
4144 if not netutils.IP4Address.IsValid(secondary_ip):
4145 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4146 " address" % secondary_ip, errors.ECODE_INVAL)
4148 node_list = cfg.GetNodeList()
4149 if not self.op.readd and node in node_list:
4150 raise errors.OpPrereqError("Node %s is already in the configuration" %
4151 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)
4156 self.changed_primary_ip = False
4158 for existing_node_name in node_list:
4159 existing_node = cfg.GetNodeInfo(existing_node_name)
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue
4171 if (existing_node.primary_ip == primary_ip or
4172 existing_node.secondary_ip == primary_ip or
4173 existing_node.primary_ip == secondary_ip or
4174 existing_node.secondary_ip == secondary_ip):
4175 raise errors.OpPrereqError("New node ip address(es) conflict with"
4176 " existing node %s" % existing_node.name,
4177 errors.ECODE_NOTUNIQUE)
    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)
    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)
4200 # check that the type of the node (single versus dual homed) is the
4201 # same as for the master
4202 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4203 master_singlehomed = myself.secondary_ip == myself.primary_ip
4204 newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)
4215 # checks reachability
4216 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4217 raise errors.OpPrereqError("Node not reachable by ping",
4218 errors.ECODE_ENVIRON)
4220 if not newbie_singlehomed:
4221 # check reachability from my secondary ip to newbie's secondary ip
4222 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4223 source=myself.secondary_ip):
4224 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4225 " based ping to node daemon port",
4226 errors.ECODE_ENVIRON)
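
    # Illustrative sketch: both probes above are plain TCP connects to the
    # node daemon port, roughly equivalent to (hypothetical addresses):
    #
    #   netutils.TcpPing("192.0.2.14", constants.DEFAULT_NODED_PORT)
    #   netutils.TcpPing("198.51.100.14", constants.DEFAULT_NODED_PORT,
    #                    source="198.51.100.1")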
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it is powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4269 # if we demote the node, we do cleanup later in the procedure
4270 new_node.master_candidate = self.master_candidate
4271 if self.changed_primary_ip:
4272 new_node.primary_ip = self.op.primary_ip
4274 # copy the master/vm_capable flags
4275 for attr in self._NFLAGS:
4276 setattr(new_node, attr, getattr(self.op, attr))
4278 # notify the user about any possible mc promotion
4279 if new_node.master_candidate:
4280 self.LogInfo("Node will be a master candidate")
    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}
4287 # check connectivity
4288 result = self.rpc.call_version([node])[node]
4289 result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))
    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")
    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
4319 for verifier in node_verify_list:
4320 result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")
    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
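
  # Illustrative sketch: _F2R and _R2F are exact inverses, e.g.
  #
  #   _F2R[(True, False, False)] == _ROLE_CANDIDATE
  #   _R2F[_ROLE_OFFLINE] == (False, False, True)
  #
  # so a computed role can be applied back to a node object by unpacking
  # the flag tuple over (master_candidate, drained, offline), as Exec does
  # below.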
4369 def CheckArguments(self):
4370 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4371 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4372 self.op.master_capable, self.op.vm_capable,
4373 self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)
    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None
  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4403 if self.lock_instances:
4404 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4406 def DeclareLocks(self, level):
4407 # If we have locked all instances, before waiting to lock nodes, release
4408 # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)
    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)
    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)
    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)
4484 self.old_flags = old_flags = (node.master_candidate,
4485 node.drained, node.offline)
4486 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4487 self.old_role = old_role = self._F2R[old_flags]
4489 # Check for ineffective changes
4490 for attr in self._FLAGS:
4491 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4492 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4493 setattr(self.op, attr, None)
4495 # Past this point, any flag change to False means a transition
4496 # away from the respective state, as only real changes are kept
    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Please power on node %s first before you"
                                    " can reset offline state") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " which does not support out-of-band"
                                  " handling") % self.op.node_name)
4510 # If we're being deofflined/drained, we'll MC ourself if needed
4511 if (self.op.drained == False or self.op.offline == False or
4512 (self.op.master_capable and not node.master_capable)):
4513 if _DecideSelfPromotion(self):
4514 self.op.master_candidate = True
4515 self.LogInfo("Auto-promoting node to master candidate")
4517 # If we're no longer master capable, we'll demote ourselves from MC
4518 if self.op.master_capable == False and node.master_candidate:
4519 self.LogInfo("Demoting from master candidate")
4520 self.op.master_candidate = False
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # offline/drained/master-candidate) flags
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_ENVIRON)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")
    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
4571 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4572 if master.name != node.name:
4573 # check reachability from master secondary ip to new secondary ip
4574 if not netutils.TcpPing(self.op.secondary_ip,
4575 constants.DEFAULT_NODED_PORT,
4576 source=master.secondary_ip):
4577 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4578 " based ping to node daemon port",
4579 errors.ECODE_ENVIRON)
4581 if self.op.ndparams:
4582 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4583 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4584 self.new_ndparams = new_ndparams
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))
    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags
      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])
4625 if self.op.secondary_ip:
4626 node.secondary_ip = self.op.secondary_ip
4627 result.append(("secondary_ip", self.op.secondary_ip))
4629 # this will trigger configuration file update, if needed
4630 self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)
  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}
  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
4668 result.Raise("Failed to schedule the reboot")
4669 return result.payload
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
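
    # Illustrative note: netutils.IP6Address.family is the socket address
    # family (AF_INET6), so an IPv6-primary cluster reports
    # primary_ip_version == constants.IP6_VERSION (6); otherwise the IPv4
    # default of 4 is kept.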
4701 "software_version": constants.RELEASE_VERSION,
4702 "protocol_version": constants.PROTOCOL_VERSION,
4703 "config_version": constants.CONFIG_VERSION,
4704 "os_api_version": max(constants.OS_API_VERSIONS),
4705 "export_version": constants.EXPORT_VERSION,
4706 "architecture": (platform.architecture()[0], platform.machine()),
4707 "name": cluster.cluster_name,
4708 "master": cluster.master_node,
4709 "default_hypervisor": cluster.enabled_hypervisors[0],
4710 "enabled_hypervisors": cluster.enabled_hypervisors,
4711 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4712 for hypervisor_name in cluster.enabled_hypervisors]),
4714 "beparams": cluster.beparams,
4715 "osparams": cluster.osparams,
4716 "nicparams": cluster.nicparams,
4717 "ndparams": cluster.ndparams,
4718 "candidate_pool_size": cluster.candidate_pool_size,
4719 "master_netdev": cluster.master_netdev,
4720 "volume_group_name": cluster.volume_group_name,
4721 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4722 "file_storage_dir": cluster.file_storage_dir,
4723 "shared_file_storage_dir": cluster.shared_file_storage_dir,
4724 "maintain_node_health": cluster.maintain_node_health,
4725 "ctime": cluster.ctime,
4726 "mtime": cluster.mtime,
4727 "uuid": cluster.uuid,
4728 "tags": list(cluster.GetTags()),
4729 "uid_pool": cluster.uid_pool,
4730 "default_iallocator": cluster.default_iallocator,
4731 "reserved_lvs": cluster.reserved_lvs,
4732 "primary_ip_version": primary_ip_version,
4733 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4734 "hidden_os": cluster.hidden_os,
4735 "blacklisted_os": cluster.blacklisted_os,
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")
4750 def CheckArguments(self):
4751 _CheckOutputFields(static=self._FIELDS_STATIC,
4752 dynamic=self._FIELDS_DYNAMIC,
4753 selected=self.op.output_fields)
4755 def ExpandNames(self):
4756 self.needed_locks = {}
  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
4764 if field == "cluster_name":
4765 entry = self.cfg.GetClusterName()
4766 elif field == "master_node":
4767 entry = self.cfg.GetMasterNode()
4768 elif field == "drain_flag":
4769 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4770 elif field == "watcher_pause":
4771 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4772 elif field == "volume_group_name":
4773 entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)

    return values
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
4787 self._ExpandAndLockInstance()
4788 self.needed_locks[locking.LEVEL_NODE] = []
4789 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4791 def DeclareLocks(self, level):
4792 if level == locking.LEVEL_NODE:
4793 self._LockInstancesNodes()
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
        _AssembleInstanceDisks(self, self.instance,
                               ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.
4823 This sets up the block devices on all nodes.
4825 @type lu: L{LogicalUnit}
4826 @param lu: the logical unit on whose behalf we execute
4827 @type instance: L{objects.Instance}
4828 @param instance: the instance for whose disks we assemble
4829 @type disks: list of L{objects.Disk} or None
4830 @param disks: which disks to assemble (or all, if None)
4831 @type ignore_secondaries: boolean
4832 @param ignore_secondaries: if true, errors on secondary nodes
4833 won't result in an error return from the function
4834 @type ignore_size: boolean
4835 @param ignore_size: if true, the current known size of the disk
4836 will not be used during the disk activation, useful for cases
4837 when the size is wrong
4838 @return: False if the operation failed, otherwise a list of
4839 (host, instance_visible_name, node_visible_name)
4840 with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)
  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False
4873 # FIXME: race condition on drbd migration to primary
  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
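
# Illustrative sketch of the two-pass assembly above (pseudo-trace for a
# hypothetical DRBD disk with primary A and secondary B):
#
#   pass 1: call_blockdev_assemble(A, ..., is_primary=False)
#           call_blockdev_assemble(B, ..., is_primary=False)
#   pass 2: call_blockdev_assemble(A, ..., is_primary=True)
#
# Both sides are brought up in secondary mode first so the DRBD handshake
# can start before A is promoted; promoting A in the first pass could let
# it go primary while still disconnected, which is the race described above.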
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
4929 self._ExpandAndLockInstance()
4930 self.needed_locks[locking.LEVEL_NODE] = []
4931 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4933 def DeclareLocks(self, level):
4934 if level == locking.LEVEL_NODE:
4935 self._LockInstancesNodes()
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are taken
  into account in the result; otherwise they are ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False

  return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
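
# Illustrative usage (taken from the startup path further below): callers
# check the target node before committing to start an instance, e.g.
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)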
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
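
# Illustrative usage (hypothetical values): req_sizes maps each volume group
# to the total space a planned disk set needs, e.g.
#
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "othervg": 2048})
#
# which verifies 10 GiB on "xenvg" and 2 GiB on "othervg" on both nodes.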
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5124 def ExpandNames(self):
5125 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
5151 if self.op.hvparams:
5152 # check hypervisor parameter syntax (locally)
5153 cluster = self.cfg.GetClusterInfo()
5154 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5155 filled_hvp = cluster.FillHV(instance)
5156 filled_hvp.update(self.op.hvparams)
5157 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5158 hv_type.CheckParameterSyntax(filled_hvp)
5159 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)
  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
5219 def ExpandNames(self):
5220 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
5247 _CheckNodeOnline(self, instance.primary_node)
5249 # check bridges existence
5250 _CheckInstanceBridgesExist(self, instance)
  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
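
  # Illustrative summary of the dispatch above:
  #
  #   running + INSTANCE_REBOOT_SOFT/HARD -> one call_instance_reboot RPC
  #   full reboot, or instance stopped    -> shutdown (if running), disk
  #                                          re-activation, instance_start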
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
5304 def ExpandNames(self):
5305 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)
  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
5366 def ExpandNames(self):
5367 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
5388 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5389 " offline, cannot reinstall")
5390 for node in instance.secondary_nodes:
5391 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5392 " cannot reinstall")
    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")
    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os
5408 nodelist = list(instance.all_nodes)
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.instance = instance
  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)
    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
5452 def ExpandNames(self):
5453 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
5474 _CheckNodeOnline(self, instance.primary_node)
5476 if instance.disk_template == constants.DT_DISKLESS:
5477 raise errors.OpPrereqError("Instance '%s' has no disks" %
5478 self.op.instance_name, errors.ECODE_INVAL)
5479 _CheckInstanceDown(self, instance, "cannot recreate disks")
    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance
  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)

    _CreateDisks(self, self.instance, to_skip=to_skip)
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5540 assert instance is not None
5541 _CheckNodeOnline(self, instance.primary_node)
5542 _CheckInstanceDown(self, instance, "cannot rename")
5543 self.instance = instance
    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                   hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (hostname.name, self.op.new_name),
                                   errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
5556 if (self.op.ip_check and
5557 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5558 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5559 (hostname.ip, new_name),
5560 errors.ECODE_NOTUNIQUE)
5562 instance_list = self.cfg.GetInstanceList()
5563 if new_name in instance_list and new_name != instance.name:
5564 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5565 new_name, errors.ECODE_EXISTS)
  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True
5580 self.cfg.RenameInstance(inst.name, self.op.new_name)
5581 # Change the instance lock. This is definitely safe while we hold the BGL
5582 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5583 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5585 # re-read the instance from the configuration after rename
5586 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5588 if rename_file_storage:
5589 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5590 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5591 old_file_storage_dir,
5592 new_file_storage_dir)
5593 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5594 " (but the instance has been renamed in Ganeti)" %
5595 (inst.primary_node, old_file_storage_dir,
5596 new_file_storage_dir))
    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
5622 def ExpandNames(self):
5623 self._ExpandAndLockInstance()
5624 self.needed_locks[locking.LEVEL_NODE] = []
5625 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5627 def DeclareLocks(self, level):
5628 if level == locking.LEVEL_NODE:
5629 self._LockInstancesNodes()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
5653 def Exec(self, feedback_fn):
5654 """Remove the instance.
5657 instance = self.instance
5658 logging.info("Shutting down instance %s on node %s",
5659 instance.name, instance.primary_node)
5661 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5662 self.op.shutdown_timeout)
5663 msg = result.fail_msg
5664 if msg:
5665 if self.op.ignore_failures:
5666 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5667 else:
5668 raise errors.OpExecError("Could not shutdown instance %s on"
5669 " node %s: %s" %
5670 (instance.name, instance.primary_node, msg))
5672 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5675 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5676 """Utility function to remove an instance.
5679 logging.info("Removing block devices for instance %s", instance.name)
5681 if not _RemoveDisks(lu, instance):
5682 if not ignore_failures:
5683 raise errors.OpExecError("Can't remove instance's disks")
5684 feedback_fn("Warning: can't remove instance's disks")
5686 logging.info("Removing instance %s out of cluster config", instance.name)
5688 lu.cfg.RemoveInstance(instance.name)
5690 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5691 "Instance lock removal conflict"
5693 # Remove lock for the instance
5694 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5697 class LUInstanceQuery(NoHooksLU):
5698 """Logical unit for querying instances.
5701 # pylint: disable-msg=W0142
5704 def CheckArguments(self):
5705 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5706 self.op.output_fields, self.op.use_locking)
5708 def ExpandNames(self):
5709 self.iq.ExpandNames(self)
5711 def DeclareLocks(self, level):
5712 self.iq.DeclareLocks(self, level)
5714 def Exec(self, feedback_fn):
5715 return self.iq.OldStyleQuery(self)
5718 class LUInstanceFailover(LogicalUnit):
5719 """Failover an instance.
5722 HPATH = "instance-failover"
5723 HTYPE = constants.HTYPE_INSTANCE
5724 REQ_BGL = False
5726 def CheckArguments(self):
5727 """Check the arguments.
5730 self.iallocator = getattr(self.op, "iallocator", None)
5731 self.target_node = getattr(self.op, "target_node", None)
5733 def ExpandNames(self):
5734 self._ExpandAndLockInstance()
5736 if self.op.target_node is not None:
5737 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5739 self.needed_locks[locking.LEVEL_NODE] = []
5740 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5742 def DeclareLocks(self, level):
5743 if level == locking.LEVEL_NODE:
5744 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5745 if instance.disk_template in constants.DTS_EXT_MIRROR:
5746 if self.op.target_node is None:
5747 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5748 else:
5749 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5750 self.op.target_node]
5751 del self.recalculate_locks[locking.LEVEL_NODE]
5752 else:
5753 self._LockInstancesNodes()
5755 def BuildHooksEnv(self):
5756 """Build hooks env.
5758 This runs on master, primary and secondary nodes of the instance.
5760 """
5761 instance = self.instance
5762 source_node = instance.primary_node
5763 env = {
5764 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5765 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5766 "OLD_PRIMARY": source_node,
5767 "NEW_PRIMARY": self.op.target_node,
5768 }
5770 if instance.disk_template in constants.DTS_INT_MIRROR:
5771 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
5772 env["NEW_SECONDARY"] = source_node
5774 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
5776 env.update(_BuildInstanceHookEnvByObject(self, instance))
5777 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5778 nl_post = list(nl)
5779 nl_post.append(source_node)
5780 return env, nl, nl_post
5782 def CheckPrereq(self):
5783 """Check prerequisites.
5785 This checks that the instance is in the cluster.
5787 """
5788 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5789 assert self.instance is not None, \
5790 "Cannot retrieve locked instance %s" % self.op.instance_name
5792 bep = self.cfg.GetClusterInfo().FillBE(instance)
5793 if instance.disk_template not in constants.DTS_MIRRORED:
5794 raise errors.OpPrereqError("Instance's disk layout is not"
5795 " mirrored, cannot failover.",
5798 if instance.disk_template in constants.DTS_EXT_MIRROR:
5799 _CheckIAllocatorOrNode(self, "iallocator", "target_node")
5800 if self.op.iallocator:
5801 self._RunAllocator()
5802 # Release all unnecessary node locks
5803 nodes_keep = [instance.primary_node, self.op.target_node]
5804 nodes_rel = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5805 if node not in nodes_keep]
5806 self.context.glm.release(locking.LEVEL_NODE, nodes_rel)
5807 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5809 # self.op.target_node is already populated, either directly or by the
5810 # iallocator run
5811 target_node = self.op.target_node
5813 else:
5814 secondary_nodes = instance.secondary_nodes
5815 if not secondary_nodes:
5816 raise errors.ConfigurationError("No secondary node but using"
5817 " %s disk template" %
5818 instance.disk_template)
5819 target_node = secondary_nodes[0]
5821 if self.op.iallocator or (self.op.target_node and
5822 self.op.target_node != target_node):
5823 raise errors.OpPrereqError("Instances with disk template %s cannot"
5824 " be failed over to arbitrary nodes"
5825 " (neither an iallocator nor a target"
5826 " node can be passed)" %
5827 instance.disk_template, errors.ECODE_INVAL)
5828 _CheckNodeOnline(self, target_node)
5829 _CheckNodeNotDrained(self, target_node)
5831 # Save target_node so that we can use it in BuildHooksEnv
5832 self.op.target_node = target_node
5834 if instance.admin_up:
5835 # check memory requirements on the secondary node
5836 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5837 instance.name, bep[constants.BE_MEMORY],
5838 instance.hypervisor)
5839 else:
5840 self.LogInfo("Not checking memory on the secondary node as"
5841 " instance will not be started")
5843 # check bridge existence
5844 _CheckInstanceBridgesExist(self, instance, node=target_node)
5846 def Exec(self, feedback_fn):
5847 """Failover an instance.
5849 The failover is done by shutting it down on its present node and
5850 starting it on the secondary.
5852 """
5853 instance = self.instance
5854 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5856 source_node = instance.primary_node
5857 target_node = self.op.target_node
5859 if instance.admin_up:
5860 feedback_fn("* checking disk consistency between source and target")
5861 for dev in instance.disks:
5862 # for drbd, these are drbd over lvm
5863 if not _CheckDiskConsistency(self, dev, target_node, False):
5864 if not self.op.ignore_consistency:
5865 raise errors.OpExecError("Disk %s is degraded on target node,"
5866 " aborting failover." % dev.iv_name)
5868 feedback_fn("* not checking disk consistency as instance is not running")
5870 feedback_fn("* shutting down instance on source node")
5871 logging.info("Shutting down instance %s on node %s",
5872 instance.name, source_node)
5874 result = self.rpc.call_instance_shutdown(source_node, instance,
5875 self.op.shutdown_timeout)
5876 msg = result.fail_msg
5877 if msg:
5878 if self.op.ignore_consistency or primary_node.offline:
5879 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5880 " Proceeding anyway. Please make sure node"
5881 " %s is down. Error details: %s",
5882 instance.name, source_node, source_node, msg)
5883 else:
5884 raise errors.OpExecError("Could not shutdown instance %s on"
5885 " node %s: %s" %
5886 (instance.name, source_node, msg))
5888 feedback_fn("* deactivating the instance's disks on source node")
5889 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5890 raise errors.OpExecError("Can't shut down the instance's disks.")
5892 instance.primary_node = target_node
5893 # distribute new instance config to the other nodes
5894 self.cfg.Update(instance, feedback_fn)
5896 # Only start the instance if it's marked as up
5897 if instance.admin_up:
5898 feedback_fn("* activating the instance's disks on target node")
5899 logging.info("Starting instance %s on node %s",
5900 instance.name, target_node)
5902 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5903 ignore_secondaries=True)
5904 if not disks_ok:
5905 _ShutdownInstanceDisks(self, instance)
5906 raise errors.OpExecError("Can't activate the instance's disks")
5908 feedback_fn("* starting the instance on the target node")
5909 result = self.rpc.call_instance_start(target_node, instance, None, None)
5910 msg = result.fail_msg
5911 if msg:
5912 _ShutdownInstanceDisks(self, instance)
5913 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5914 (instance.name, target_node, msg))
5916 def _RunAllocator(self):
5917 """Run the allocator based on input opcode.
5920 ial = IAllocator(self.cfg, self.rpc,
5921 mode=constants.IALLOCATOR_MODE_RELOC,
5922 name=self.instance.name,
5923 # TODO See why hail breaks with a single node below
5924 relocate_from=[self.instance.primary_node,
5925 self.instance.primary_node],
5926 )
5928 ial.Run(self.op.iallocator)
5930 if not ial.success:
5931 raise errors.OpPrereqError("Can't compute nodes using"
5932 " iallocator '%s': %s" %
5933 (self.op.iallocator, ial.info),
5934 errors.ECODE_NORES)
5935 if len(ial.result) != ial.required_nodes:
5936 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5937 " of nodes (%s), required %s" %
5938 (self.op.iallocator, len(ial.result),
5939 ial.required_nodes), errors.ECODE_FAULT)
5940 self.op.target_node = ial.result[0]
5941 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
5942 self.instance.name, self.op.iallocator,
5943 utils.CommaJoin(ial.result))
5946 class LUInstanceMigrate(LogicalUnit):
5947 """Migrate an instance.
5949 This is migration without shutting down, compared to the failover,
5950 which is done with shutdown.
5952 """
5953 HPATH = "instance-migrate"
5954 HTYPE = constants.HTYPE_INSTANCE
5955 REQ_BGL = False
5957 def ExpandNames(self):
5958 self._ExpandAndLockInstance()
5960 if self.op.target_node is not None:
5961 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5963 self.needed_locks[locking.LEVEL_NODE] = []
5964 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5966 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5967 self.op.cleanup, self.op.iallocator,
5968 self.op.target_node)
5969 self.tasklets = [self._migrater]
5971 def DeclareLocks(self, level):
5972 if level == locking.LEVEL_NODE:
5973 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5974 if instance.disk_template in constants.DTS_EXT_MIRROR:
5975 if self.op.target_node is None:
5976 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5977 else:
5978 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5979 self.op.target_node]
5980 del self.recalculate_locks[locking.LEVEL_NODE]
5981 else:
5982 self._LockInstancesNodes()
5984 def BuildHooksEnv(self):
5985 """Build hooks env.
5987 This runs on master, primary and secondary nodes of the instance.
5989 """
5990 instance = self._migrater.instance
5991 source_node = instance.primary_node
5992 target_node = self._migrater.target_node
5993 env = _BuildInstanceHookEnvByObject(self, instance)
5994 env["MIGRATE_LIVE"] = self._migrater.live
5995 env["MIGRATE_CLEANUP"] = self.op.cleanup
5997 "OLD_PRIMARY": source_node,
5998 "NEW_PRIMARY": target_node,
6001 if instance.disk_template in constants.DTS_INT_MIRROR:
6002 env["OLD_SECONDARY"] = target_node
6003 env["NEW_SECONDARY"] = source_node
6005 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6007 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6008 nl_post = list(nl)
6009 nl_post.append(source_node)
6010 return env, nl, nl_post
6013 class LUInstanceMove(LogicalUnit):
6014 """Move an instance by data-copying.
6017 HPATH = "instance-move"
6018 HTYPE = constants.HTYPE_INSTANCE
6019 REQ_BGL = False
6021 def ExpandNames(self):
6022 self._ExpandAndLockInstance()
6023 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6024 self.op.target_node = target_node
6025 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6026 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6028 def DeclareLocks(self, level):
6029 if level == locking.LEVEL_NODE:
6030 self._LockInstancesNodes(primary_only=True)
6032 def BuildHooksEnv(self):
6033 """Build hooks env.
6035 This runs on master, primary and secondary nodes of the instance.
6037 """
6038 env = {
6039 "TARGET_NODE": self.op.target_node,
6040 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6041 }
6042 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6043 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
6044 self.op.target_node]
6045 return env, nl, nl
6047 def CheckPrereq(self):
6048 """Check prerequisites.
6050 This checks that the instance is in the cluster.
6052 """
6053 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6054 assert self.instance is not None, \
6055 "Cannot retrieve locked instance %s" % self.op.instance_name
6057 node = self.cfg.GetNodeInfo(self.op.target_node)
6058 assert node is not None, \
6059 "Cannot retrieve locked node %s" % self.op.target_node
6061 self.target_node = target_node = node.name
6063 if target_node == instance.primary_node:
6064 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6065 (instance.name, target_node),
6066 errors.ECODE_STATE)
6068 bep = self.cfg.GetClusterInfo().FillBE(instance)
6070 for idx, dsk in enumerate(instance.disks):
6071 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6072 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6073 " cannot copy" % idx, errors.ECODE_STATE)
6075 _CheckNodeOnline(self, target_node)
6076 _CheckNodeNotDrained(self, target_node)
6077 _CheckNodeVmCapable(self, target_node)
6079 if instance.admin_up:
6080 # check memory requirements on the secondary node
6081 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6082 instance.name, bep[constants.BE_MEMORY],
6083 instance.hypervisor)
6084 else:
6085 self.LogInfo("Not checking memory on the secondary node as"
6086 " instance will not be started")
6088 # check bridge existence
6089 _CheckInstanceBridgesExist(self, instance, node=target_node)
6091 def Exec(self, feedback_fn):
6092 """Move an instance.
6094 The move is done by shutting it down on its present node, copying
6095 the data over (slow) and starting it on the new node.
6097 """
6098 instance = self.instance
6100 source_node = instance.primary_node
6101 target_node = self.target_node
6103 self.LogInfo("Shutting down instance %s on source node %s",
6104 instance.name, source_node)
6106 result = self.rpc.call_instance_shutdown(source_node, instance,
6107 self.op.shutdown_timeout)
6108 msg = result.fail_msg
6109 if msg:
6110 if self.op.ignore_consistency:
6111 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6112 " Proceeding anyway. Please make sure node"
6113 " %s is down. Error details: %s",
6114 instance.name, source_node, source_node, msg)
6115 else:
6116 raise errors.OpExecError("Could not shutdown instance %s on"
6117 " node %s: %s" %
6118 (instance.name, source_node, msg))
6120 # create the target disks
6121 try:
6122 _CreateDisks(self, instance, target_node=target_node)
6123 except errors.OpExecError:
6124 self.LogWarning("Device creation failed, reverting...")
6125 try:
6126 _RemoveDisks(self, instance, target_node=target_node)
6127 finally:
6128 self.cfg.ReleaseDRBDMinors(instance.name)
6129 raise
6131 cluster_name = self.cfg.GetClusterInfo().cluster_name
6133 errs = []
6134 # activate, get path, copy the data over
6135 for idx, disk in enumerate(instance.disks):
6136 self.LogInfo("Copying data for disk %d", idx)
6137 result = self.rpc.call_blockdev_assemble(target_node, disk,
6138 instance.name, True, idx)
6139 if result.fail_msg:
6140 self.LogWarning("Can't assemble newly created disk %d: %s",
6141 idx, result.fail_msg)
6142 errs.append(result.fail_msg)
6143 break
6144 dev_path = result.payload
6145 result = self.rpc.call_blockdev_export(source_node, disk,
6146 target_node, dev_path,
6147 cluster_name)
6148 if result.fail_msg:
6149 self.LogWarning("Can't copy data over for disk %d: %s",
6150 idx, result.fail_msg)
6151 errs.append(result.fail_msg)
6152 break
6154 if errs:
6155 self.LogWarning("Some disks failed to copy, aborting")
6156 try:
6157 _RemoveDisks(self, instance, target_node=target_node)
6158 finally:
6159 self.cfg.ReleaseDRBDMinors(instance.name)
6160 raise errors.OpExecError("Errors during disk copy: %s" %
6161 ",".join(errs))
6163 instance.primary_node = target_node
6164 self.cfg.Update(instance, feedback_fn)
6166 self.LogInfo("Removing the disks on the original node")
6167 _RemoveDisks(self, instance, target_node=source_node)
6169 # Only start the instance if it's marked as up
6170 if instance.admin_up:
6171 self.LogInfo("Starting instance %s on node %s",
6172 instance.name, target_node)
6174 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6175 ignore_secondaries=True)
6176 if not disks_ok:
6177 _ShutdownInstanceDisks(self, instance)
6178 raise errors.OpExecError("Can't activate the instance's disks")
6180 result = self.rpc.call_instance_start(target_node, instance, None, None)
6181 msg = result.fail_msg
6182 if msg:
6183 _ShutdownInstanceDisks(self, instance)
6184 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6185 (instance.name, target_node, msg))
6188 class LUNodeMigrate(LogicalUnit):
6189 """Migrate all instances from a node.
6192 HPATH = "node-migrate"
6193 HTYPE = constants.HTYPE_NODE
6194 REQ_BGL = False
6196 def CheckArguments(self):
6197 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6199 def ExpandNames(self):
6200 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6202 self.needed_locks = {}
6204 # Create tasklets for migrating all instances on this node
6205 names = []
6206 tasklets = []
6208 self.lock_all_nodes = False
6210 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6211 logging.debug("Migrating instance %s", inst.name)
6212 names.append(inst.name)
6214 tasklets.append(TLMigrateInstance(self, inst.name, False,
6215 self.op.iallocator, None))
6217 if inst.disk_template in constants.DTS_EXT_MIRROR:
6218 # We need to lock all nodes, as the iallocator will choose the
6219 # destination nodes afterwards
6220 self.lock_all_nodes = True
6222 self.tasklets = tasklets
6224 # Declare node locks
6225 if self.lock_all_nodes:
6226 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6227 else:
6228 self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6229 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6231 # Declare instance locks
6232 self.needed_locks[locking.LEVEL_INSTANCE] = names
6234 def DeclareLocks(self, level):
6235 if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6236 self._LockInstancesNodes()
6238 def BuildHooksEnv(self):
6239 """Build hooks env.
6241 This runs on the master, the primary and all the secondaries.
6243 """
6244 env = {
6245 "NODE_NAME": self.op.node_name,
6246 }
6248 nl = [self.cfg.GetMasterNode()]
6250 return (env, nl, nl)
6253 class TLMigrateInstance(Tasklet):
6254 """Tasklet class for instance migration.
6257 @ivar live: whether the migration will be done live or non-live;
6258 this variable is initalized only after CheckPrereq has run
6261 def __init__(self, lu, instance_name, cleanup,
6262 iallocator=None, target_node=None):
6263 """Initializes this class.
6266 Tasklet.__init__(self, lu)
6269 self.instance_name = instance_name
6270 self.cleanup = cleanup
6271 self.live = False # will be overridden later
6272 self.iallocator = iallocator
6273 self.target_node = target_node
6275 def CheckPrereq(self):
6276 """Check prerequisites.
6278 This checks that the instance is in the cluster.
6280 """
6281 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6282 instance = self.cfg.GetInstanceInfo(instance_name)
6283 assert instance is not None
6284 self.instance = instance
6286 if instance.disk_template not in constants.DTS_MIRRORED:
6287 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6288 " migrations" % instance.disk_template,
6291 if instance.disk_template in constants.DTS_EXT_MIRROR:
6292 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6294 if self.iallocator:
6295 self._RunAllocator()
6297 # self.target_node is already populated, either directly or by the
6298 # iallocator run
6299 target_node = self.target_node
6301 if len(self.lu.tasklets) == 1:
6302 # It is safe to remove locks only when we're the only tasklet in the LU
6303 nodes_keep = [instance.primary_node, self.target_node]
6304 nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6305 if node not in nodes_keep]
6306 self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6307 self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6309 else:
6310 secondary_nodes = instance.secondary_nodes
6311 if not secondary_nodes:
6312 raise errors.ConfigurationError("No secondary node but using"
6313 " %s disk template" %
6314 instance.disk_template)
6315 target_node = secondary_nodes[0]
6316 if self.lu.op.iallocator or (self.lu.op.target_node and
6317 self.lu.op.target_node != target_node):
6318 raise errors.OpPrereqError("Instances with disk template %s cannot"
6319 " be migrated over to arbitrary nodes"
6320 " (neither an iallocator nor a target"
6321 " node can be passed)" %
6322 instance.disk_template, errors.ECODE_INVAL)
6324 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6326 # check memory requirements on the secondary node
6327 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6328 instance.name, i_be[constants.BE_MEMORY],
6329 instance.hypervisor)
6331 # check bridge existence
6332 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6334 if not self.cleanup:
6335 _CheckNodeNotDrained(self.lu, target_node)
6336 result = self.rpc.call_instance_migratable(instance.primary_node,
6337 instance)
6338 result.Raise("Can't migrate, please use failover",
6339 prereq=True, ecode=errors.ECODE_STATE)
6342 def _RunAllocator(self):
6343 """Run the allocator based on input opcode.
6346 ial = IAllocator(self.cfg, self.rpc,
6347 mode=constants.IALLOCATOR_MODE_RELOC,
6348 name=self.instance_name,
6349 # TODO See why hail breaks with a single node below
6350 relocate_from=[self.instance.primary_node,
6351 self.instance.primary_node],
6352 )
6354 ial.Run(self.iallocator)
6356 if not ial.success:
6357 raise errors.OpPrereqError("Can't compute nodes using"
6358 " iallocator '%s': %s" %
6359 (self.iallocator, ial.info),
6360 errors.ECODE_NORES)
6361 if len(ial.result) != ial.required_nodes:
6362 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6363 " of nodes (%s), required %s" %
6364 (self.iallocator, len(ial.result),
6365 ial.required_nodes), errors.ECODE_FAULT)
6366 self.target_node = ial.result[0]
6367 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6368 self.instance_name, self.iallocator,
6369 utils.CommaJoin(ial.result))
6371 if self.lu.op.live is not None and self.lu.op.mode is not None:
6372 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6373 " parameters are accepted",
6375 if self.lu.op.live is not None:
6377 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6379 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6380 # reset the 'live' parameter to None so that repeated
6381 # invocations of CheckPrereq do not raise an exception
6382 self.lu.op.live = None
6383 elif self.lu.op.mode is None:
6384 # read the default value from the hypervisor
6385 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance, skip_globals=False)
6386 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6388 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
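# Editorial sketch (not original code): the block above collapses the two
# opcode inputs into one boolean. Assuming only the documented values:
#   op.live=True,  op.mode=None -> mode=HT_MIGRATION_LIVE    -> self.live=True
#   op.live=False, op.mode=None -> mode=HT_MIGRATION_NONLIVE -> self.live=False
#   op.live=None,  op.mode=None -> mode read from the hypervisor's
#                                  HV_MIGRATION_MODE default
#   both set                    -> OpPrereqError, they are mutually exclusive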
6390 def _WaitUntilSync(self):
6391 """Poll with custom rpc for disk sync.
6393 This uses our own step-based rpc call.
6395 """
6396 self.feedback_fn("* wait until resync is done")
6397 all_done = False
6398 while not all_done:
6399 all_done = True
6400 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6401 self.nodes_ip,
6402 self.instance.disks)
6403 min_percent = 100
6404 for node, nres in result.items():
6405 nres.Raise("Cannot resync disks on node %s" % node)
6406 node_done, node_percent = nres.payload
6407 all_done = all_done and node_done
6408 if node_percent is not None:
6409 min_percent = min(min_percent, node_percent)
6410 if not all_done:
6411 if min_percent < 100:
6412 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6413 time.sleep(2)
6415 def _EnsureSecondary(self, node):
6416 """Demote a node to secondary.
6419 self.feedback_fn("* switching node %s to secondary mode" % node)
6421 for dev in self.instance.disks:
6422 self.cfg.SetDiskID(dev, node)
6424 result = self.rpc.call_blockdev_close(node, self.instance.name,
6425 self.instance.disks)
6426 result.Raise("Cannot change disk to secondary on node %s" % node)
6428 def _GoStandalone(self):
6429 """Disconnect from the network.
6432 self.feedback_fn("* changing into standalone mode")
6433 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6434 self.instance.disks)
6435 for node, nres in result.items():
6436 nres.Raise("Cannot disconnect disks node %s" % node)
6438 def _GoReconnect(self, multimaster):
6439 """Reconnect to the network.
6445 msg = "single-master"
6446 self.feedback_fn("* changing disks into %s mode" % msg)
6447 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6448 self.instance.disks,
6449 self.instance.name, multimaster)
6450 for node, nres in result.items():
6451 nres.Raise("Cannot change disks config on node %s" % node)
6453 def _ExecCleanup(self):
6454 """Try to cleanup after a failed migration.
6456 The cleanup is done by:
6457 - check that the instance is running only on one node
6458 (and update the config if needed)
6459 - change disks on its secondary node to secondary
6460 - wait until disks are fully synchronized
6461 - disconnect from the network
6462 - change disks into single-master mode
6463 - wait again until disks are fully synchronized
6465 """
6466 instance = self.instance
6467 target_node = self.target_node
6468 source_node = self.source_node
6470 # check running on only one node
6471 self.feedback_fn("* checking where the instance actually runs"
6472 " (if this hangs, the hypervisor might be in"
6474 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6475 for node, result in ins_l.items():
6476 result.Raise("Can't contact node %s" % node)
6478 runningon_source = instance.name in ins_l[source_node].payload
6479 runningon_target = instance.name in ins_l[target_node].payload
6481 if runningon_source and runningon_target:
6482 raise errors.OpExecError("Instance seems to be running on two nodes,"
6483 " or the hypervisor is confused. You will have"
6484 " to ensure manually that it runs only on one"
6485 " and restart this operation.")
6487 if not (runningon_source or runningon_target):
6488 raise errors.OpExecError("Instance does not seem to be running at all."
6489 " In this case, it's safer to repair by"
6490 " running 'gnt-instance stop' to ensure disk"
6491 " shutdown, and then restarting it.")
6493 if runningon_target:
6494 # the migration has actually succeeded, we need to update the config
6495 self.feedback_fn("* instance running on secondary node (%s),"
6496 " updating config" % target_node)
6497 instance.primary_node = target_node
6498 self.cfg.Update(instance, self.feedback_fn)
6499 demoted_node = source_node
6500 else:
6501 self.feedback_fn("* instance confirmed to be running on its"
6502 " primary node (%s)" % source_node)
6503 demoted_node = target_node
6505 if instance.disk_template in constants.DTS_INT_MIRROR:
6506 self._EnsureSecondary(demoted_node)
6507 try:
6508 self._WaitUntilSync()
6509 except errors.OpExecError:
6510 # we ignore errors here, since if the device is standalone, it
6511 # won't be able to sync
6512 pass
6513 self._GoStandalone()
6514 self._GoReconnect(False)
6515 self._WaitUntilSync()
6517 self.feedback_fn("* done")
6519 def _RevertDiskStatus(self):
6520 """Try to revert the disk status after a failed migration.
6523 target_node = self.target_node
6524 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6525 return
6527 try:
6528 self._EnsureSecondary(target_node)
6529 self._GoStandalone()
6530 self._GoReconnect(False)
6531 self._WaitUntilSync()
6532 except errors.OpExecError, err:
6533 self.lu.LogWarning("Migration failed and I can't reconnect the"
6534 " drives: error '%s'\n"
6535 "Please look and recover the instance status" %
6538 def _AbortMigration(self):
6539 """Call the hypervisor code to abort a started migration.
6542 instance = self.instance
6543 target_node = self.target_node
6544 migration_info = self.migration_info
6546 abort_result = self.rpc.call_finalize_migration(target_node,
6547 instance,
6548 migration_info,
6549 False)
6550 abort_msg = abort_result.fail_msg
6551 if abort_msg:
6552 logging.error("Aborting migration failed on target node %s: %s",
6553 target_node, abort_msg)
6554 # Don't raise an exception here, as we still have to try to revert the
6555 # disk status, even if this step failed.
6557 def _ExecMigration(self):
6558 """Migrate an instance.
6560 The migration is done by:
6561 - change the disks into dual-master mode
6562 - wait until disks are fully synchronized again
6563 - migrate the instance
6564 - change disks on the new secondary node (the old primary) to secondary
6565 - wait until disks are fully synchronized
6566 - change disks into single-master mode
6568 """
6569 instance = self.instance
6570 target_node = self.target_node
6571 source_node = self.source_node
6573 self.feedback_fn("* checking disk consistency between source and target")
6574 for dev in instance.disks:
6575 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6576 raise errors.OpExecError("Disk %s is degraded or not fully"
6577 " synchronized on target node,"
6578 " aborting migrate." % dev.iv_name)
6580 # First get the migration information from the remote node
6581 result = self.rpc.call_migration_info(source_node, instance)
6582 msg = result.fail_msg
6583 if msg:
6584 log_err = ("Failed fetching source migration information from %s: %s" %
6585 (source_node, msg))
6586 logging.error(log_err)
6587 raise errors.OpExecError(log_err)
6589 self.migration_info = migration_info = result.payload
6591 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6592 # Then switch the disks to master/master mode
6593 self._EnsureSecondary(target_node)
6594 self._GoStandalone()
6595 self._GoReconnect(True)
6596 self._WaitUntilSync()
6598 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6599 result = self.rpc.call_accept_instance(target_node,
6600 instance,
6601 migration_info,
6602 self.nodes_ip[target_node])
6604 msg = result.fail_msg
6605 if msg:
6606 logging.error("Instance pre-migration failed, trying to revert"
6607 " disk status: %s", msg)
6608 self.feedback_fn("Pre-migration failed, aborting")
6609 self._AbortMigration()
6610 self._RevertDiskStatus()
6611 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6612 (instance.name, msg))
6614 self.feedback_fn("* migrating instance to %s" % target_node)
6616 result = self.rpc.call_instance_migrate(source_node, instance,
6617 self.nodes_ip[target_node],
6618 self.live)
6619 msg = result.fail_msg
6620 if msg:
6621 logging.error("Instance migration failed, trying to revert"
6622 " disk status: %s", msg)
6623 self.feedback_fn("Migration failed, aborting")
6624 self._AbortMigration()
6625 self._RevertDiskStatus()
6626 raise errors.OpExecError("Could not migrate instance %s: %s" %
6627 (instance.name, msg))
6630 instance.primary_node = target_node
6631 # distribute new instance config to the other nodes
6632 self.cfg.Update(instance, self.feedback_fn)
6634 result = self.rpc.call_finalize_migration(target_node,
6635 instance,
6636 migration_info,
6637 True)
6638 msg = result.fail_msg
6639 if msg:
6640 logging.error("Instance migration succeeded, but finalization failed:"
6641 " %s", msg)
6642 raise errors.OpExecError("Could not finalize instance migration: %s" %
6643 msg)
6645 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6646 self._EnsureSecondary(source_node)
6647 self._WaitUntilSync()
6648 self._GoStandalone()
6649 self._GoReconnect(False)
6650 self._WaitUntilSync()
6652 self.feedback_fn("* done")
6654 def Exec(self, feedback_fn):
6655 """Perform the migration.
6658 feedback_fn("Migrating instance %s" % self.instance.name)
6660 self.feedback_fn = feedback_fn
6662 self.source_node = self.instance.primary_node
6664 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6665 if self.instance.disk_template in constants.DTS_INT_MIRROR:
6666 self.target_node = self.instance.secondary_nodes[0]
6667 # Otherwise self.target_node has been populated either
6668 # directly, or through an iallocator.
6670 self.all_nodes = [self.source_node, self.target_node]
6671 self.nodes_ip = {
6672 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6673 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6674 }
6676 if self.cleanup:
6677 return self._ExecCleanup()
6678 else:
6679 return self._ExecMigration()
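# Hedged usage sketch (editorial; names are made up): LUInstanceMigrate wires
# this tasklet up in ExpandNames, and the LU processor then drives it:
#
#   migrater = TLMigrateInstance(lu, "inst1.example.com", cleanup=False,
#                                iallocator=None, target_node="node2")
#   lu.tasklets = [migrater]
#   # the processor later calls, for each tasklet:
#   #   migrater.CheckPrereq()
#   #   migrater.Exec(feedback_fn)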
6682 def _CreateBlockDev(lu, node, instance, device, force_create,
6683 info, force_open):
6684 """Create a tree of block devices on a given node.
6686 If this device type has to be created on secondaries, create it and
6687 all its children.
6689 If not, just recurse to children keeping the same 'force' value.
6691 @param lu: the lu on whose behalf we execute
6692 @param node: the node on which to create the device
6693 @type instance: L{objects.Instance}
6694 @param instance: the instance which owns the device
6695 @type device: L{objects.Disk}
6696 @param device: the device to create
6697 @type force_create: boolean
6698 @param force_create: whether to force creation of this device; this
6699 will be changed to True whenever we find a device which has
6700 CreateOnSecondary() attribute
6701 @param info: the extra 'metadata' we should attach to the device
6702 (this will be represented as a LVM tag)
6703 @type force_open: boolean
6704 @param force_open: this parameter will be passed to the
6705 L{backend.BlockdevCreate} function where it specifies
6706 whether we run on primary or not, and it affects both
6707 the child assembly and the device's own Open() execution
6709 """
6710 if device.CreateOnSecondary():
6711 force_create = True
6713 if device.children:
6714 for child in device.children:
6715 _CreateBlockDev(lu, node, instance, child, force_create,
6716 info, force_open)
6718 if not force_create:
6719 return
6721 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6724 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6725 """Create a single block device on a given node.
6727 This will not recurse over children of the device, so they must be
6728 created in advance.
6730 @param lu: the lu on whose behalf we execute
6731 @param node: the node on which to create the device
6732 @type instance: L{objects.Instance}
6733 @param instance: the instance which owns the device
6734 @type device: L{objects.Disk}
6735 @param device: the device to create
6736 @param info: the extra 'metadata' we should attach to the device
6737 (this will be represented as a LVM tag)
6738 @type force_open: boolean
6739 @param force_open: this parameter will be passed to the
6740 L{backend.BlockdevCreate} function where it specifies
6741 whether we run on primary or not, and it affects both
6742 the child assembly and the device's own Open() execution
6744 """
6745 lu.cfg.SetDiskID(device, node)
6746 result = lu.rpc.call_blockdev_create(node, device, device.size,
6747 instance.name, force_open, info)
6748 result.Raise("Can't create block device %s on"
6749 " node %s for instance %s" % (device, node, instance.name))
6750 if device.physical_id is None:
6751 device.physical_id = result.payload
6754 def _GenerateUniqueNames(lu, exts):
6755 """Generate a suitable LV name.
6757 This will generate a logical volume name for the given instance.
6759 """
6760 results = []
6761 for val in exts:
6762 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6763 results.append("%s%s" % (new_id, val))
6765 return results
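# Illustrative example (editorial): for exts=[".disk0_data", ".disk0_meta"]
# this returns something like ["<uuid>.disk0_data", "<uuid>.disk0_meta"],
# where each unique id comes from ConfigWriter.GenerateUniqueID and is
# reserved against the calling job's execution context id (GetECId), so
# concurrent jobs cannot pick colliding LV names.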
6767 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6768 p_minor, s_minor):
6769 """Generate a drbd8 device complete with its children.
6771 """
6772 port = lu.cfg.AllocatePort()
6773 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6774 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6775 logical_id=(vgname, names[0]))
6776 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6777 logical_id=(vgname, names[1]))
6778 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6779 logical_id=(primary, secondary, port,
6780 p_minor, s_minor,
6781 shared_secret),
6782 children=[dev_data, dev_meta],
6783 iv_name=iv_name)
6784 return drbd_dev
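# Editorial note: the drbd8 logical_id assembled above is the 6-tuple
#   (primary_node, secondary_node, port, p_minor, s_minor, shared_secret)
# and the two LVs (data of the requested size, plus a fixed 128 MB metadata
# volume) become the children of the returned objects.Disk. A hedged call
# sketch with made-up values:
#   _GenerateDRBD8Branch(lu, "node1", "node2", 10240, "xenvg",
#                        ["<uuid>.disk0_data", "<uuid>.disk0_meta"],
#                        "disk/0", 12, 13)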
6787 def _GenerateDiskTemplate(lu, template_name,
6788 instance_name, primary_node,
6789 secondary_nodes, disk_info,
6790 file_storage_dir, file_driver,
6791 base_index, feedback_fn):
6792 """Generate the entire disk layout for a given template type.
6795 #TODO: compute space requirements
6797 vgname = lu.cfg.GetVGName()
6798 disk_count = len(disk_info)
6800 if template_name == constants.DT_DISKLESS:
6801 pass
6802 elif template_name == constants.DT_PLAIN:
6803 if len(secondary_nodes) != 0:
6804 raise errors.ProgrammerError("Wrong template configuration")
6806 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6807 for i in range(disk_count)])
6808 for idx, disk in enumerate(disk_info):
6809 disk_index = idx + base_index
6810 vg = disk.get("vg", vgname)
6811 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6812 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6813 logical_id=(vg, names[idx]),
6814 iv_name="disk/%d" % disk_index,
6816 disks.append(disk_dev)
6817 elif template_name == constants.DT_DRBD8:
6818 if len(secondary_nodes) != 1:
6819 raise errors.ProgrammerError("Wrong template configuration")
6820 remote_node = secondary_nodes[0]
6821 minors = lu.cfg.AllocateDRBDMinor(
6822 [primary_node, remote_node] * len(disk_info), instance_name)
6824 names = []
6825 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6826 for i in range(disk_count)]):
6827 names.append(lv_prefix + "_data")
6828 names.append(lv_prefix + "_meta")
6829 for idx, disk in enumerate(disk_info):
6830 disk_index = idx + base_index
6831 vg = disk.get("vg", vgname)
6832 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6833 disk["size"], vg, names[idx*2:idx*2+2],
6834 "disk/%d" % disk_index,
6835 minors[idx*2], minors[idx*2+1])
6836 disk_dev.mode = disk["mode"]
6837 disks.append(disk_dev)
6838 elif template_name == constants.DT_FILE:
6839 if len(secondary_nodes) != 0:
6840 raise errors.ProgrammerError("Wrong template configuration")
6842 opcodes.RequireFileStorage()
6844 for idx, disk in enumerate(disk_info):
6845 disk_index = idx + base_index
6846 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6847 iv_name="disk/%d" % disk_index,
6848 logical_id=(file_driver,
6849 "%s/disk%d" % (file_storage_dir,
6852 disks.append(disk_dev)
6853 elif template_name == constants.DT_SHARED_FILE:
6854 if len(secondary_nodes) != 0:
6855 raise errors.ProgrammerError("Wrong template configuration")
6857 opcodes.RequireSharedFileStorage()
6859 for idx, disk in enumerate(disk_info):
6860 disk_index = idx + base_index
6861 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6862 iv_name="disk/%d" % disk_index,
6863 logical_id=(file_driver,
6864 "%s/disk%d" % (file_storage_dir,
6867 disks.append(disk_dev)
6868 elif template_name == constants.DT_BLOCK:
6869 if len(secondary_nodes) != 0:
6870 raise errors.ProgrammerError("Wrong template configuration")
6872 for idx, disk in enumerate(disk_info):
6873 disk_index = idx + base_index
6874 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV, size=disk["size"],
6875 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
6876 disk["adopt"]),
6877 iv_name="disk/%d" % disk_index,
6878 mode=disk["mode"])
6879 disks.append(disk_dev)
6881 else:
6882 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6884 return disks
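# Hedged usage sketch (editorial; names invented): one 10 GB DRBD8 disk for
# an instance with primary "node1" and secondary "node2" would be built as
#   disks = _GenerateDiskTemplate(lu, constants.DT_DRBD8, "inst1", "node1",
#                                 ["node2"], [{"size": 10240, "mode": "rw"}],
#                                 None, None, 0, feedback_fn)
# returning a list with a single LD_DRBD8 disk whose children are the data
# and metadata LVs from _GenerateDRBD8Branch.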
6886 def _GetInstanceInfoText(instance):
6887 """Compute that text that should be added to the disk's metadata.
6890 return "originstname+%s" % instance.name
6893 def _CalcEta(time_taken, written, total_size):
6894 """Calculates the ETA based on size written and total size.
6896 @param time_taken: The time taken so far
6897 @param written: amount written so far
6898 @param total_size: The total size of data to be written
6899 @return: The remaining time in seconds
6901 """
6902 avg_time = time_taken / float(written)
6903 return (total_size - written) * avg_time
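# Worked example (editorial): _CalcEta is a plain linear extrapolation.
# With time_taken=30.0 seconds for written=100 (MiB) out of total_size=400,
# avg_time = 30.0 / 100 = 0.3 s/MiB, so the ETA is (400 - 100) * 0.3 = 90.0
# seconds. Callers must guarantee written > 0, or the division fails.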
6906 def _WipeDisks(lu, instance):
6907 """Wipes instance disks.
6909 @type lu: L{LogicalUnit}
6910 @param lu: the logical unit on whose behalf we execute
6911 @type instance: L{objects.Instance}
6912 @param instance: the instance whose disks we should create
6913 @return: the success of the wipe
6915 """
6916 node = instance.primary_node
6918 for device in instance.disks:
6919 lu.cfg.SetDiskID(device, node)
6921 logging.info("Pause sync of instance %s disks", instance.name)
6922 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6924 for idx, success in enumerate(result.payload):
6925 if not success:
6926 logging.warn("pause-sync of instance %s for disks %d failed",
6927 instance.name, idx)
6929 try:
6930 for idx, device in enumerate(instance.disks):
6931 lu.LogInfo("* Wiping disk %d", idx)
6932 logging.info("Wiping disk %d for instance %s, node %s",
6933 idx, instance.name, node)
6935 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6936 # MAX_WIPE_CHUNK at max
6937 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6938 constants.MIN_WIPE_CHUNK_PERCENT)
6940 size = device.size
6941 last_output = 0
6943 start_time = time.time()
6944 offset = 0
6945 while offset < size:
6946 wipe_size = min(wipe_chunk_size, size - offset)
6947 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6948 result.Raise("Could not wipe disk %d at offset %d for size %d" %
6949 (idx, offset, wipe_size))
6950 offset += wipe_size
6951 now = time.time()
6952 if now - last_output >= 60:
6953 eta = _CalcEta(now - start_time, offset, size)
6954 lu.LogInfo(" - done: %.1f%% ETA: %s" %
6955 (offset / float(size) * 100, utils.FormatSeconds(eta)))
6956 last_output = now
6957 finally:
6958 logging.info("Resume sync of instance %s disks", instance.name)
6960 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6962 for idx, success in enumerate(result.payload):
6963 if not success:
6964 lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6965 " look at the status and troubleshoot the issue.", idx)
6966 logging.warn("resume-sync of instance %s for disks %d failed",
6967 instance.name, idx)
6970 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6971 """Create all disks for an instance.
6973 This abstracts away some work from AddInstance.
6975 @type lu: L{LogicalUnit}
6976 @param lu: the logical unit on whose behalf we execute
6977 @type instance: L{objects.Instance}
6978 @param instance: the instance whose disks we should create
6979 @type to_skip: list
6980 @param to_skip: list of indices to skip
6981 @type target_node: string
6982 @param target_node: if passed, overrides the target node for creation
6984 @return: the success of the creation
6986 """
6987 info = _GetInstanceInfoText(instance)
6988 if target_node is None:
6989 pnode = instance.primary_node
6990 all_nodes = instance.all_nodes
6991 else:
6992 pnode = target_node
6993 all_nodes = [pnode]
6995 if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
6996 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6997 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6999 result.Raise("Failed to create directory '%s' on"
7000 " node %s" % (file_storage_dir, pnode))
7002 # Note: this needs to be kept in sync with adding of disks in
7003 # LUInstanceSetParams
7004 for idx, device in enumerate(instance.disks):
7005 if to_skip and idx in to_skip:
7006 continue
7007 logging.info("Creating volume %s for instance %s",
7008 device.iv_name, instance.name)
7010 for node in all_nodes:
7011 f_create = node == pnode
7012 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7015 def _RemoveDisks(lu, instance, target_node=None):
7016 """Remove all disks for an instance.
7018 This abstracts away some work from `AddInstance()` and
7019 `RemoveInstance()`. Note that in case some of the devices couldn't
7020 be removed, the removal will continue with the other ones (compare
7021 with `_CreateDisks()`).
7023 @type lu: L{LogicalUnit}
7024 @param lu: the logical unit on whose behalf we execute
7025 @type instance: L{objects.Instance}
7026 @param instance: the instance whose disks we should remove
7027 @type target_node: string
7028 @param target_node: used to override the node on which to remove the disks
7030 @return: the success of the removal
7032 """
7033 logging.info("Removing block devices for instance %s", instance.name)
7035 all_result = True
7036 for device in instance.disks:
7037 if target_node:
7038 edata = [(target_node, device)]
7039 else:
7040 edata = device.ComputeNodeTree(instance.primary_node)
7041 for node, disk in edata:
7042 lu.cfg.SetDiskID(disk, node)
7043 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7044 if msg:
7045 lu.LogWarning("Could not remove block device %s on node %s,"
7046 " continuing anyway: %s", device.iv_name, node, msg)
7047 all_result = False
7049 if instance.disk_template == constants.DT_FILE:
7050 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7051 if target_node:
7052 tgt = target_node
7053 else:
7054 tgt = instance.primary_node
7055 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7056 if result.fail_msg:
7057 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7058 file_storage_dir, instance.primary_node, result.fail_msg)
7059 all_result = False
7061 return all_result
7064 def _ComputeDiskSizePerVG(disk_template, disks):
7065 """Compute disk size requirements in the volume group
7068 def _compute(disks, payload):
7069 """Universal algorithm
7074 vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
7078 # Required free disk space as a function of disk and swap space
7079 req_size_dict = {
7080 constants.DT_DISKLESS: {},
7081 constants.DT_PLAIN: _compute(disks, 0),
7082 # 128 MB are added for drbd metadata for each disk
7083 constants.DT_DRBD8: _compute(disks, 128),
7084 constants.DT_FILE: {},
7085 constants.DT_SHARED_FILE: {},
7086 }
7088 if disk_template not in req_size_dict:
7089 raise errors.ProgrammerError("Disk template '%s' size requirement"
7090 " is unknown" % disk_template)
7092 return req_size_dict[disk_template]
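# Hedged example (editorial): with
#   disks = [{"vg": "xenvg", "size": 1024}, {"vg": "xenvg", "size": 2048}]
# _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) charges the 128 MB DRBD
# metadata per disk and returns {"xenvg": 1024 + 128 + 2048 + 128}, i.e.
# {"xenvg": 3328} (sizes in MB); the file-based templates return empty dicts
# because they consume no volume group space.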
7095 def _ComputeDiskSize(disk_template, disks):
7096 """Compute disk size requirements in the volume group
7099 # Required free disk space as a function of disk and swap space
7100 req_size_dict = {
7101 constants.DT_DISKLESS: None,
7102 constants.DT_PLAIN: sum(d["size"] for d in disks),
7103 # 128 MB are added for drbd metadata for each disk
7104 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
7105 constants.DT_FILE: None,
7106 constants.DT_SHARED_FILE: 0,
7107 constants.DT_BLOCK: 0,
7108 }
7110 if disk_template not in req_size_dict:
7111 raise errors.ProgrammerError("Disk template '%s' size requirement"
7112 " is unknown" % disk_template)
7114 return req_size_dict[disk_template]
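# Hedged example (editorial): for the same two disks of 1024 and 2048 MB,
# _ComputeDiskSize(constants.DT_PLAIN, disks) == 3072, while the DT_DRBD8
# figure is 3328 (two extra 128 MB metadata volumes); DT_DISKLESS and
# DT_FILE yield None since no volume group space is consumed at all.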
7117 def _FilterVmNodes(lu, nodenames):
7118 """Filters out non-vm_capable nodes from a list.
7120 @type lu: L{LogicalUnit}
7121 @param lu: the logical unit for which we check
7122 @type nodenames: list
7123 @param nodenames: the list of nodes on which we should check
7125 @return: the list of vm-capable nodes
7127 """
7128 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7129 return [name for name in nodenames if name not in vm_nodes]
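# Small sketch (editorial, invented names): if the cluster reports
# GetNonVmCapableNodeList() == ["storage1"], then
# _FilterVmNodes(lu, ["node1", "storage1", "node2"]) == ["node1", "node2"],
# preserving the input order.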
7132 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7133 """Hypervisor parameter validation.
7135 This function abstract the hypervisor parameter validation to be
7136 used in both instance create and instance modify.
7138 @type lu: L{LogicalUnit}
7139 @param lu: the logical unit for which we check
7140 @type nodenames: list
7141 @param nodenames: the list of nodes on which we should check
7142 @type hvname: string
7143 @param hvname: the name of the hypervisor we should use
7144 @type hvparams: dict
7145 @param hvparams: the parameters which we need to check
7146 @raise errors.OpPrereqError: if the parameters are not valid
7148 """
7149 nodenames = _FilterVmNodes(lu, nodenames)
7150 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7151 hvname,
7152 hvparams)
7153 for node in nodenames:
7154 info = hvinfo[node]
7155 if info.offline:
7156 continue
7157 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7160 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7161 """OS parameters validation.
7163 @type lu: L{LogicalUnit}
7164 @param lu: the logical unit for which we check
7165 @type required: boolean
7166 @param required: whether the validation should fail if the OS is not
7167 found
7168 @type nodenames: list
7169 @param nodenames: the list of nodes on which we should check
7170 @type osname: string
7171 @param osname: the name of the OS we should use
7172 @type osparams: dict
7173 @param osparams: the parameters which we need to check
7174 @raise errors.OpPrereqError: if the parameters are not valid
7176 """
7177 nodenames = _FilterVmNodes(lu, nodenames)
7178 result = lu.rpc.call_os_validate(required, nodenames, osname,
7179 [constants.OS_VALIDATE_PARAMETERS],
7180 osparams)
7181 for node, nres in result.items():
7182 # we don't check for offline cases since this should be run only
7183 # against the master node and/or an instance's nodes
7184 nres.Raise("OS Parameters validation failed on node %s" % node)
7185 if not nres.payload:
7186 lu.LogInfo("OS %s not found on node %s, validation skipped",
7190 class LUInstanceCreate(LogicalUnit):
7191 """Create an instance.
7194 HPATH = "instance-add"
7195 HTYPE = constants.HTYPE_INSTANCE
7196 REQ_BGL = False
7198 def CheckArguments(self):
7199 """Check arguments.
7201 """
7202 # do not require name_check to ease forward/backward compatibility
7204 if self.op.no_install and self.op.start:
7205 self.LogInfo("No-installation mode selected, disabling startup")
7206 self.op.start = False
7207 # validate/normalize the instance name
7208 self.op.instance_name = \
7209 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7211 if self.op.ip_check and not self.op.name_check:
7212 # TODO: make the ip check more flexible and not depend on the name check
7213 raise errors.OpPrereqError("Cannot do ip check without a name check",
7216 # check nics' parameter names
7217 for nic in self.op.nics:
7218 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7220 # check disks. parameter names and consistent adopt/no-adopt strategy
7221 has_adopt = has_no_adopt = False
7222 for disk in self.op.disks:
7223 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7224 if "adopt" in disk:
7225 has_adopt = True
7226 else:
7227 has_no_adopt = True
7228 if has_adopt and has_no_adopt:
7229 raise errors.OpPrereqError("Either all disks are adopted or none is",
7232 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7233 raise errors.OpPrereqError("Disk adoption is not supported for the"
7234 " '%s' disk template" %
7235 self.op.disk_template,
7236 errors.ECODE_INVAL)
7237 if self.op.iallocator is not None:
7238 raise errors.OpPrereqError("Disk adoption not allowed with an"
7239 " iallocator script", errors.ECODE_INVAL)
7240 if self.op.mode == constants.INSTANCE_IMPORT:
7241 raise errors.OpPrereqError("Disk adoption not allowed for"
7242 " instance import", errors.ECODE_INVAL)
7244 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7245 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7246 " but no 'adopt' parameter given" %
7247 self.op.disk_template,
7248 errors.ECODE_INVAL)
7250 self.adopt_disks = has_adopt
7252 # instance name verification
7253 if self.op.name_check:
7254 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7255 self.op.instance_name = self.hostname1.name
7256 # used in CheckPrereq for ip ping check
7257 self.check_ip = self.hostname1.ip
7258 else:
7259 self.check_ip = None
7261 # file storage checks
7262 if (self.op.file_driver and
7263 not self.op.file_driver in constants.FILE_DRIVER):
7264 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7265 self.op.file_driver, errors.ECODE_INVAL)
7267 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7268 raise errors.OpPrereqError("File storage directory path not absolute",
7271 ### Node/iallocator related checks
7272 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7274 if self.op.pnode is not None:
7275 if self.op.disk_template in constants.DTS_INT_MIRROR:
7276 if self.op.snode is None:
7277 raise errors.OpPrereqError("The networked disk templates need"
7278 " a mirror node", errors.ECODE_INVAL)
7280 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7282 self.op.snode = None
7284 self._cds = _GetClusterDomainSecret()
7286 if self.op.mode == constants.INSTANCE_IMPORT:
7287 # On import force_variant must be True, because if we forced it at
7288 # initial install, our only chance when importing it back is that it
7289 # works again!
7290 self.op.force_variant = True
7292 if self.op.no_install:
7293 self.LogInfo("No-installation mode has no effect during import")
7295 elif self.op.mode == constants.INSTANCE_CREATE:
7296 if self.op.os_type is None:
7297 raise errors.OpPrereqError("No guest OS specified",
7299 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7300 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7301 " installation" % self.op.os_type,
7303 if self.op.disk_template is None:
7304 raise errors.OpPrereqError("No disk template specified",
7307 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7308 # Check handshake to ensure both clusters have the same domain secret
7309 src_handshake = self.op.source_handshake
7310 if not src_handshake:
7311 raise errors.OpPrereqError("Missing source handshake",
7314 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7317 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7320 # Load and check source CA
7321 self.source_x509_ca_pem = self.op.source_x509_ca
7322 if not self.source_x509_ca_pem:
7323 raise errors.OpPrereqError("Missing source X509 CA",
7327 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7329 except OpenSSL.crypto.Error, err:
7330 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7331 (err, ), errors.ECODE_INVAL)
7333 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7334 if errcode is not None:
7335 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7338 self.source_x509_ca = cert
7340 src_instance_name = self.op.source_instance_name
7341 if not src_instance_name:
7342 raise errors.OpPrereqError("Missing source instance name",
7345 self.source_instance_name = \
7346 netutils.GetHostname(name=src_instance_name).name
7349 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7350 self.op.mode, errors.ECODE_INVAL)
7352 def ExpandNames(self):
7353 """ExpandNames for CreateInstance.
7355 Figure out the right locks for instance creation.
7357 """
7358 self.needed_locks = {}
7360 instance_name = self.op.instance_name
7361 # this is just a preventive check, but someone might still add this
7362 # instance in the meantime, and creation will fail at lock-add time
7363 if instance_name in self.cfg.GetInstanceList():
7364 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7365 instance_name, errors.ECODE_EXISTS)
7367 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7369 if self.op.iallocator:
7370 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7371 else:
7372 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7373 nodelist = [self.op.pnode]
7374 if self.op.snode is not None:
7375 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7376 nodelist.append(self.op.snode)
7377 self.needed_locks[locking.LEVEL_NODE] = nodelist
7379 # in case of import lock the source node too
7380 if self.op.mode == constants.INSTANCE_IMPORT:
7381 src_node = self.op.src_node
7382 src_path = self.op.src_path
7384 if src_path is None:
7385 self.op.src_path = src_path = self.op.instance_name
7387 if src_node is None:
7388 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7389 self.op.src_node = None
7390 if os.path.isabs(src_path):
7391 raise errors.OpPrereqError("Importing an instance from an absolute"
7392 " path requires a source node option.",
7395 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7396 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7397 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7398 if not os.path.isabs(src_path):
7399 self.op.src_path = src_path = \
7400 utils.PathJoin(constants.EXPORT_DIR, src_path)
7402 def _RunAllocator(self):
7403 """Run the allocator based on input opcode.
7406 nics = [n.ToDict() for n in self.nics]
7407 ial = IAllocator(self.cfg, self.rpc,
7408 mode=constants.IALLOCATOR_MODE_ALLOC,
7409 name=self.op.instance_name,
7410 disk_template=self.op.disk_template,
7411 tags=[],
7412 os=self.op.os_type,
7413 vcpus=self.be_full[constants.BE_VCPUS],
7414 mem_size=self.be_full[constants.BE_MEMORY],
7415 disks=self.disks,
7416 nics=nics,
7417 hypervisor=self.op.hypervisor,
7418 )
7420 ial.Run(self.op.iallocator)
7423 raise errors.OpPrereqError("Can't compute nodes using"
7424 " iallocator '%s': %s" %
7425 (self.op.iallocator, ial.info),
7427 if len(ial.result) != ial.required_nodes:
7428 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7429 " of nodes (%s), required %s" %
7430 (self.op.iallocator, len(ial.result),
7431 ial.required_nodes), errors.ECODE_FAULT)
7432 self.op.pnode = ial.result[0]
7433 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7434 self.op.instance_name, self.op.iallocator,
7435 utils.CommaJoin(ial.result))
7436 if ial.required_nodes == 2:
7437 self.op.snode = ial.result[1]
7439 def BuildHooksEnv(self):
7440 """Build hooks env.
7442 This runs on master, primary and secondary nodes of the instance.
7444 """
7445 env = {
7446 "ADD_MODE": self.op.mode,
7447 }
7448 if self.op.mode == constants.INSTANCE_IMPORT:
7449 env["SRC_NODE"] = self.op.src_node
7450 env["SRC_PATH"] = self.op.src_path
7451 env["SRC_IMAGES"] = self.src_images
7453 env.update(_BuildInstanceHookEnv(
7454 name=self.op.instance_name,
7455 primary_node=self.op.pnode,
7456 secondary_nodes=self.secondaries,
7457 status=self.op.start,
7458 os_type=self.op.os_type,
7459 memory=self.be_full[constants.BE_MEMORY],
7460 vcpus=self.be_full[constants.BE_VCPUS],
7461 nics=_NICListToTuple(self, self.nics),
7462 disk_template=self.op.disk_template,
7463 disks=[(d["size"], d["mode"]) for d in self.disks],
7464 bep=self.be_full,
7465 hvp=self.hv_full,
7466 hypervisor_name=self.op.hypervisor,
7467 ))
7469 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7470 self.secondaries)
7471 return env, nl, nl
7473 def _ReadExportInfo(self):
7474 """Reads the export information from disk.
7476 If the opcode did not specify the source node and path, they are
7477 overridden here with the actual information found on disk.
7479 @return: the export information
7482 assert self.op.mode == constants.INSTANCE_IMPORT
7484 src_node = self.op.src_node
7485 src_path = self.op.src_path
7487 if src_node is None:
7488 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7489 exp_list = self.rpc.call_export_list(locked_nodes)
7491 for node in exp_list:
7492 if exp_list[node].fail_msg:
7494 if src_path in exp_list[node].payload:
7496 self.op.src_node = src_node = node
7497 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7501 raise errors.OpPrereqError("No export found for relative path %s" %
7502 src_path, errors.ECODE_INVAL)
7504 _CheckNodeOnline(self, src_node)
7505 result = self.rpc.call_export_info(src_node, src_path)
7506 result.Raise("No export or invalid export found in dir %s" % src_path)
7508 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7509 if not export_info.has_section(constants.INISECT_EXP):
7510 raise errors.ProgrammerError("Corrupted export config",
7511 errors.ECODE_ENVIRON)
7513 ei_version = export_info.get(constants.INISECT_EXP, "version")
7514 if (int(ei_version) != constants.EXPORT_VERSION):
7515 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7516 (ei_version, constants.EXPORT_VERSION),
7517 errors.ECODE_ENVIRON)
7519 return export_info
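# For reference, the export file parsed above is a plain INI file; a
# hypothetical (abridged) example, with section names following the
# constants.INISECT_* values used here and all values made up:
#
#   [export]
#   version = 0
#
#   [instance]
#   name = inst1.example.com
#   disk_count = 1
#   disk0_size = 10240
#   disk0_dump = disk0.snap
#   nic_count = 1
#   nic0_mac = aa:00:00:35:d2:1f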
7520 def _ReadExportParams(self, einfo):
7521 """Use export parameters as defaults.
7523 If the opcode does not override certain instance parameters, try to
7524 take them from the export information, if it declares them.
7526 """
7527 if self.op.os_type is None:
7528 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7530 if self.op.disk_template is None:
7531 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7532 self.op.disk_template = einfo.get(constants.INISECT_INS,
7533 "disk_template")
7534 else:
7535 raise errors.OpPrereqError("No disk template specified and the export"
7536 " is missing the disk_template information",
7537 errors.ECODE_INVAL)
7539 if not self.op.disks:
7540 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7541 disks = []
7542 # TODO: import the disk iv_name too
7543 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7544 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7545 disks.append({"size": disk_sz})
7546 self.op.disks = disks
7547 else:
7548 raise errors.OpPrereqError("No disk info specified and the export"
7549 " is missing the disk information",
7550 errors.ECODE_INVAL)
7552 if (not self.op.nics and
7553 einfo.has_option(constants.INISECT_INS, "nic_count")):
7554 nics = []
7555 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7556 ndict = {}
7557 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7558 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7559 ndict[name] = v
7560 nics.append(ndict)
7561 self.op.nics = nics
7563 if (self.op.hypervisor is None and
7564 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7565 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7566 if einfo.has_section(constants.INISECT_HYP):
7567 # use the export parameters but do not override the ones
7568 # specified by the user
7569 for name, value in einfo.items(constants.INISECT_HYP):
7570 if name not in self.op.hvparams:
7571 self.op.hvparams[name] = value
7573 if einfo.has_section(constants.INISECT_BEP):
7574 # use the parameters, without overriding
7575 for name, value in einfo.items(constants.INISECT_BEP):
7576 if name not in self.op.beparams:
7577 self.op.beparams[name] = value
7579 # try to read the parameters old-style, from the main section
7580 for name in constants.BES_PARAMETERS:
7581 if (name not in self.op.beparams and
7582 einfo.has_option(constants.INISECT_INS, name)):
7583 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7585 if einfo.has_section(constants.INISECT_OSP):
7586 # use the parameters, without overriding
7587 for name, value in einfo.items(constants.INISECT_OSP):
7588 if name not in self.op.osparams:
7589 self.op.osparams[name] = value
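# The merge rule used throughout this method is "export values fill holes,
# user values win"; a minimal sketch of that pattern (names illustrative):
#
#   def merge_export_defaults(user_params, export_params):
#     merged = dict(user_params)
#     for name, value in export_params.items():
#       merged.setdefault(name, value)  # never override user-supplied values
#     return merged
#
#   merge_export_defaults({"root_path": "/dev/vda1"},
#                         {"root_path": "/dev/xvda", "kernel_args": "ro"})
#   # -> {"root_path": "/dev/vda1", "kernel_args": "ro"}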
7591 def _RevertToDefaults(self, cluster):
7592 """Revert the instance parameters to the default values.
7596 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7597 for name in self.op.hvparams.keys():
7598 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7599 del self.op.hvparams[name]
7601 be_defs = cluster.SimpleFillBE({})
7602 for name in self.op.beparams.keys():
7603 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7604 del self.op.beparams[name]
7606 nic_defs = cluster.SimpleFillNIC({})
7607 for nic in self.op.nics:
7608 for name in constants.NICS_PARAMETERS:
7609 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7610 del nic[name]
7612 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7613 for name in self.op.osparams.keys():
7614 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7615 del self.op.osparams[name]
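# Illustrative example of the pruning above (values made up): if the cluster
# default already says {"vcpus": 1} and the opcode also carries
# {"vcpus": 1, "memory": 512}, only {"memory": 512} stays on the instance, so
# the instance later tracks changes to the cluster-wide default:
#
#   def prune_to_defaults(params, defaults):
#     for name in list(params):
#       if name in defaults and defaults[name] == params[name]:
#         del params[name]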
7617 def CheckPrereq(self):
7618 """Check prerequisites.
7621 if self.op.mode == constants.INSTANCE_IMPORT:
7622 export_info = self._ReadExportInfo()
7623 self._ReadExportParams(export_info)
7625 if (not self.cfg.GetVGName() and
7626 self.op.disk_template not in constants.DTS_NOT_LVM):
7627 raise errors.OpPrereqError("Cluster does not support lvm-based"
7628 " instances", errors.ECODE_STATE)
7630 if self.op.hypervisor is None:
7631 self.op.hypervisor = self.cfg.GetHypervisorType()
7633 cluster = self.cfg.GetClusterInfo()
7634 enabled_hvs = cluster.enabled_hypervisors
7635 if self.op.hypervisor not in enabled_hvs:
7636 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7637 " cluster (%s)" % (self.op.hypervisor,
7638 ",".join(enabled_hvs)),
7641 # check hypervisor parameter syntax (locally)
7642 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7643 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7644 self.op.hvparams)
7645 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7646 hv_type.CheckParameterSyntax(filled_hvp)
7647 self.hv_full = filled_hvp
7648 # check that we don't specify global parameters on an instance
7649 _CheckGlobalHvParams(self.op.hvparams)
7651 # fill and remember the beparams dict
7652 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7653 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7655 # build os parameters
7656 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7658 # now that hvp/bep are in final format, let's reset to defaults,
7659 # if told to do so
7660 if self.op.identify_defaults:
7661 self._RevertToDefaults(cluster)
7663 # NIC buildup
7664 self.nics = []
7665 for idx, nic in enumerate(self.op.nics):
7666 nic_mode_req = nic.get("mode", None)
7667 nic_mode = nic_mode_req
7668 if nic_mode is None:
7669 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7671 # in routed mode, for the first nic, the default ip is 'auto'
7672 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7673 default_ip_mode = constants.VALUE_AUTO
7675 default_ip_mode = constants.VALUE_NONE
7677 # ip validity checks
7678 ip = nic.get("ip", default_ip_mode)
7679 if ip is None or ip.lower() == constants.VALUE_NONE:
7680 nic_ip = None
7681 elif ip.lower() == constants.VALUE_AUTO:
7682 if not self.op.name_check:
7683 raise errors.OpPrereqError("IP address set to auto but name checks"
7684 " have been skipped",
7686 nic_ip = self.hostname1.ip
7687 else:
7688 if not netutils.IPAddress.IsValid(ip):
7689 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7690 errors.ECODE_INVAL)
7691 nic_ip = ip
7693 # TODO: check the ip address for uniqueness
7694 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7695 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7698 # MAC address verification
7699 mac = nic.get("mac", constants.VALUE_AUTO)
7700 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7701 mac = utils.NormalizeAndValidateMac(mac)
7703 try:
7704 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7705 except errors.ReservationError:
7706 raise errors.OpPrereqError("MAC address %s already in use"
7707 " in cluster" % mac,
7708 errors.ECODE_NOTUNIQUE)
7710 # Build nic parameters
7711 link = nic.get(constants.INIC_LINK, None)
7712 nicparams = {}
7713 if nic_mode_req:
7714 nicparams[constants.NIC_MODE] = nic_mode_req
7715 if link:
7716 nicparams[constants.NIC_LINK] = link
7718 check_params = cluster.SimpleFillNIC(nicparams)
7719 objects.NIC.CheckParameterSyntax(check_params)
7720 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
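# A hedged example of the self.op.nics input this loop accepts (all values
# hypothetical; "auto"/"generate" trigger MAC generation further down):
#
#   nics = [
#     {"mode": "bridged", "link": "xen-br0", "mac": "auto"},
#     {"mode": "routed", "ip": "198.51.100.10", "mac": "aa:00:00:12:34:56"},
#   ]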
7722 # disk checks/pre-build
7723 self.disks = []
7724 for disk in self.op.disks:
7725 mode = disk.get("mode", constants.DISK_RDWR)
7726 if mode not in constants.DISK_ACCESS_SET:
7727 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7728 mode, errors.ECODE_INVAL)
7729 size = disk.get("size", None)
7731 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7734 except (TypeError, ValueError):
7735 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7737 vg = disk.get("vg", self.cfg.GetVGName())
7738 new_disk = {"size": size, "mode": mode, "vg": vg}
7740 new_disk["adopt"] = disk["adopt"]
7741 self.disks.append(new_disk)
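# Similarly, a hypothetical self.op.disks input for this loop; "adopt" is
# only meaningful for the adoption-capable templates checked later on:
#
#   disks = [
#     {"size": 10240},                              # 10 GiB, default mode/VG
#     {"size": 2048, "mode": "ro", "vg": "xenvg"},
#     {"size": 10240, "adopt": "existing-lv"},
#   ]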
7743 if self.op.mode == constants.INSTANCE_IMPORT:
7745 # Check that the new instance doesn't have less disks than the export
7746 instance_disks = len(self.disks)
7747 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7748 if instance_disks < export_disks:
7749 raise errors.OpPrereqError("Not enough disks to import."
7750 " (instance: %d, export: %d)" %
7751 (instance_disks, export_disks),
7754 disk_images = []
7755 for idx in range(export_disks):
7756 option = 'disk%d_dump' % idx
7757 if export_info.has_option(constants.INISECT_INS, option):
7758 # FIXME: are the old OSes, disk sizes, etc. useful?
7759 export_name = export_info.get(constants.INISECT_INS, option)
7760 image = utils.PathJoin(self.op.src_path, export_name)
7761 disk_images.append(image)
7762 else:
7763 disk_images.append(False)
7765 self.src_images = disk_images
7767 old_name = export_info.get(constants.INISECT_INS, 'name')
7768 try:
7769 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7770 except (TypeError, ValueError), err:
7771 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7772 " an integer: %s" % str(err),
7774 if self.op.instance_name == old_name:
7775 for idx, nic in enumerate(self.nics):
7776 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7777 nic_mac_ini = 'nic%d_mac' % idx
7778 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7780 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7782 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7783 if self.op.ip_check:
7784 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7785 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7786 (self.check_ip, self.op.instance_name),
7787 errors.ECODE_NOTUNIQUE)
7789 #### mac address generation
7790 # By generating the MAC address here, both the allocator and the hooks get
7791 # the real final MAC address rather than the 'auto' or 'generate' value.
7792 # There is a race condition between the generation and the instance object
7793 # creation, which means that we know the mac is valid now, but we're not
7794 # sure it will be when we actually add the instance. If things go bad
7795 # adding the instance will abort because of a duplicate mac, and the
7796 # creation job will fail.
7797 for nic in self.nics:
7798 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7799 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7803 if self.op.iallocator is not None:
7804 self._RunAllocator()
7806 #### node related checks
7808 # check primary node
7809 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7810 assert self.pnode is not None, \
7811 "Cannot retrieve locked node %s" % self.op.pnode
7813 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7814 pnode.name, errors.ECODE_STATE)
7816 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7817 pnode.name, errors.ECODE_STATE)
7818 if not pnode.vm_capable:
7819 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7820 " '%s'" % pnode.name, errors.ECODE_STATE)
7822 self.secondaries = []
7824 # mirror node verification
7825 if self.op.disk_template in constants.DTS_INT_MIRROR:
7826 if self.op.snode == pnode.name:
7827 raise errors.OpPrereqError("The secondary node cannot be the"
7828 " primary node.", errors.ECODE_INVAL)
7829 _CheckNodeOnline(self, self.op.snode)
7830 _CheckNodeNotDrained(self, self.op.snode)
7831 _CheckNodeVmCapable(self, self.op.snode)
7832 self.secondaries.append(self.op.snode)
7834 nodenames = [pnode.name] + self.secondaries
7836 if not self.adopt_disks:
7837 # Check lv size requirements, if not adopting
7838 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7839 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7841 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
7842 all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7843 if len(all_lvs) != len(self.disks):
7844 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7846 for lv_name in all_lvs:
7847 try:
7848 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7849 # to ReserveLV uses the same syntax
7850 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7851 except errors.ReservationError:
7852 raise errors.OpPrereqError("LV named %s used by another instance" %
7853 lv_name, errors.ECODE_NOTUNIQUE)
7855 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7856 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7858 node_lvs = self.rpc.call_lv_list([pnode.name],
7859 vg_names.payload.keys())[pnode.name]
7860 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7861 node_lvs = node_lvs.payload
7863 delta = all_lvs.difference(node_lvs.keys())
7865 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7866 utils.CommaJoin(delta),
7868 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7870 raise errors.OpPrereqError("Online logical volumes found, cannot"
7871 " adopt: %s" % utils.CommaJoin(online_lvs),
7873 # update the size of disk based on what is found
7874 for dsk in self.disks:
7875 dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7877 elif self.op.disk_template == constants.DT_BLOCK:
7878 # Normalize and de-duplicate device paths
7879 all_disks = set([os.path.abspath(i["adopt"]) for i in self.disks])
7880 if len(all_disks) != len(self.disks):
7881 raise errors.OpPrereqError("Duplicate disk names given for adoption",
7883 baddisks = [d for d in all_disks
7884 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
7886 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
7887 " cannot be adopted" %
7888 (", ".join(baddisks),
7889 constants.ADOPTABLE_BLOCKDEV_ROOT),
7892 node_disks = self.rpc.call_bdev_sizes([pnode.name],
7893 list(all_disks))[pnode.name]
7894 node_disks.Raise("Cannot get block device information from node %s" %
7896 node_disks = node_disks.payload
7897 delta = all_disks.difference(node_disks.keys())
7899 raise errors.OpPrereqError("Missing block device(s): %s" %
7900 utils.CommaJoin(delta),
7902 for dsk in self.disks:
7903 dsk["size"] = int(float(node_disks[dsk["adopt"]]))
7905 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7907 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7908 # check OS parameters (remotely)
7909 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7911 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7913 # memory check on primary node
7914 if self.op.start:
7915 _CheckNodeFreeMemory(self, self.pnode.name,
7916 "creating instance %s" % self.op.instance_name,
7917 self.be_full[constants.BE_MEMORY],
7918 self.op.hypervisor)
7920 self.dry_run_result = list(nodenames)
7922 def Exec(self, feedback_fn):
7923 """Create and add the instance to the cluster.
7926 instance = self.op.instance_name
7927 pnode_name = self.pnode.name
7929 ht_kind = self.op.hypervisor
7930 if ht_kind in constants.HTS_REQ_PORT:
7931 network_port = self.cfg.AllocatePort()
7932 else:
7933 network_port = None
7935 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
7936 # this is needed because os.path.join does not accept None arguments
7937 if self.op.file_storage_dir is None:
7938 string_file_storage_dir = ""
7940 string_file_storage_dir = self.op.file_storage_dir
7942 # build the full file storage dir path
7943 if self.op.disk_template == constants.DT_SHARED_FILE:
7944 get_fsd_fn = self.cfg.GetSharedFileStorageDir
7946 get_fsd_fn = self.cfg.GetFileStorageDir
7948 file_storage_dir = utils.PathJoin(get_fsd_fn(),
7949 string_file_storage_dir, instance)
7951 file_storage_dir = ""
7953 disks = _GenerateDiskTemplate(self,
7954 self.op.disk_template,
7955 instance, pnode_name,
7959 self.op.file_driver,
7963 iobj = objects.Instance(name=instance, os=self.op.os_type,
7964 primary_node=pnode_name,
7965 nics=self.nics, disks=disks,
7966 disk_template=self.op.disk_template,
7968 network_port=network_port,
7969 beparams=self.op.beparams,
7970 hvparams=self.op.hvparams,
7971 hypervisor=self.op.hypervisor,
7972 osparams=self.op.osparams,
7973 )
7975 if self.adopt_disks:
7976 if self.op.disk_template == constants.DT_PLAIN:
7977 # rename LVs to the newly-generated names; we need to construct
7978 # 'fake' LV disks with the old data, plus the new unique_id
7979 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7980 rename_to = []
7981 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7982 rename_to.append(t_dsk.logical_id)
7983 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7984 self.cfg.SetDiskID(t_dsk, pnode_name)
7985 result = self.rpc.call_blockdev_rename(pnode_name,
7986 zip(tmp_disks, rename_to))
7987 result.Raise("Failed to rename adoped LVs")
7989 feedback_fn("* creating instance disks...")
7991 _CreateDisks(self, iobj)
7992 except errors.OpExecError:
7993 self.LogWarning("Device creation failed, reverting...")
7995 _RemoveDisks(self, iobj)
7997 self.cfg.ReleaseDRBDMinors(instance)
8000 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
8001 feedback_fn("* wiping instance disks...")
8003 _WipeDisks(self, iobj)
8004 except errors.OpExecError:
8005 self.LogWarning("Device wiping failed, reverting...")
8007 _RemoveDisks(self, iobj)
8009 self.cfg.ReleaseDRBDMinors(instance)
8012 feedback_fn("adding instance %s to cluster config" % instance)
8014 self.cfg.AddInstance(iobj, self.proc.GetECId())
8016 # Declare that we don't want to remove the instance lock anymore, as we've
8017 # added the instance to the config
8018 del self.remove_locks[locking.LEVEL_INSTANCE]
8019 # Unlock all the nodes
8020 if self.op.mode == constants.INSTANCE_IMPORT:
8021 nodes_keep = [self.op.src_node]
8022 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8023 if node != self.op.src_node]
8024 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8025 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8026 else:
8027 self.context.glm.release(locking.LEVEL_NODE)
8028 del self.acquired_locks[locking.LEVEL_NODE]
8030 if self.op.wait_for_sync:
8031 disk_abort = not _WaitForSync(self, iobj)
8032 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8033 # make sure the disks are not degraded (still sync-ing is ok)
8035 feedback_fn("* checking mirrors status")
8036 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8037 else:
8038 disk_abort = False
8040 if disk_abort:
8041 _RemoveDisks(self, iobj)
8042 self.cfg.RemoveInstance(iobj.name)
8043 # Make sure the instance lock gets removed
8044 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8045 raise errors.OpExecError("There are some degraded disks for"
8048 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8049 if self.op.mode == constants.INSTANCE_CREATE:
8050 if not self.op.no_install:
8051 feedback_fn("* running the instance OS create scripts...")
8052 # FIXME: pass debug option from opcode to backend
8053 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8054 self.op.debug_level)
8055 result.Raise("Could not add os for instance %s"
8056 " on node %s" % (instance, pnode_name))
8058 elif self.op.mode == constants.INSTANCE_IMPORT:
8059 feedback_fn("* running the instance OS import scripts...")
8061 transfers = []
8063 for idx, image in enumerate(self.src_images):
8064 if not image:
8065 continue
8067 # FIXME: pass debug option from opcode to backend
8068 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8069 constants.IEIO_FILE, (image, ),
8070 constants.IEIO_SCRIPT,
8071 (iobj.disks[idx], idx),
8073 transfers.append(dt)
8075 import_result = \
8076 masterd.instance.TransferInstanceData(self, feedback_fn,
8077 self.op.src_node, pnode_name,
8078 self.pnode.secondary_ip,
8079 transfers)
8080 if not compat.all(import_result):
8081 self.LogWarning("Some disks for instance %s on node %s were not"
8082 " imported successfully" % (instance, pnode_name))
8084 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8085 feedback_fn("* preparing remote import...")
8086 # The source cluster will stop the instance before attempting to make a
8087 # connection. In some cases stopping an instance can take a long time,
8088 # hence the shutdown timeout is added to the connection timeout.
8089 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8090 self.op.source_shutdown_timeout)
8091 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8093 assert iobj.primary_node == self.pnode.name
8094 disk_results = \
8095 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8096 self.source_x509_ca,
8097 self._cds, timeouts)
8098 if not compat.all(disk_results):
8099 # TODO: Should the instance still be started, even if some disks
8100 # failed to import (valid for local imports, too)?
8101 self.LogWarning("Some disks for instance %s on node %s were not"
8102 " imported successfully" % (instance, pnode_name))
8104 # Run rename script on newly imported instance
8105 assert iobj.name == instance
8106 feedback_fn("Running rename script for %s" % instance)
8107 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8108 self.source_instance_name,
8109 self.op.debug_level)
8111 self.LogWarning("Failed to run rename script for %s on node"
8112 " %s: %s" % (instance, pnode_name, result.fail_msg))
8114 else:
8115 # also checked in the prereq part
8116 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8117 % self.op.mode)
8119 if self.op.start:
8120 iobj.admin_up = True
8121 self.cfg.Update(iobj, feedback_fn)
8122 logging.info("Starting instance %s on node %s", instance, pnode_name)
8123 feedback_fn("* starting instance...")
8124 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8125 result.Raise("Could not start instance")
8127 return list(iobj.all_nodes)
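# End-to-end, this LU is driven by an OpInstanceCreate opcode; a hedged
# sketch of a minimal DRBD creation request (field values illustrative):
#
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{"size": 10240}], nics=[{}],
#                                 os_type="debootstrap+default",
#                                 pnode="node1.example.com",
#                                 snode="node2.example.com")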
8130 class LUInstanceConsole(NoHooksLU):
8131 """Connect to an instance's console.
8133 This is somewhat special in that it returns the command line that
8134 you need to run on the master node in order to connect to the console.
8140 def ExpandNames(self):
8141 self._ExpandAndLockInstance()
8143 def CheckPrereq(self):
8144 """Check prerequisites.
8146 This checks that the instance is in the cluster.
8149 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8150 assert self.instance is not None, \
8151 "Cannot retrieve locked instance %s" % self.op.instance_name
8152 _CheckNodeOnline(self, self.instance.primary_node)
8154 def Exec(self, feedback_fn):
8155 """Connect to the console of an instance
8158 instance = self.instance
8159 node = instance.primary_node
8161 node_insts = self.rpc.call_instance_list([node],
8162 [instance.hypervisor])[node]
8163 node_insts.Raise("Can't get node information from %s" % node)
8165 if instance.name not in node_insts.payload:
8166 if instance.admin_up:
8167 state = constants.INSTST_ERRORDOWN
8168 else:
8169 state = constants.INSTST_ADMINDOWN
8170 raise errors.OpExecError("Instance %s is not running (state %s)" %
8171 (instance.name, state))
8173 logging.debug("Connecting to console of %s on %s", instance.name, node)
8175 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8178 def _GetInstanceConsole(cluster, instance):
8179 """Returns console information for an instance.
8181 @type cluster: L{objects.Cluster}
8182 @type instance: L{objects.Instance}
8186 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8187 # beparams and hvparams are passed separately, to avoid editing the
8188 # instance and then saving the defaults in the instance itself.
8189 hvparams = cluster.FillHV(instance)
8190 beparams = cluster.FillBE(instance)
8191 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8193 assert console.instance == instance.name
8194 assert console.Validate()
8196 return console.ToDict()
8199 class LUInstanceReplaceDisks(LogicalUnit):
8200 """Replace the disks of an instance.
8203 HPATH = "mirrors-replace"
8204 HTYPE = constants.HTYPE_INSTANCE
8207 def CheckArguments(self):
8208 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8209 self.op.iallocator)
8211 def ExpandNames(self):
8212 self._ExpandAndLockInstance()
8214 if self.op.iallocator is not None:
8215 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8217 elif self.op.remote_node is not None:
8218 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8219 self.op.remote_node = remote_node
8221 # Warning: do not remove the locking of the new secondary here
8222 # unless DRBD8.AddChildren is changed to work in parallel;
8223 # currently it doesn't since parallel invocations of
8224 # FindUnusedMinor will conflict
8225 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8226 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8228 else:
8229 self.needed_locks[locking.LEVEL_NODE] = []
8230 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8232 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8233 self.op.iallocator, self.op.remote_node,
8234 self.op.disks, False, self.op.early_release)
8236 self.tasklets = [self.replacer]
8238 def DeclareLocks(self, level):
8239 # If we're not already locking all nodes in the set we have to declare the
8240 # instance's primary/secondary nodes.
8241 if (level == locking.LEVEL_NODE and
8242 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8243 self._LockInstancesNodes()
8245 def BuildHooksEnv(self):
8248 This runs on the master, the primary and all the secondaries.
8251 instance = self.replacer.instance
8253 "MODE": self.op.mode,
8254 "NEW_SECONDARY": self.op.remote_node,
8255 "OLD_SECONDARY": instance.secondary_nodes[0],
8257 env.update(_BuildInstanceHookEnvByObject(self, instance))
8258 nl = [
8259 self.cfg.GetMasterNode(),
8260 instance.primary_node,
8261 ]
8262 if self.op.remote_node is not None:
8263 nl.append(self.op.remote_node)
8265 return env, nl, nl
8267 class TLReplaceDisks(Tasklet):
8268 """Replaces disks for an instance.
8270 Note: Locking is not within the scope of this class.
8273 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8274 disks, delay_iallocator, early_release):
8275 """Initializes this class.
8278 Tasklet.__init__(self, lu)
8281 self.instance_name = instance_name
8282 self.mode = mode
8283 self.iallocator_name = iallocator_name
8284 self.remote_node = remote_node
8285 self.disks = disks
8286 self.delay_iallocator = delay_iallocator
8287 self.early_release = early_release
8290 self.instance = None
8291 self.new_node = None
8292 self.target_node = None
8293 self.other_node = None
8294 self.remote_node_info = None
8295 self.node_secondary_ip = None
8297 @staticmethod
8298 def CheckArguments(mode, remote_node, iallocator):
8299 """Helper function for users of this class.
8302 # check for valid parameter combination
8303 if mode == constants.REPLACE_DISK_CHG:
8304 if remote_node is None and iallocator is None:
8305 raise errors.OpPrereqError("When changing the secondary either an"
8306 " iallocator script must be used or the"
8307 " new node given", errors.ECODE_INVAL)
8309 if remote_node is not None and iallocator is not None:
8310 raise errors.OpPrereqError("Give either the iallocator or the new"
8311 " secondary, not both", errors.ECODE_INVAL)
8313 elif remote_node is not None or iallocator is not None:
8314 # Not replacing the secondary
8315 raise errors.OpPrereqError("The iallocator and new node options can"
8316 " only be used when changing the"
8317 " secondary node", errors.ECODE_INVAL)
8319 @staticmethod
8320 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8321 """Compute a new secondary node using an IAllocator.
8324 ial = IAllocator(lu.cfg, lu.rpc,
8325 mode=constants.IALLOCATOR_MODE_RELOC,
8326 name=instance_name,
8327 relocate_from=relocate_from)
8329 ial.Run(iallocator_name)
8332 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8333 " %s" % (iallocator_name, ial.info),
8336 if len(ial.result) != ial.required_nodes:
8337 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8338 " of nodes (%s), required %s" %
8339 (iallocator_name,
8340 len(ial.result), ial.required_nodes),
8341 errors.ECODE_FAULT)
8343 remote_node_name = ial.result[0]
8345 lu.LogInfo("Selected new secondary for instance '%s': %s",
8346 instance_name, remote_node_name)
8348 return remote_node_name
8350 def _FindFaultyDisks(self, node_name):
8351 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8352 node_name, True)
8354 def _CheckDisksActivated(self, instance):
8355 """Checks if the instance disks are activated.
8357 @param instance: The instance to check disks
8358 @return: True if they are activated, False otherwise
8361 nodes = instance.all_nodes
8363 for idx, dev in enumerate(instance.disks):
8364 for node in nodes:
8365 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8366 self.cfg.SetDiskID(dev, node)
8368 result = self.rpc.call_blockdev_find(node, dev)
8370 if result.offline:
8371 continue
8372 elif result.fail_msg or not result.payload:
8373 return False
8375 return True
8378 def CheckPrereq(self):
8379 """Check prerequisites.
8381 This checks that the instance is in the cluster.
8384 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8385 assert instance is not None, \
8386 "Cannot retrieve locked instance %s" % self.instance_name
8388 if instance.disk_template != constants.DT_DRBD8:
8389 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8390 " instances", errors.ECODE_INVAL)
8392 if len(instance.secondary_nodes) != 1:
8393 raise errors.OpPrereqError("The instance has a strange layout,"
8394 " expected one secondary but found %d" %
8395 len(instance.secondary_nodes),
8398 if not self.delay_iallocator:
8399 self._CheckPrereq2()
8401 def _CheckPrereq2(self):
8402 """Check prerequisites, second part.
8404 This function should always be part of CheckPrereq. It was separated and is
8405 now called from Exec because during node evacuation iallocator was only
8406 called with an unmodified cluster model, not taking planned changes into
8407 account.
8410 instance = self.instance
8411 secondary_node = instance.secondary_nodes[0]
8413 if self.iallocator_name is None:
8414 remote_node = self.remote_node
8415 else:
8416 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8417 instance.name, instance.secondary_nodes)
8419 if remote_node is not None:
8420 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8421 assert self.remote_node_info is not None, \
8422 "Cannot retrieve locked node %s" % remote_node
8423 else:
8424 self.remote_node_info = None
8426 if remote_node == self.instance.primary_node:
8427 raise errors.OpPrereqError("The specified node is the primary node of"
8428 " the instance.", errors.ECODE_INVAL)
8430 if remote_node == secondary_node:
8431 raise errors.OpPrereqError("The specified node is already the"
8432 " secondary node of the instance.",
8435 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8436 constants.REPLACE_DISK_CHG):
8437 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8440 if self.mode == constants.REPLACE_DISK_AUTO:
8441 if not self._CheckDisksActivated(instance):
8442 raise errors.OpPrereqError("Please run activate-disks on instance %s"
8443 " first" % self.instance_name,
8445 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8446 faulty_secondary = self._FindFaultyDisks(secondary_node)
8448 if faulty_primary and faulty_secondary:
8449 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8450 " one node and can not be repaired"
8451 " automatically" % self.instance_name,
8454 if faulty_primary:
8455 self.disks = faulty_primary
8456 self.target_node = instance.primary_node
8457 self.other_node = secondary_node
8458 check_nodes = [self.target_node, self.other_node]
8459 elif faulty_secondary:
8460 self.disks = faulty_secondary
8461 self.target_node = secondary_node
8462 self.other_node = instance.primary_node
8463 check_nodes = [self.target_node, self.other_node]
8464 else:
8465 self.disks = []
8466 check_nodes = []
8468 else:
8469 # Non-automatic modes
8470 if self.mode == constants.REPLACE_DISK_PRI:
8471 self.target_node = instance.primary_node
8472 self.other_node = secondary_node
8473 check_nodes = [self.target_node, self.other_node]
8475 elif self.mode == constants.REPLACE_DISK_SEC:
8476 self.target_node = secondary_node
8477 self.other_node = instance.primary_node
8478 check_nodes = [self.target_node, self.other_node]
8480 elif self.mode == constants.REPLACE_DISK_CHG:
8481 self.new_node = remote_node
8482 self.other_node = instance.primary_node
8483 self.target_node = secondary_node
8484 check_nodes = [self.new_node, self.other_node]
8486 _CheckNodeNotDrained(self.lu, remote_node)
8487 _CheckNodeVmCapable(self.lu, remote_node)
8489 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8490 assert old_node_info is not None
8491 if old_node_info.offline and not self.early_release:
8492 # doesn't make sense to delay the release
8493 self.early_release = True
8494 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8495 " early-release mode", secondary_node)
8498 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8501 # If not specified all disks should be replaced
8503 self.disks = range(len(self.instance.disks))
8505 for node in check_nodes:
8506 _CheckNodeOnline(self.lu, node)
8508 # Check whether disks are valid
8509 for disk_idx in self.disks:
8510 instance.FindDisk(disk_idx)
8512 # Get secondary node IP addresses
8514 node_2nd_ip = {}
8515 for node_name in [self.target_node, self.other_node, self.new_node]:
8516 if node_name is not None:
8517 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8519 self.node_secondary_ip = node_2nd_ip
8521 def Exec(self, feedback_fn):
8522 """Execute disk replacement.
8524 This dispatches the disk replacement to the appropriate handler.
8527 if self.delay_iallocator:
8528 self._CheckPrereq2()
8531 feedback_fn("No disks need replacement")
8534 feedback_fn("Replacing disk(s) %s for %s" %
8535 (utils.CommaJoin(self.disks), self.instance.name))
8537 activate_disks = (not self.instance.admin_up)
8539 # Activate the instance disks if we're replacing them on a down instance
8540 if activate_disks:
8541 _StartInstanceDisks(self.lu, self.instance, True)
8543 try:
8544 # Should we replace the secondary node?
8545 if self.new_node is not None:
8546 fn = self._ExecDrbd8Secondary
8547 else:
8548 fn = self._ExecDrbd8DiskOnly
8550 return fn(feedback_fn)
8552 finally:
8553 # Deactivate the instance disks if we're replacing them on a
8554 # down instance
8555 if activate_disks:
8556 _SafeShutdownInstanceDisks(self.lu, self.instance)
8558 def _CheckVolumeGroup(self, nodes):
8559 self.lu.LogInfo("Checking volume groups")
8561 vgname = self.cfg.GetVGName()
8563 # Make sure volume group exists on all involved nodes
8564 results = self.rpc.call_vg_list(nodes)
8566 raise errors.OpExecError("Can't list volume groups on the nodes")
8570 res.Raise("Error checking node %s" % node)
8571 if vgname not in res.payload:
8572 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8575 def _CheckDisksExistence(self, nodes):
8576 # Check disk existence
8577 for idx, dev in enumerate(self.instance.disks):
8578 if idx not in self.disks:
8579 continue
8581 for node in nodes:
8582 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8583 self.cfg.SetDiskID(dev, node)
8585 result = self.rpc.call_blockdev_find(node, dev)
8587 msg = result.fail_msg
8588 if msg or not result.payload:
8590 msg = "disk not found"
8591 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8594 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8595 for idx, dev in enumerate(self.instance.disks):
8596 if idx not in self.disks:
8597 continue
8599 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8600 (idx, node_name))
8602 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8603 ldisk=ldisk):
8604 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8605 " replace disks for instance %s" %
8606 (node_name, self.instance.name))
8608 def _CreateNewStorage(self, node_name):
8609 vgname = self.cfg.GetVGName()
8611 iv_names = {}
8612 for idx, dev in enumerate(self.instance.disks):
8613 if idx not in self.disks:
8614 continue
8616 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8618 self.cfg.SetDiskID(dev, node_name)
8620 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8621 names = _GenerateUniqueNames(self.lu, lv_names)
8623 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8624 logical_id=(vgname, names[0]))
8625 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8626 logical_id=(vgname, names[1]))
8628 new_lvs = [lv_data, lv_meta]
8629 old_lvs = dev.children
8630 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8632 # we pass force_create=True to force the LVM creation
8633 for new_lv in new_lvs:
8634 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8635 _GetInstanceInfoText(self.instance), False)
8637 return iv_names
8639 def _CheckDevices(self, node_name, iv_names):
8640 for name, (dev, _, _) in iv_names.iteritems():
8641 self.cfg.SetDiskID(dev, node_name)
8643 result = self.rpc.call_blockdev_find(node_name, dev)
8645 msg = result.fail_msg
8646 if msg or not result.payload:
8648 msg = "disk not found"
8649 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8652 if result.payload.is_degraded:
8653 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8655 def _RemoveOldStorage(self, node_name, iv_names):
8656 for name, (_, old_lvs, _) in iv_names.iteritems():
8657 self.lu.LogInfo("Remove logical volumes for %s" % name)
8658 for lv in old_lvs:
8660 self.cfg.SetDiskID(lv, node_name)
8662 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8664 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8665 hint="remove unused LVs manually")
8667 def _ReleaseNodeLock(self, node_name):
8668 """Releases the lock for a given node."""
8669 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8671 def _ExecDrbd8DiskOnly(self, feedback_fn):
8672 """Replace a disk on the primary or secondary for DRBD 8.
8674 The algorithm for replace is quite complicated:
8676 1. for each disk to be replaced:
8678 1. create new LVs on the target node with unique names
8679 1. detach old LVs from the drbd device
8680 1. rename old LVs to name_replaced.<time_t>
8681 1. rename new LVs to old LVs
8682 1. attach the new LVs (with the old names now) to the drbd device
8684 1. wait for sync across all devices
8686 1. for each modified disk:
8688 1. remove old LVs (which have the name name_replaces.<time_t>)
8690 Failures are not very well handled.
8693 steps_total = 6
8695 # Step: check device activation
8696 self.lu.LogStep(1, steps_total, "Check device existence")
8697 self._CheckDisksExistence([self.other_node, self.target_node])
8698 self._CheckVolumeGroup([self.target_node, self.other_node])
8700 # Step: check other node consistency
8701 self.lu.LogStep(2, steps_total, "Check peer consistency")
8702 self._CheckDisksConsistency(self.other_node,
8703 self.other_node == self.instance.primary_node,
8704 False)
8706 # Step: create new storage
8707 self.lu.LogStep(3, steps_total, "Allocate new storage")
8708 iv_names = self._CreateNewStorage(self.target_node)
8710 # Step: for each lv, detach+rename*2+attach
8711 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8712 for dev, old_lvs, new_lvs in iv_names.itervalues():
8713 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8715 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8716 old_lvs)
8717 result.Raise("Can't detach drbd from local storage on node"
8718 " %s for device %s" % (self.target_node, dev.iv_name))
8720 #cfg.Update(instance)
8722 # ok, we created the new LVs, so now we know we have the needed
8723 # storage; as such, we proceed on the target node to rename
8724 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8725 # using the assumption that logical_id == physical_id (which in
8726 # turn is the unique_id on that node)
8728 # FIXME(iustin): use a better name for the replaced LVs
8729 temp_suffix = int(time.time())
8730 ren_fn = lambda d, suff: (d.physical_id[0],
8731 d.physical_id[1] + "_replaced-%s" % suff)
8733 # Build the rename list based on what LVs exist on the node
8734 rename_old_to_new = []
8735 for to_ren in old_lvs:
8736 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8737 if not result.fail_msg and result.payload:
8739 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8741 self.lu.LogInfo("Renaming the old LVs on the target node")
8742 result = self.rpc.call_blockdev_rename(self.target_node,
8743 rename_old_to_new)
8744 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8746 # Now we rename the new LVs to the old LVs
8747 self.lu.LogInfo("Renaming the new LVs on the target node")
8748 rename_new_to_old = [(new, old.physical_id)
8749 for old, new in zip(old_lvs, new_lvs)]
8750 result = self.rpc.call_blockdev_rename(self.target_node,
8751 rename_new_to_old)
8752 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8754 for old, new in zip(old_lvs, new_lvs):
8755 new.logical_id = old.logical_id
8756 self.cfg.SetDiskID(new, self.target_node)
8758 for disk in old_lvs:
8759 disk.logical_id = ren_fn(disk, temp_suffix)
8760 self.cfg.SetDiskID(disk, self.target_node)
8762 # Now that the new lvs have the old name, we can add them to the device
8763 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8764 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8765 new_lvs)
8766 msg = result.fail_msg
8767 if msg:
8768 for new_lv in new_lvs:
8769 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8770 new_lv).fail_msg
8771 if msg2:
8772 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8773 hint=("cleanup manually the unused logical"
8774 " volumes"))
8775 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8777 dev.children = new_lvs
8779 self.cfg.Update(self.instance, feedback_fn)
8781 cstep = 5
8782 if self.early_release:
8783 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8784 cstep += 1
8785 self._RemoveOldStorage(self.target_node, iv_names)
8786 # WARNING: we release both node locks here, do not do other RPCs
8787 # than WaitForSync to the primary node
8788 self._ReleaseNodeLock([self.target_node, self.other_node])
8791 # This can fail as the old devices are degraded and _WaitForSync
8792 # does a combined result over all disks, so we don't check its return value
8793 self.lu.LogStep(cstep, steps_total, "Sync devices")
8795 _WaitForSync(self.lu, self.instance)
8797 # Check all devices manually
8798 self._CheckDevices(self.instance.primary_node, iv_names)
8800 # Step: remove old storage
8801 if not self.early_release:
8802 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8803 cstep += 1
8804 self._RemoveOldStorage(self.target_node, iv_names)
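# Condensed timeline of the rename dance above for one disk (names are
# illustrative): the DRBD device keeps running while its local storage is
# swapped underneath it.
#
#   detach:  drbd0 -x-> (data-lv, meta-lv)
#   rename:  data-lv          -> data-lv_replaced-<time_t>
#   rename:  .disk0_data-new  -> data-lv
#   attach:  drbd0 ---> (data-lv, meta-lv)   # now backed by the new LVs
#   sync, then remove the *_replaced-* volumes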
8806 def _ExecDrbd8Secondary(self, feedback_fn):
8807 """Replace the secondary node for DRBD 8.
8809 The algorithm for replace is quite complicated:
8810 - for all disks of the instance:
8811 - create new LVs on the new node with same names
8812 - shutdown the drbd device on the old secondary
8813 - disconnect the drbd network on the primary
8814 - create the drbd device on the new secondary
8815 - network attach the drbd on the primary, using an artifice:
8816 the drbd code for Attach() will connect to the network if it
8817 finds a device which is connected to the good local disks but
8819 - wait for sync across all devices
8820 - remove all disks from the old secondary
8822 Failures are not very well handled.
8825 steps_total = 6
8827 # Step: check device activation
8828 self.lu.LogStep(1, steps_total, "Check device existence")
8829 self._CheckDisksExistence([self.instance.primary_node])
8830 self._CheckVolumeGroup([self.instance.primary_node])
8832 # Step: check other node consistency
8833 self.lu.LogStep(2, steps_total, "Check peer consistency")
8834 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8836 # Step: create new storage
8837 self.lu.LogStep(3, steps_total, "Allocate new storage")
8838 for idx, dev in enumerate(self.instance.disks):
8839 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8840 (self.new_node, idx))
8841 # we pass force_create=True to force LVM creation
8842 for new_lv in dev.children:
8843 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8844 _GetInstanceInfoText(self.instance), False)
8846 # Step 4: drbd minors and drbd setup changes
8847 # after this, we must manually remove the drbd minors on both the
8848 # error and the success paths
8849 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8850 minors = self.cfg.AllocateDRBDMinor([self.new_node
8851 for dev in self.instance.disks],
8852 self.instance.name)
8853 logging.debug("Allocated minors %r", minors)
8855 iv_names = {}
8856 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8857 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8858 (self.new_node, idx))
8859 # create new devices on new_node; note that we create two IDs:
8860 # one without port, so the drbd will be activated without
8861 # networking information on the new node at this stage, and one
8862 # with network, for the latter activation in step 4
8863 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8864 if self.instance.primary_node == o_node1:
8865 p_minor = o_minor1
8866 else:
8867 assert self.instance.primary_node == o_node2, "Three-node instance?"
8868 p_minor = o_minor2
8870 new_alone_id = (self.instance.primary_node, self.new_node, None,
8871 p_minor, new_minor, o_secret)
8872 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8873 p_minor, new_minor, o_secret)
8875 iv_names[idx] = (dev, dev.children, new_net_id)
8876 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8878 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8879 logical_id=new_alone_id,
8880 children=dev.children,
8881 size=dev.size)
8882 try:
8883 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8884 _GetInstanceInfoText(self.instance), False)
8885 except errors.GenericError:
8886 self.cfg.ReleaseDRBDMinors(self.instance.name)
8887 raise
8889 # We have new devices, shutdown the drbd on the old secondary
8890 for idx, dev in enumerate(self.instance.disks):
8891 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8892 self.cfg.SetDiskID(dev, self.target_node)
8893 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8895 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8896 "node: %s" % (idx, msg),
8897 hint=("Please cleanup this device manually as"
8898 " soon as possible"))
8900 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8901 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8902 self.node_secondary_ip,
8903 self.instance.disks)\
8904 [self.instance.primary_node]
8906 msg = result.fail_msg
8907 if msg:
8908 # detaches didn't succeed (unlikely)
8909 self.cfg.ReleaseDRBDMinors(self.instance.name)
8910 raise errors.OpExecError("Can't detach the disks from the network on"
8911 " old node: %s" % (msg,))
8913 # if we managed to detach at least one, we update all the disks of
8914 # the instance to point to the new secondary
8915 self.lu.LogInfo("Updating instance configuration")
8916 for dev, _, new_logical_id in iv_names.itervalues():
8917 dev.logical_id = new_logical_id
8918 self.cfg.SetDiskID(dev, self.instance.primary_node)
8920 self.cfg.Update(self.instance, feedback_fn)
8922 # and now perform the drbd attach
8923 self.lu.LogInfo("Attaching primary drbds to new secondary"
8924 " (standalone => connected)")
8925 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8926 self.new_node],
8927 self.node_secondary_ip,
8928 self.instance.disks,
8929 self.instance.name,
8930 False)
8931 for to_node, to_result in result.items():
8932 msg = to_result.fail_msg
8934 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8936 hint=("please do a gnt-instance info to see the"
8937 " status of disks"))
8938 cstep = 5
8939 if self.early_release:
8940 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8941 cstep += 1
8942 self._RemoveOldStorage(self.target_node, iv_names)
8943 # WARNING: we release all node locks here, do not do other RPCs
8944 # than WaitForSync to the primary node
8945 self._ReleaseNodeLock([self.instance.primary_node,
8946 self.target_node,
8947 self.new_node])
8950 # This can fail as the old devices are degraded and _WaitForSync
8951 # does a combined result over all disks, so we don't check its return value
8952 self.lu.LogStep(cstep, steps_total, "Sync devices")
8954 _WaitForSync(self.lu, self.instance)
8956 # Check all devices manually
8957 self._CheckDevices(self.instance.primary_node, iv_names)
8959 # Step: remove old storage
8960 if not self.early_release:
8961 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8962 self._RemoveOldStorage(self.target_node, iv_names)
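# For orientation: DRBD8 disks in this module carry a logical_id of the form
# (nodeA, nodeB, port, minorA, minorB, secret). The secondary replacement
# above first creates the device with a port-less ("standalone") id and only
# later switches to the fully networked one, e.g. (hypothetical values):
#
#   new_alone_id = ("node1", "node3", None, 0, 4, "s3cr3t")
#   new_net_id   = ("node1", "node3", 11000, 0, 4, "s3cr3t")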
8965 class LURepairNodeStorage(NoHooksLU):
8966 """Repairs the volume group on a node.
8971 def CheckArguments(self):
8972 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8974 storage_type = self.op.storage_type
8976 if (constants.SO_FIX_CONSISTENCY not in
8977 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8978 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8979 " repaired" % storage_type,
8982 def ExpandNames(self):
8983 self.needed_locks = {
8984 locking.LEVEL_NODE: [self.op.node_name],
8985 }
8987 def _CheckFaultyDisks(self, instance, node_name):
8988 """Ensure faulty disks abort the opcode or at least warn."""
8989 try:
8990 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8991 node_name, True):
8992 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8993 " node '%s'" % (instance.name, node_name),
8995 except errors.OpPrereqError, err:
8996 if self.op.ignore_consistency:
8997 self.proc.LogWarning(str(err.args[0]))
8998 else:
8999 raise
9001 def CheckPrereq(self):
9002 """Check prerequisites.
9005 # Check whether any instance on this node has faulty disks
9006 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9007 if not inst.admin_up:
9008 continue
9009 check_nodes = set(inst.all_nodes)
9010 check_nodes.discard(self.op.node_name)
9011 for inst_node_name in check_nodes:
9012 self._CheckFaultyDisks(inst, inst_node_name)
9014 def Exec(self, feedback_fn):
9015 feedback_fn("Repairing storage unit '%s' on %s ..." %
9016 (self.op.name, self.op.node_name))
9018 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9019 result = self.rpc.call_storage_execute(self.op.node_name,
9020 self.op.storage_type, st_args,
9021 self.op.name,
9022 constants.SO_FIX_CONSISTENCY)
9023 result.Raise("Failed to repair storage unit '%s' on %s" %
9024 (self.op.name, self.op.node_name))
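# This LU backs the storage-repair CLI path; a hypothetical invocation
# (layout follows the usual gnt-* conventions, node/VG names made up):
#
#   gnt-node repair-storage node1.example.com lvm-vg xenvg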
9027 class LUNodeEvacStrategy(NoHooksLU):
9028 """Computes the node evacuation strategy.
9033 def CheckArguments(self):
9034 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9036 def ExpandNames(self):
9037 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9038 self.needed_locks = locks = {}
9039 if self.op.remote_node is None:
9040 locks[locking.LEVEL_NODE] = locking.ALL_SET
9042 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9043 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9045 def Exec(self, feedback_fn):
9046 if self.op.remote_node is not None:
9047 instances = []
9048 for node in self.op.nodes:
9049 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9050 result = []
9051 for i in instances:
9052 if i.primary_node == self.op.remote_node:
9053 raise errors.OpPrereqError("Node %s is the primary node of"
9054 " instance %s, cannot use it as"
9056 (self.op.remote_node, i.name),
9058 result.append([i.name, self.op.remote_node])
9059 else:
9060 ial = IAllocator(self.cfg, self.rpc,
9061 mode=constants.IALLOCATOR_MODE_MEVAC,
9062 evac_nodes=self.op.nodes)
9063 ial.Run(self.op.iallocator, validate=True)
9065 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9071 class LUInstanceGrowDisk(LogicalUnit):
9072 """Grow a disk of an instance.
9075 HPATH = "disk-grow"
9076 HTYPE = constants.HTYPE_INSTANCE
9079 def ExpandNames(self):
9080 self._ExpandAndLockInstance()
9081 self.needed_locks[locking.LEVEL_NODE] = []
9082 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9084 def DeclareLocks(self, level):
9085 if level == locking.LEVEL_NODE:
9086 self._LockInstancesNodes()
9088 def BuildHooksEnv(self):
9091 This runs on the master, the primary and all the secondaries.
9095 "DISK": self.op.disk,
9096 "AMOUNT": self.op.amount,
9098 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9099 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9100 return env, nl, nl
9102 def CheckPrereq(self):
9103 """Check prerequisites.
9105 This checks that the instance is in the cluster.
9108 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9109 assert instance is not None, \
9110 "Cannot retrieve locked instance %s" % self.op.instance_name
9111 nodenames = list(instance.all_nodes)
9112 for node in nodenames:
9113 _CheckNodeOnline(self, node)
9115 self.instance = instance
9117 if instance.disk_template not in constants.DTS_GROWABLE:
9118 raise errors.OpPrereqError("Instance's disk layout does not support"
9119 " growing.", errors.ECODE_INVAL)
9121 self.disk = instance.FindDisk(self.op.disk)
9123 if instance.disk_template not in (constants.DT_FILE,
9124 constants.DT_SHARED_FILE):
9125 # TODO: check the free disk space for file, when that feature will be
9126 # supported
9127 _CheckNodesFreeDiskPerVG(self, nodenames,
9128 self.disk.ComputeGrowth(self.op.amount))
9130 def Exec(self, feedback_fn):
9131 """Execute disk grow.
9134 instance = self.instance
9135 disk = self.disk
9137 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9139 raise errors.OpExecError("Cannot activate block device to grow")
9141 for node in instance.all_nodes:
9142 self.cfg.SetDiskID(disk, node)
9143 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9144 result.Raise("Grow request failed to node %s" % node)
9146 # TODO: Rewrite code to work properly
9147 # DRBD goes into sync mode for a short amount of time after executing the
9148 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9149 # calling "resize" in sync mode fails. Sleeping for a short amount of
9150 # time is a work-around.
9151 time.sleep(5)
9153 disk.RecordGrow(self.op.amount)
9154 self.cfg.Update(instance, feedback_fn)
9155 if self.op.wait_for_sync:
9156 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9158 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9159 " status.\nPlease check the instance.")
9160 if not instance.admin_up:
9161 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9162 elif not instance.admin_up:
9163 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9164 " not supposed to be running because no wait for"
9165 " sync mode was requested.")
9168 class LUInstanceQueryData(NoHooksLU):
9169 """Query runtime instance data.
9174 def ExpandNames(self):
9175 self.needed_locks = {}
9176 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9178 if self.op.instances:
9179 self.wanted_names = []
9180 for name in self.op.instances:
9181 full_name = _ExpandInstanceName(self.cfg, name)
9182 self.wanted_names.append(full_name)
9183 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9185 self.wanted_names = None
9186 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9188 self.needed_locks[locking.LEVEL_NODE] = []
9189 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9191 def DeclareLocks(self, level):
9192 if level == locking.LEVEL_NODE:
9193 self._LockInstancesNodes()
9195 def CheckPrereq(self):
9196 """Check prerequisites.
9198 This only checks the optional instance list against the existing names.
9201 if self.wanted_names is None:
9202 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9204 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
9205 in self.wanted_names]
9207 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9208 """Returns the status of a block device
9211 if self.op.static or not node:
9212 return None
9214 self.cfg.SetDiskID(dev, node)
9216 result = self.rpc.call_blockdev_find(node, dev)
9217 if result.offline:
9218 return None
9220 result.Raise("Can't compute disk status for %s" % instance_name)
9222 status = result.payload
9223 if status is None:
9224 return None
9226 return (status.dev_path, status.major, status.minor,
9227 status.sync_percent, status.estimated_time,
9228 status.is_degraded, status.ldisk_status)
9230 def _ComputeDiskStatus(self, instance, snode, dev):
9231 """Compute block device status.
9234 if dev.dev_type in constants.LDS_DRBD:
9235 # we change the snode then (otherwise we use the one passed in)
9236 if dev.logical_id[0] == instance.primary_node:
9237 snode = dev.logical_id[1]
9238 else:
9239 snode = dev.logical_id[0]
9241 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9242 instance.name, dev)
9243 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9245 if dev.children:
9246 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9247 for child in dev.children]
9248 else:
9249 dev_children = []
9251 return {
9252 "iv_name": dev.iv_name,
9253 "dev_type": dev.dev_type,
9254 "logical_id": dev.logical_id,
9255 "physical_id": dev.physical_id,
9256 "pstatus": dev_pstatus,
9257 "sstatus": dev_sstatus,
9258 "children": dev_children,
9265 def Exec(self, feedback_fn):
9266 """Gather and return data"""
9269 cluster = self.cfg.GetClusterInfo()
9271 for instance in self.wanted_instances:
9272 if not self.op.static:
9273 remote_info = self.rpc.call_instance_info(instance.primary_node,
9275 instance.hypervisor)
9276 remote_info.Raise("Error checking node %s" % instance.primary_node)
9277 remote_info = remote_info.payload
9278 if remote_info and "state" in remote_info:
9281 remote_state = "down"
9284 if instance.admin_up:
9287 config_state = "down"
9289 disks = [self._ComputeDiskStatus(instance, None, device)
9290 for device in instance.disks]
9293 "name": instance.name,
9294 "config_state": config_state,
9295 "run_state": remote_state,
9296 "pnode": instance.primary_node,
9297 "snodes": instance.secondary_nodes,
9299 # this happens to be the same format used for hooks
9300 "nics": _NICListToTuple(self, instance.nics),
9301 "disk_template": instance.disk_template,
9303 "hypervisor": instance.hypervisor,
9304 "network_port": instance.network_port,
9305 "hv_instance": instance.hvparams,
9306 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9307 "be_instance": instance.beparams,
9308 "be_actual": cluster.FillBE(instance),
9309 "os_instance": instance.osparams,
9310 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9311 "serial_no": instance.serial_no,
9312 "mtime": instance.mtime,
9313 "ctime": instance.ctime,
9314 "uuid": instance.uuid,
9317 result[instance.name] = idict
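  # For reference, a sketch of the per-instance dict returned by Exec (values
  # invented for illustration; the keys mirror the idict built above):
  #
  #   {"inst1.example.com": {"name": "inst1.example.com",
  #                          "config_state": "up",
  #                          "run_state": "up",
  #                          "pnode": "node1.example.com",
  #                          "snodes": ["node2.example.com"],
  #                          "disk_template": "drbd",
  #                          ...}}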
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{"size": d.size, "vg": d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
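  # The conversion helpers above are dispatched through this table: Exec looks
  # up the (old template, new template) pair, so e.g. converting a plain
  # instance to DRBD resolves as:
  #
  #   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
  #   self._DISK_CONVERSIONS[mode](self, feedback_fn)  # _ConvertPlainToDrbd
  #
  # Unsupported pairs are rejected earlier, in CheckPrereq.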
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
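  # A sketch of the result shape (node names invented): nodes whose export
  # list RPC failed map to False, all others to their export list, e.g.:
  #
  #   {"node1.example.com": ["inst1.example.com"],
  #    "node2.example.com": False}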
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)
  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
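  # For reference (a sketch, values invented): the caller receives the overall
  # finalization status plus one boolean per disk, e.g. (True, [True, True])
  # for a successful two-disk export; partial failures never reach this
  # return statement because they raise OpExecError above instead.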
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Name.")
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information.
    self.node_data = self.cfg.GetAllNodesInfo()
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
    affected_groups.add(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: list(affected_groups),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR
    are considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
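  # A worked sketch (names invented): take a DRBD instance on nodes n1 and n2,
  # both currently in group g1. Reassigning n2 to group g2, i.e.
  # changes=[("n2", "g2")], makes the instance span two groups, so:
  #
  #   CheckAssignmentForSplitInstances([("n2", "g2")], node_data, instances)
  #   => (["inst1"], [])
  #
  # Had the instance already spanned g1 and g2 and the change left that
  # unchanged, it would instead appear in the second list.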
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for
    # the latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
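  # A sketch of the computed mappings (UUIDs and names invented): for a wanted
  # group "uuid-1" containing node1, which hosts inst1, the query data would
  # be built from:
  #
  #   group_to_nodes = {"uuid-1": ["node1"]}
  #   group_to_instances = {"uuid-1": ["inst1"]}
  #
  # with either mapping left as None when the corresponding field set
  # (GQ_NODE/GQ_INST) was not requested.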
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetAllNodesInfo()

    run_nodes = [mn]
    all_nodes.pop(mn, None)

    for node in all_nodes.values():
      if node.group == self.group_uuid:
        run_nodes.append(node.name)

    return env, run_nodes, run_nodes

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
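  # Illustrative usage sketch (hypothetical client-side code, not part of this
  # module): a five-second delay on the master only, repeated twice, would be
  # submitted roughly as:
  #
  #   op = opcodes.OpTestDelay(duration=5.0, on_master=True,
  #                            on_nodes=[], repeat=2)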
11065 class LUTestJqueue(NoHooksLU):
11066 """Utility LU to test some aspects of the job queue.
11071 # Must be lower than default timeout for WaitForJobChange to see whether it
11072 # notices changed jobs
11073 _CLIENT_CONNECT_TIMEOUT = 20.0
11074 _CLIENT_CONFIRM_TIMEOUT = 60.0
11077 def _NotifyUsingSocket(cls, cb, errcls):
11078 """Opens a Unix socket and waits for another program to connect.
11081 @param cb: Callback to send socket name to client
11082 @type errcls: class
11083 @param errcls: Exception class to use for errors
11086 # Using a temporary directory as there's no easy way to create temporary
11087 # sockets without writing a custom loop around tempfile.mktemp and
11089 tmpdir = tempfile.mkdtemp()
11091 tmpsock = utils.PathJoin(tmpdir, "sock")
11093 logging.debug("Creating temporary socket at %s", tmpsock)
11094 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11099 # Send details to client
11102 # Wait for client to connect before continuing
11103 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11105 (conn, _) = sock.accept()
11106 except socket.error, err:
11107 raise errcls("Client didn't connect in time (%s)" % err)
11111 # Remove as soon as client is connected
11112 shutil.rmtree(tmpdir)
11114 # Wait for client to close
11117 # pylint: disable-msg=E1101
11118 # Instance of '_socketobject' has no ... member
11119 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11121 except socket.error, err:
11122 raise errcls("Client failed to confirm notification (%s)" % err)
11126 def _SendNotification(self, test, arg, sockname):
11127 """Sends a notification to the client.
11130 @param test: Test name
11131 @param arg: Test argument (depends on test)
11132 @type sockname: string
11133 @param sockname: Socket path
11136 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11138 def _Notify(self, prereq, test, arg):
11139 """Notifies the client of a test.
11142 @param prereq: Whether this is a prereq-phase test
11144 @param test: Test name
11145 @param arg: Test argument (depends on test)
11149 errcls = errors.OpPrereqError
11151 errcls = errors.OpExecError
11153 return self._NotifyUsingSocket(compat.partial(self._SendNotification,

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes ((in|out)_(data|text)), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    self.mode = mode
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.name = None
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
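
  # Illustrative sketch (not part of the module): an allocation request is
  # built by instantiating this class with every _ALLO_KEYS member as a
  # keyword argument; missing or unknown keys raise ProgrammerError. All
  # values below are invented for illustration:
  #
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_ALLOC,
  #                    name="inst1.example.com",
  #                    mem_size=1024,
  #                    disks=[{"size": 10240, "mode": "w"}],
  #                    disk_template=constants.DT_DRBD8,
  #                    os="debian-image", tags=[], nics=[{"mac": "auto"}],
  #                    vcpus=2, hypervisor=constants.HT_XEN_PVM)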

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
      "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data
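
  # For reference, a pared-down sketch of the structure assembled above
  # (values invented; the authoritative format is the iallocator protocol
  # documentation):
  #
  #   {
  #     "version": 2,
  #     "cluster_name": "cluster.example.com",
  #     "cluster_tags": [],
  #     "enabled_hypervisors": ["xen-pvm"],
  #     "nodegroups": {"<uuid>": {"name": "default",
  #                               "alloc_policy": "preferred"}},
  #     "nodes": {...},      # static + dynamic per-node data, see below
  #     "instances": {...},  # per-instance data, see _ComputeInstanceData
  #   }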

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict mapping node names to dicts of their static
        (config-based) attributes

    """
    node_results = {}
    for ninfo in node_cfg.values():
      # fill in static (config-based) values
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      node_results[ninfo.name] = pnr

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute dynamic (runtime) node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
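
  # Worked example of the memory_free adjustment above (numbers invented):
  # an instance with BE_MEMORY = 1024 whose hypervisor currently reports
  # only 512 MiB in use yields i_mem_diff = 1024 - 512 = 512, so a further
  # 512 MiB is subtracted from the node's reported memory_free. This way
  # the allocator sees the instance's full configured memory as reserved,
  # not just its current usage. If the instance already uses more than its
  # BE_MEMORY, max(0, i_mem_diff) keeps the correction from going negative.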

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
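
  # For reference, one entry of the mapping built above might look like
  # this (values invented; "bridge" only appears for bridged NICs, and
  # disk_space_total follows _ComputeDiskSize for the disk template):
  #
  #   "inst1.example.com": {
  #     "tags": [], "admin_up": True, "vcpus": 2, "memory": 1024,
  #     "os": "debian-image",
  #     "nodes": ["node1.example.com", "node2.example.com"],
  #     "nics": [{"mac": "aa:00:00:31:cf:05", "ip": None,
  #               "mode": "bridged", "link": "xen-br0",
  #               "bridge": "xen-br0"}],
  #     "disks": [{"size": 10240, "mode": "w"}],
  #     "disk_template": "drbd8", "hypervisor": "xen-pvm",
  #     "disk_space_total": 10368,
  #   }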

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
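
  # Typical usage sketch (not part of the module; the result handling is
  # illustrative). "hail" is the htools allocator shipped with Ganeti:
  #
  #   ial.Run("hail")
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes: %s" % ial.info,
  #                                errors.ECODE_NORES)
  #   target_nodes = ial.result  # list of node names chosen by the script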

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
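
  # For reference, a minimal script response that passes the validation
  # above (node names invented):
  #
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  #
  # Legacy scripts that still return "nodes" instead of "result" are
  # accepted via the backwards-compatibility shim.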


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
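
# Usage sketch (illustrative, not part of the module): opcode code paths
# resolve a query implementation by resource name and let it drive the
# generic query machinery, e.g.
#
#   impl = _GetQueryImplementation(constants.QR_NODE)
#   qry = impl(qlang.MakeSimpleFilter("name", ["node1.example.com"]),
#              ["name", "pinst_cnt"], False)
#
# The constructor arguments shown (filter, selected fields, use_locking)
# are an assumption based on how these classes are used elsewhere in this
# module.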