# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the
      caller in dry-run mode (signalled by the dry_run opcode parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()
142 """Returns the SshRunner object
146 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
149 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS
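
    A typical implementation, shown here as a minimal sketch (mirroring
    the usage documented for L{_LockInstancesNodes} below), is::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()

    """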

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_' as this will
    be handled in the hooks runner. Also note that additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    An empty node list should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the 'unused argument' and 'could
    # be a function' warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
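
  # Illustrative usage (a hypothetical LU, not code from this module): an
  # instance-level LU would typically call the helper above from its
  # ExpandNames:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #
  # after which self.op.instance_name is canonical and the instance lock
  # is declared in self.needed_locks[locking.LEVEL_INSTANCE].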

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
404 """Tasklet base class.
406 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
407 they can mix legacy code with tasklets. Locking needs to be done in the LU,
408 tasklets know nothing about locks.
410 Subclasses must follow these rules:
411 - Implement CheckPrereq
415 def __init__(self, lu):
422 def CheckPrereq(self):
423 """Check prerequisites for this tasklets.
425 This method should check whether the prerequisites for the execution of
426 this tasklet are fulfilled. It can do internode communication, but it
427 should be idempotent - no cluster or system changes are allowed.
429 The method should raise errors.OpPrereqError in case something is not
430 fulfilled. Its return value is ignored.
432 This method should also update all parameters to their canonical form if it
433 hasn't been done before.
438 def Exec(self, feedback_fn):
439 """Execute the tasklet.
441 This method should implement the actual work. It should raise
442 errors.OpExecError for failures that are somewhat dealt with in code, or
446 raise NotImplementedError
450 """Base for query utility classes.
453 #: Attribute holding field definitions
456 def __init__(self, names, fields, use_locking):
457 """Initializes this class.
461 self.use_locking = use_locking
463 self.query = query.Query(self.FIELDS, fields)
464 self.requested_data = self.query.RequestedData()
466 self.do_locking = None
469 def _GetNames(self, lu, all_names, lock_level):
470 """Helper function to determine names asked for in the query.
474 names = lu.acquired_locks[lock_level]
478 if self.wanted == locking.ALL_SET:
479 assert not self.names
480 # caller didn't specify names, so ordering is not important
481 return utils.NiceSort(names)
483 # caller specified names and we must keep the same order
485 assert not self.do_locking or lu.acquired_locks[lock_level]
487 missing = set(self.wanted).difference(names)
489 raise errors.OpExecError("Some items were removed before retrieving"
490 " their data: %s" % missing)
492 # Return expanded names
496 def FieldsQuery(cls, fields):
497 """Returns list of available fields.
499 @return: List of L{objects.QueryFieldDefinition}
502 return query.QueryFields(cls.FIELDS, fields)
504 def ExpandNames(self, lu):
505 """Expand names for this query.
507 See L{LogicalUnit.ExpandNames}.
510 raise NotImplementedError()
512 def DeclareLocks(self, lu, level):
513 """Declare locks for this query.
515 See L{LogicalUnit.DeclareLocks}.
518 raise NotImplementedError()
520 def _GetQueryData(self, lu):
521 """Collects all data for this query.
523 @return: Query data object
526 raise NotImplementedError()
528 def NewStyleQuery(self, lu):
529 """Collect data and execute query.
532 return query.GetQueryResponse(self.query, self._GetQueryData(lu))
534 def OldStyleQuery(self, lu):
535 """Collect data and execute query.
538 return self.query.OldStyleQuery(self._GetQueryData(lu))
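

# A minimal sketch of how a concrete query class could wire the hooks
# above together, kept as a comment because the names involved (the
# class name, the FIELDS table and the locking details) are illustrative
# assumptions, not code from this module:
#
#   class _HypotheticalNodeQuery(_QueryBase):
#     FIELDS = ...  # a field-definition table accepted by query.Query
#
#     def ExpandNames(self, lu):
#       lu.needed_locks = {}
#       self.wanted = self.names or locking.ALL_SET
#       self.do_locking = self.use_locking
#
#     def DeclareLocks(self, lu, level):
#       pass
#
#     def _GetQueryData(self, lu):
#       names = self._GetNames(lu, lu.cfg.GetNodeList(),
#                              locking.LEVEL_NODE)
#       return ...  # a query data object built from the expanded names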


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is of the wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
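
# Illustrative behavior of _GetUpdatedParams (hypothetical values, not
# code from this module): with use_default=True,
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
# returns {"b": 2, "c": 3} - "a" is deleted so that it reverts to its
# default, while "c" is added with its explicit value.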


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
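
# Worked example (hypothetical numbers): with candidate_pool_size = 10,
# 3 current candidates and 4 desired ones, mc_should becomes
# min(4 + 1, 10) = 5, and since 3 < 5 the new node promotes itself.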


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
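
# Illustrative note (hypothetical OS names): for an OS that declares
# variants, the user-supplied name is expected in "name+variant" form,
# e.g. "debootstrap+default"; a bare "debootstrap" would fail the
# "OS name must include a variant" check above.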


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.", errors.ECODE_INVAL)


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this node is unknown to the configuration (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)
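
  # Illustrative output shapes (hypothetical values): with error_codes
  # enabled the message is machine-parseable, e.g.
  #   ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
  # while without it the same condition is reported as
  #   ERROR: node node1.example.com: unable to check volume groups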

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data (volume group and PV list).

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # corrupted
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue

      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
1892 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1893 """Verifies and updates the node instance list.
1895 If the listing was successful, then updates this node's instance
1896 list. Otherwise, it marks the RPC call as failed for the instance
1897 list key.
1899 @type ninfo: L{objects.Node}
1900 @param ninfo: the node to check
1901 @param nresult: the remote results for the node
1902 @param nimg: the node image object
1905 idata = nresult.get(constants.NV_INSTANCELIST, None)
1906 test = not isinstance(idata, list)
1907 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1908 " (instancelist): %s", utils.SafeEncode(str(idata)))
1910 nimg.hyp_fail = True
1912 nimg.instances = idata
1914 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1915 """Verifies and computes a node information map
1917 @type ninfo: L{objects.Node}
1918 @param ninfo: the node to check
1919 @param nresult: the remote results for the node
1920 @param nimg: the node image object
1921 @param vg_name: the configured VG name
1925 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1927 # try to read free memory (from the hypervisor)
1928 hv_info = nresult.get(constants.NV_HVINFO, None)
1929 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1930 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1933 nimg.mfree = int(hv_info["memory_free"])
1934 except (ValueError, TypeError):
1935 _ErrorIf(True, self.ENODERPC, node,
1936 "node returned invalid nodeinfo, check hypervisor")
1938 # FIXME: devise a free space model for file-based instances as well
1939 if vg_name is not None:
1940 test = (constants.NV_VGLIST not in nresult or
1941 vg_name not in nresult[constants.NV_VGLIST])
1942 _ErrorIf(test, self.ENODELVM, node,
1943 "node didn't return data for the volume group '%s'"
1944 " - it is either missing or broken", vg_name)
1947 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1948 except (ValueError, TypeError):
1949 _ErrorIf(True, self.ENODERPC, node,
1950 "node returned invalid LVM info, check LVM status")
1952 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1953 """Gets per-disk status information for all instances.
1955 @type nodelist: list of strings
1956 @param nodelist: Node names
1957 @type node_image: dict of (name, L{objects.Node})
1958 @param node_image: Node objects
1959 @type instanceinfo: dict of (name, L{objects.Instance})
1960 @param instanceinfo: Instance objects
1961 @rtype: {instance: {node: [(success, payload)]}}
1962 @return: a dictionary of per-instance dictionaries with nodes as
1963 keys and disk information as values; the disk information is a
1964 list of tuples (success, payload)
1967 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1970 node_disks_devonly = {}
1971 diskless_instances = set()
1972 diskless = constants.DT_DISKLESS
1974 for nname in nodelist:
1975 node_instances = list(itertools.chain(node_image[nname].pinst,
1976 node_image[nname].sinst))
1977 diskless_instances.update(inst for inst in node_instances
1978 if instanceinfo[inst].disk_template == diskless)
1979 disks = [(inst, disk)
1980 for inst in node_instances
1981 for disk in instanceinfo[inst].disks]
1984 # No need to collect data
1987 node_disks[nname] = disks
1989 # Creating copies as SetDiskID below will modify the objects and that can
1990 # lead to incorrect data returned from nodes
1991 devonly = [dev.Copy() for (_, dev) in disks]
1994 self.cfg.SetDiskID(dev, nname)
1996 node_disks_devonly[nname] = devonly
1998 assert len(node_disks) == len(node_disks_devonly)
2000 # Collect data from all nodes with disks
2001 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2004 assert len(result) == len(node_disks)
2008 for (nname, nres) in result.items():
2009 disks = node_disks[nname]
2012 # No data from this node
2013 data = len(disks) * [(False, "node offline")]
2016 _ErrorIf(msg, self.ENODERPC, nname,
2017 "while getting disk information: %s", msg)
2019 # No data from this node
2020 data = len(disks) * [(False, msg)]
2023 for idx, i in enumerate(nres.payload):
2024 if isinstance(i, (tuple, list)) and len(i) == 2:
2027 logging.warning("Invalid result from node %s, entry %d: %s",
2029 data.append((False, "Invalid result from the remote node"))
2031 for ((inst, _), status) in zip(disks, data):
2032 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2034 # Add empty entries for diskless instances.
2035 for inst in diskless_instances:
2036 assert inst not in instdisk
2039 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2040 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2041 compat.all(isinstance(s, (tuple, list)) and
2042 len(s) == 2 for s in statuses)
2043 for inst, nnames in instdisk.items()
2044 for nname, statuses in nnames.items())
2045 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
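# A sketch (hypothetical names) of the mapping built by this method:
#   instdisk = {"inst1.example.com":
#                 {"node1.example.com": [(True, payload),
#                                        (False, "node offline")]}}
# i.e. per-instance, per-node lists of (success, payload) tuples; diskless
# instances are present but map to empty dicts.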
2049 def _VerifyHVP(self, hvp_data):
2050 """Verifies locally the syntax of the hypervisor parameters.
2053 for item, hv_name, hv_params in hvp_data:
2054 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2057 hv_class = hypervisor.GetHypervisor(hv_name)
2058 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2059 hv_class.CheckParameterSyntax(hv_params)
2060 except errors.GenericError, err:
2061 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
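# The hvp_data entries verified above are (source, hypervisor, params)
# triples as built in Exec below, e.g. (illustrative values):
#   [("cluster", "xen-pvm", {...}),
#    ("os debian-etch", "xen-pvm", {...}),
#    ("instance inst1.example.com", "xen-pvm", {...})]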
2064 def BuildHooksEnv(self):
2067 Cluster-Verify hooks are run in the post phase only; if they fail, their
2068 output is logged in the verify output and the verification fails.
2071 all_nodes = self.cfg.GetNodeList()
2073 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2075 for node in self.cfg.GetAllNodesInfo().values():
2076 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2078 return env, [], all_nodes
2080 def Exec(self, feedback_fn):
2081 """Verify integrity of cluster, performing various test on nodes.
2084 # This method has too many local variables. pylint: disable-msg=R0914
2086 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2087 verbose = self.op.verbose
2088 self._feedback_fn = feedback_fn
2089 feedback_fn("* Verifying global settings")
2090 for msg in self.cfg.VerifyConfig():
2091 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2093 # Check the cluster certificates
2094 for cert_filename in constants.ALL_CERT_FILES:
2095 (errcode, msg) = _VerifyCertificate(cert_filename)
2096 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2098 vg_name = self.cfg.GetVGName()
2099 drbd_helper = self.cfg.GetDRBDHelper()
2100 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2101 cluster = self.cfg.GetClusterInfo()
2102 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2103 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2104 nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2105 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2106 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2107 for iname in instancelist)
2108 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2109 i_non_redundant = [] # Non-redundant instances
2110 i_non_a_balanced = [] # Non-auto-balanced instances
2111 n_offline = 0 # Count of offline nodes
2112 n_drained = 0 # Count of nodes being drained
2113 node_vol_should = {}
2115 # FIXME: verify OS list
2116 # do local checksums
2117 master_files = [constants.CLUSTER_CONF_FILE]
2118 master_node = self.master_node = self.cfg.GetMasterNode()
2119 master_ip = self.cfg.GetMasterIP()
2121 file_names = ssconf.SimpleStore().GetFileList()
2122 file_names.extend(constants.ALL_CERT_FILES)
2123 file_names.extend(master_files)
2124 if cluster.modify_etc_hosts:
2125 file_names.append(constants.ETC_HOSTS)
2127 local_checksums = utils.FingerprintFiles(file_names)
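# utils.FingerprintFiles returns a dict mapping each existing file to its
# checksum, e.g. (illustrative): {"/etc/hosts": "5891b5b522d5df086d0ff0b1..."}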
2129 # Compute the set of hypervisor parameters
2131 for hv_name in hypervisors:
2132 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2133 for os_name, os_hvp in cluster.os_hvp.items():
2134 for hv_name, hv_params in os_hvp.items():
2137 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2138 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2139 # TODO: collapse identical parameter values into a single one
2140 for instance in instanceinfo.values():
2141 if not instance.hvparams:
2143 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2144 cluster.FillHV(instance)))
2145 # and verify them locally
2146 self._VerifyHVP(hvp_data)
2148 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2149 node_verify_param = {
2150 constants.NV_FILELIST: file_names,
2151 constants.NV_NODELIST: [node.name for node in nodeinfo
2152 if not node.offline],
2153 constants.NV_HYPERVISOR: hypervisors,
2154 constants.NV_HVPARAMS: hvp_data,
2155 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2156 node.secondary_ip) for node in nodeinfo
2157 if not node.offline],
2158 constants.NV_INSTANCELIST: hypervisors,
2159 constants.NV_VERSION: None,
2160 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2161 constants.NV_NODESETUP: None,
2162 constants.NV_TIME: None,
2163 constants.NV_MASTERIP: (master_node, master_ip),
2164 constants.NV_OSLIST: None,
2165 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2168 if vg_name is not None:
2169 node_verify_param[constants.NV_VGLIST] = None
2170 node_verify_param[constants.NV_LVLIST] = vg_name
2171 node_verify_param[constants.NV_PVLIST] = [vg_name]
2172 node_verify_param[constants.NV_DRBDLIST] = None
2175 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2177 # Build our expected cluster state
2178 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2180 vm_capable=node.vm_capable))
2181 for node in nodeinfo)
2185 for node in nodeinfo:
2186 path = _SupportsOob(self.cfg, node)
2187 if path and path not in oob_paths:
2188 oob_paths.append(path)
2191 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2193 for instance in instancelist:
2194 inst_config = instanceinfo[instance]
2196 for nname in inst_config.all_nodes:
2197 if nname not in node_image:
2199 gnode = self.NodeImage(name=nname)
2201 node_image[nname] = gnode
2203 inst_config.MapLVsByNode(node_vol_should)
2205 pnode = inst_config.primary_node
2206 node_image[pnode].pinst.append(instance)
2208 for snode in inst_config.secondary_nodes:
2209 nimg = node_image[snode]
2210 nimg.sinst.append(instance)
2211 if pnode not in nimg.sbp:
2212 nimg.sbp[pnode] = []
2213 nimg.sbp[pnode].append(instance)
2215 # At this point, we have the in-memory data structures complete,
2216 # except for the runtime information, which we'll gather next
2218 # Due to the way our RPC system works, exact response times cannot be
2219 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2220 # time before and after executing the request, we can at least have a time
2221 # window.
2222 nvinfo_starttime = time.time()
2223 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2224 self.cfg.GetClusterName())
2225 nvinfo_endtime = time.time()
2227 all_drbd_map = self.cfg.ComputeDRBDMap()
2229 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2230 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2232 feedback_fn("* Verifying node status")
2236 for node_i in nodeinfo:
2238 nimg = node_image[node]
2242 feedback_fn("* Skipping offline node %s" % (node,))
2246 if node == master_node:
2248 elif node_i.master_candidate:
2249 ntype = "master candidate"
2250 elif node_i.drained:
2256 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2258 msg = all_nvinfo[node].fail_msg
2259 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2261 nimg.rpc_fail = True
2264 nresult = all_nvinfo[node].payload
2266 nimg.call_ok = self._VerifyNode(node_i, nresult)
2267 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2268 self._VerifyNodeNetwork(node_i, nresult)
2269 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2272 self._VerifyOob(node_i, nresult)
2275 self._VerifyNodeLVM(node_i, nresult, vg_name)
2276 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2279 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2280 self._UpdateNodeInstances(node_i, nresult, nimg)
2281 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2282 self._UpdateNodeOS(node_i, nresult, nimg)
2283 if not nimg.os_fail:
2284 if refos_img is None:
2286 self._VerifyNodeOS(node_i, nimg, refos_img)
2288 feedback_fn("* Verifying instance status")
2289 for instance in instancelist:
2291 feedback_fn("* Verifying instance %s" % instance)
2292 inst_config = instanceinfo[instance]
2293 self._VerifyInstance(instance, inst_config, node_image,
2295 inst_nodes_offline = []
2297 pnode = inst_config.primary_node
2298 pnode_img = node_image[pnode]
2299 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2300 self.ENODERPC, pnode, "instance %s, connection to"
2301 " primary node failed", instance)
2303 _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2304 "instance lives on offline node %s", inst_config.primary_node)
2306 # If the instance is non-redundant we cannot survive losing its primary
2307 # node, so we are not N+1 compliant. On the other hand we have no disk
2308 # templates with more than one secondary so that situation is not well
2309 # supported either.
2310 # FIXME: does not support file-backed instances
2311 if not inst_config.secondary_nodes:
2312 i_non_redundant.append(instance)
2314 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2315 instance, "instance has multiple secondary nodes: %s",
2316 utils.CommaJoin(inst_config.secondary_nodes),
2317 code=self.ETYPE_WARNING)
2319 if inst_config.disk_template in constants.DTS_NET_MIRROR:
2320 pnode = inst_config.primary_node
2321 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2322 instance_groups = {}
2324 for node in instance_nodes:
2325 instance_groups.setdefault(nodeinfo_byname[node].group,
2329 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2330 # Sort so that we always list the primary node first.
2331 for group, nodes in sorted(instance_groups.items(),
2332 key=lambda (_, nodes): pnode in nodes,
2335 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2336 instance, "instance has primary and secondary nodes in"
2337 " different groups: %s", utils.CommaJoin(pretty_list),
2338 code=self.ETYPE_WARNING)
2340 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2341 i_non_a_balanced.append(instance)
2343 for snode in inst_config.secondary_nodes:
2344 s_img = node_image[snode]
2345 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2346 "instance %s, connection to secondary node failed", instance)
2349 inst_nodes_offline.append(snode)
2351 # warn that the instance lives on offline nodes
2352 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2353 "instance has offline secondary node(s) %s",
2354 utils.CommaJoin(inst_nodes_offline))
2355 # ... or ghost/non-vm_capable nodes
2356 for node in inst_config.all_nodes:
2357 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2358 "instance lives on ghost node %s", node)
2359 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2360 instance, "instance lives on non-vm_capable node %s", node)
2362 feedback_fn("* Verifying orphan volumes")
2363 reserved = utils.FieldSet(*cluster.reserved_lvs)
2364 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2366 feedback_fn("* Verifying orphan instances")
2367 self._VerifyOrphanInstances(instancelist, node_image)
2369 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2370 feedback_fn("* Verifying N+1 Memory redundancy")
2371 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2373 feedback_fn("* Other Notes")
2375 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2376 % len(i_non_redundant))
2378 if i_non_a_balanced:
2379 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2380 % len(i_non_a_balanced))
2383 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2386 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2390 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2391 """Analyze the post-hooks' result
2393 This method analyses the hook result, handles it, and sends some
2394 nicely-formatted feedback back to the user.
2396 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2397 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2398 @param hooks_results: the results of the multi-node hooks rpc call
2399 @param feedback_fn: function used to send feedback back to the caller
2400 @param lu_result: previous Exec result
2401 @return: the new Exec result, based on the previous result
2405 # We only really run POST phase hooks, and are only interested in
2406 # their results
2407 if phase == constants.HOOKS_PHASE_POST:
2408 # Used to change hooks' output to proper indentation
2409 feedback_fn("* Hooks Results")
2410 assert hooks_results, "invalid result from hooks"
2412 for node_name in hooks_results:
2413 res = hooks_results[node_name]
2415 test = msg and not res.offline
2416 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2417 "Communication failure in hooks execution: %s", msg)
2418 if res.offline or msg:
2419 # No need to investigate payload if node is offline or gave an error.
2420 # manually override lu_result here, as _ErrorIf only
2421 # overrides self.bad
2424 for script, hkr, output in res.payload:
2425 test = hkr == constants.HKR_FAIL
2426 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2427 "Script %s failed, output:", script)
2429 output = self._HOOKS_INDENT_RE.sub(' ', output)
2430 feedback_fn("%s" % output)
2436 class LUClusterVerifyDisks(NoHooksLU):
2437 """Verifies the cluster disks status.
2442 def ExpandNames(self):
2443 self.needed_locks = {
2444 locking.LEVEL_NODE: locking.ALL_SET,
2445 locking.LEVEL_INSTANCE: locking.ALL_SET,
2447 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2449 def Exec(self, feedback_fn):
2450 """Verify integrity of cluster disks.
2452 @rtype: tuple of three items
2453 @return: a tuple of (dict of node-to-node_error, list of instances
2454 which need activate-disks, dict of instance: (node, volume) for
2455 missing volumes)
2458 result = res_nodes, res_instances, res_missing = {}, [], {}
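# For illustration (hypothetical names), the filled-in result could be:
#   res_nodes = {"node1": "rpc error ..."}
#   res_instances = ["inst2"]
#   res_missing = {"inst3": [("node2", "xenvg/disk0")]}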
2460 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2461 instances = self.cfg.GetAllInstancesInfo().values()
2464 for inst in instances:
2466 if not inst.admin_up:
2468 inst.MapLVsByNode(inst_lvs)
2469 # transform {iname: {node: [vol, ...]}} to {(node, vol): inst}
2470 for node, vol_list in inst_lvs.iteritems():
2471 for vol in vol_list:
2472 nv_dict[(node, vol)] = inst
2477 node_lvs = self.rpc.call_lv_list(nodes, [])
2478 for node, node_res in node_lvs.items():
2479 if node_res.offline:
2481 msg = node_res.fail_msg
2483 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2484 res_nodes[node] = msg
2487 lvs = node_res.payload
2488 for lv_name, (_, _, lv_online) in lvs.items():
2489 inst = nv_dict.pop((node, lv_name), None)
2490 if (not lv_online and inst is not None
2491 and inst.name not in res_instances):
2492 res_instances.append(inst.name)
2494 # any leftover items in nv_dict are missing LVs, let's arrange the
2495 # data better
2496 for key, inst in nv_dict.iteritems():
2497 if inst.name not in res_missing:
2498 res_missing[inst.name] = []
2499 res_missing[inst.name].append(key)
2504 class LUClusterRepairDiskSizes(NoHooksLU):
2505 """Verifies the cluster disks sizes.
2510 def ExpandNames(self):
2511 if self.op.instances:
2512 self.wanted_names = []
2513 for name in self.op.instances:
2514 full_name = _ExpandInstanceName(self.cfg, name)
2515 self.wanted_names.append(full_name)
2516 self.needed_locks = {
2517 locking.LEVEL_NODE: [],
2518 locking.LEVEL_INSTANCE: self.wanted_names,
2520 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2522 self.wanted_names = None
2523 self.needed_locks = {
2524 locking.LEVEL_NODE: locking.ALL_SET,
2525 locking.LEVEL_INSTANCE: locking.ALL_SET,
2527 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2529 def DeclareLocks(self, level):
2530 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2531 self._LockInstancesNodes(primary_only=True)
2533 def CheckPrereq(self):
2534 """Check prerequisites.
2536 This only checks the optional instance list against the existing names.
2539 if self.wanted_names is None:
2540 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2542 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2543 in self.wanted_names]
2545 def _EnsureChildSizes(self, disk):
2546 """Ensure children of the disk have the needed disk size.
2548 This is valid mainly for DRBD8 and fixes an issue where the
2549 children have a smaller disk size.
2551 @param disk: an L{ganeti.objects.Disk} object
2554 if disk.dev_type == constants.LD_DRBD8:
2555 assert disk.children, "Empty children for DRBD8?"
2556 fchild = disk.children[0]
2557 mismatch = fchild.size < disk.size
2559 self.LogInfo("Child disk has size %d, parent %d, fixing",
2560 fchild.size, disk.size)
2561 fchild.size = disk.size
2563 # and we recurse on this child only, not on the metadev
2564 return self._EnsureChildSizes(fchild) or mismatch
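# Illustrative example (hypothetical sizes): for a DRBD8 disk of 10240 MiB
# whose data child reports 10112 MiB, _EnsureChildSizes grows the child to
# 10240 in the configuration and returns True so the caller can save it.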
2568 def Exec(self, feedback_fn):
2569 """Verify the size of cluster disks.
2572 # TODO: check child disks too
2573 # TODO: check differences in size between primary/secondary nodes
2575 for instance in self.wanted_instances:
2576 pnode = instance.primary_node
2577 if pnode not in per_node_disks:
2578 per_node_disks[pnode] = []
2579 for idx, disk in enumerate(instance.disks):
2580 per_node_disks[pnode].append((instance, idx, disk))
2583 for node, dskl in per_node_disks.items():
2584 newl = [v[2].Copy() for v in dskl]
2586 self.cfg.SetDiskID(dsk, node)
2587 result = self.rpc.call_blockdev_getsizes(node, newl)
2589 self.LogWarning("Failure in blockdev_getsizes call to node"
2590 " %s, ignoring", node)
2592 if len(result.data) != len(dskl):
2593 self.LogWarning("Invalid result from node %s, ignoring node results",
2596 for ((instance, idx, disk), size) in zip(dskl, result.data):
2598 self.LogWarning("Disk %d of instance %s did not return size"
2599 " information, ignoring", idx, instance.name)
2601 if not isinstance(size, (int, long)):
2602 self.LogWarning("Disk %d of instance %s did not return valid"
2603 " size information, ignoring", idx, instance.name)
2606 if size != disk.size:
2607 self.LogInfo("Disk %d of instance %s has mismatched size,"
2608 " correcting: recorded %d, actual %d", idx,
2609 instance.name, disk.size, size)
2611 self.cfg.Update(instance, feedback_fn)
2612 changed.append((instance.name, idx, size))
2613 if self._EnsureChildSizes(disk):
2614 self.cfg.Update(instance, feedback_fn)
2615 changed.append((instance.name, idx, disk.size))
2619 class LUClusterRename(LogicalUnit):
2620 """Rename the cluster.
2623 HPATH = "cluster-rename"
2624 HTYPE = constants.HTYPE_CLUSTER
2626 def BuildHooksEnv(self):
2631 "OP_TARGET": self.cfg.GetClusterName(),
2632 "NEW_NAME": self.op.name,
2634 mn = self.cfg.GetMasterNode()
2635 all_nodes = self.cfg.GetNodeList()
2636 return env, [mn], all_nodes
2638 def CheckPrereq(self):
2639 """Verify that the passed name is a valid one.
2642 hostname = netutils.GetHostname(name=self.op.name,
2643 family=self.cfg.GetPrimaryIPFamily())
2645 new_name = hostname.name
2646 self.ip = new_ip = hostname.ip
2647 old_name = self.cfg.GetClusterName()
2648 old_ip = self.cfg.GetMasterIP()
2649 if new_name == old_name and new_ip == old_ip:
2650 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2651 " cluster has changed",
2653 if new_ip != old_ip:
2654 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2655 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2656 " reachable on the network" %
2657 new_ip, errors.ECODE_NOTUNIQUE)
2659 self.op.name = new_name
2661 def Exec(self, feedback_fn):
2662 """Rename the cluster.
2665 clustername = self.op.name
2668 # shutdown the master IP
2669 master = self.cfg.GetMasterNode()
2670 result = self.rpc.call_node_stop_master(master, False)
2671 result.Raise("Could not disable the master role")
2674 cluster = self.cfg.GetClusterInfo()
2675 cluster.cluster_name = clustername
2676 cluster.master_ip = ip
2677 self.cfg.Update(cluster, feedback_fn)
2679 # update the known hosts file
2680 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2681 node_list = self.cfg.GetOnlineNodeList()
2683 node_list.remove(master)
2686 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2688 result = self.rpc.call_node_start_master(master, False, False)
2689 msg = result.fail_msg
2691 self.LogWarning("Could not re-enable the master role on"
2692 " the master, please restart manually: %s", msg)
2697 class LUClusterSetParams(LogicalUnit):
2698 """Change the parameters of the cluster.
2701 HPATH = "cluster-modify"
2702 HTYPE = constants.HTYPE_CLUSTER
2705 def CheckArguments(self):
2709 if self.op.uid_pool:
2710 uidpool.CheckUidPool(self.op.uid_pool)
2712 if self.op.add_uids:
2713 uidpool.CheckUidPool(self.op.add_uids)
2715 if self.op.remove_uids:
2716 uidpool.CheckUidPool(self.op.remove_uids)
2718 def ExpandNames(self):
2719 # FIXME: in the future, modifying some other cluster params might not
2720 # require checking on (and locking) all nodes.
2721 self.needed_locks = {
2722 locking.LEVEL_NODE: locking.ALL_SET,
2724 self.share_locks[locking.LEVEL_NODE] = 1
2726 def BuildHooksEnv(self):
2731 "OP_TARGET": self.cfg.GetClusterName(),
2732 "NEW_VG_NAME": self.op.vg_name,
2734 mn = self.cfg.GetMasterNode()
2735 return env, [mn], [mn]
2737 def CheckPrereq(self):
2738 """Check prerequisites.
2740 This checks that the given parameters don't conflict and
2741 that the given volume group is valid.
2744 if self.op.vg_name is not None and not self.op.vg_name:
2745 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2746 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2747 " instances exist", errors.ECODE_INVAL)
2749 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2750 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2751 raise errors.OpPrereqError("Cannot disable drbd helper while"
2752 " drbd-based instances exist",
2755 node_list = self.acquired_locks[locking.LEVEL_NODE]
2757 # if vg_name is not None, check the given volume group on all nodes
2759 vglist = self.rpc.call_vg_list(node_list)
2760 for node in node_list:
2761 msg = vglist[node].fail_msg
2763 # ignoring down node
2764 self.LogWarning("Error while gathering data on node %s"
2765 " (ignoring node): %s", node, msg)
2767 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2769 constants.MIN_VG_SIZE)
2771 raise errors.OpPrereqError("Error on node '%s': %s" %
2772 (node, vgstatus), errors.ECODE_ENVIRON)
2774 if self.op.drbd_helper:
2775 # check the given drbd helper on all nodes
2776 helpers = self.rpc.call_drbd_helper(node_list)
2777 for node in node_list:
2778 ninfo = self.cfg.GetNodeInfo(node)
2780 self.LogInfo("Not checking drbd helper on offline node %s", node)
2782 msg = helpers[node].fail_msg
2784 raise errors.OpPrereqError("Error checking drbd helper on node"
2785 " '%s': %s" % (node, msg),
2786 errors.ECODE_ENVIRON)
2787 node_helper = helpers[node].payload
2788 if node_helper != self.op.drbd_helper:
2789 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2790 (node, node_helper), errors.ECODE_ENVIRON)
2792 self.cluster = cluster = self.cfg.GetClusterInfo()
2793 # validate parameter changes
2794 if self.op.beparams:
2795 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2796 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2798 if self.op.ndparams:
2799 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2800 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2802 if self.op.nicparams:
2803 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2804 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2805 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2808 # check all instances for consistency
2809 for instance in self.cfg.GetAllInstancesInfo().values():
2810 for nic_idx, nic in enumerate(instance.nics):
2811 params_copy = copy.deepcopy(nic.nicparams)
2812 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2814 # check parameter syntax
2816 objects.NIC.CheckParameterSyntax(params_filled)
2817 except errors.ConfigurationError, err:
2818 nic_errors.append("Instance %s, nic/%d: %s" %
2819 (instance.name, nic_idx, err))
2821 # if we're moving instances to routed, check that they have an ip
2822 target_mode = params_filled[constants.NIC_MODE]
2823 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2824 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2825 (instance.name, nic_idx))
2827 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2828 "\n".join(nic_errors))
2830 # hypervisor list/parameters
2831 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2832 if self.op.hvparams:
2833 for hv_name, hv_dict in self.op.hvparams.items():
2834 if hv_name not in self.new_hvparams:
2835 self.new_hvparams[hv_name] = hv_dict
2837 self.new_hvparams[hv_name].update(hv_dict)
2839 # os hypervisor parameters
2840 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2842 for os_name, hvs in self.op.os_hvp.items():
2843 if os_name not in self.new_os_hvp:
2844 self.new_os_hvp[os_name] = hvs
2846 for hv_name, hv_dict in hvs.items():
2847 if hv_name not in self.new_os_hvp[os_name]:
2848 self.new_os_hvp[os_name][hv_name] = hv_dict
2850 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2853 self.new_osp = objects.FillDict(cluster.osparams, {})
2854 if self.op.osparams:
2855 for os_name, osp in self.op.osparams.items():
2856 if os_name not in self.new_osp:
2857 self.new_osp[os_name] = {}
2859 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2862 if not self.new_osp[os_name]:
2863 # we removed all parameters
2864 del self.new_osp[os_name]
2866 # check the parameter validity (remote check)
2867 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2868 os_name, self.new_osp[os_name])
2870 # changes to the hypervisor list
2871 if self.op.enabled_hypervisors is not None:
2872 self.hv_list = self.op.enabled_hypervisors
2873 for hv in self.hv_list:
2874 # if the hypervisor doesn't already exist in the cluster
2875 # hvparams, we initialize it to empty, and then (in both
2876 # cases) we make sure to fill the defaults, as we might not
2877 # have a complete defaults list if the hypervisor wasn't
2878 # enabled before
2879 if hv not in new_hvp:
2881 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2882 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2884 self.hv_list = cluster.enabled_hypervisors
2886 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2887 # either the enabled list or the parameters have changed; validate
2888 for hv_name, hv_params in self.new_hvparams.items():
2889 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2890 (self.op.enabled_hypervisors and
2891 hv_name in self.op.enabled_hypervisors)):
2892 # either this is a new hypervisor, or its parameters have changed
2893 hv_class = hypervisor.GetHypervisor(hv_name)
2894 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2895 hv_class.CheckParameterSyntax(hv_params)
2896 _CheckHVParams(self, node_list, hv_name, hv_params)
2899 # no need to check any newly-enabled hypervisors, since the
2900 # defaults have already been checked in the above code-block
2901 for os_name, os_hvp in self.new_os_hvp.items():
2902 for hv_name, hv_params in os_hvp.items():
2903 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2904 # we need to fill in the new os_hvp on top of the actual hv_p
2905 cluster_defaults = self.new_hvparams.get(hv_name, {})
2906 new_osp = objects.FillDict(cluster_defaults, hv_params)
2907 hv_class = hypervisor.GetHypervisor(hv_name)
2908 hv_class.CheckParameterSyntax(new_osp)
2909 _CheckHVParams(self, node_list, hv_name, new_osp)
2911 if self.op.default_iallocator:
2912 alloc_script = utils.FindFile(self.op.default_iallocator,
2913 constants.IALLOCATOR_SEARCH_PATH,
2915 if alloc_script is None:
2916 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2917 " specified" % self.op.default_iallocator,
2920 def Exec(self, feedback_fn):
2921 """Change the parameters of the cluster.
2924 if self.op.vg_name is not None:
2925 new_volume = self.op.vg_name
2928 if new_volume != self.cfg.GetVGName():
2929 self.cfg.SetVGName(new_volume)
2931 feedback_fn("Cluster LVM configuration already in desired"
2932 " state, not changing")
2933 if self.op.drbd_helper is not None:
2934 new_helper = self.op.drbd_helper
2937 if new_helper != self.cfg.GetDRBDHelper():
2938 self.cfg.SetDRBDHelper(new_helper)
2940 feedback_fn("Cluster DRBD helper already in desired state,"
2942 if self.op.hvparams:
2943 self.cluster.hvparams = self.new_hvparams
2945 self.cluster.os_hvp = self.new_os_hvp
2946 if self.op.enabled_hypervisors is not None:
2947 self.cluster.hvparams = self.new_hvparams
2948 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2949 if self.op.beparams:
2950 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2951 if self.op.nicparams:
2952 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2953 if self.op.osparams:
2954 self.cluster.osparams = self.new_osp
2955 if self.op.ndparams:
2956 self.cluster.ndparams = self.new_ndparams
2958 if self.op.candidate_pool_size is not None:
2959 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2960 # we need to update the pool size here, otherwise the save will fail
2961 _AdjustCandidatePool(self, [])
2963 if self.op.maintain_node_health is not None:
2964 self.cluster.maintain_node_health = self.op.maintain_node_health
2966 if self.op.prealloc_wipe_disks is not None:
2967 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2969 if self.op.add_uids is not None:
2970 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2972 if self.op.remove_uids is not None:
2973 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2975 if self.op.uid_pool is not None:
2976 self.cluster.uid_pool = self.op.uid_pool
2978 if self.op.default_iallocator is not None:
2979 self.cluster.default_iallocator = self.op.default_iallocator
2981 if self.op.reserved_lvs is not None:
2982 self.cluster.reserved_lvs = self.op.reserved_lvs
2984 def helper_os(aname, mods, desc):
2986 lst = getattr(self.cluster, aname)
2987 for key, val in mods:
2988 if key == constants.DDM_ADD:
2990 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2993 elif key == constants.DDM_REMOVE:
2997 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2999 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3001 if self.op.hidden_os:
3002 helper_os("hidden_os", self.op.hidden_os, "hidden")
3004 if self.op.blacklisted_os:
3005 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3007 if self.op.master_netdev:
3008 master = self.cfg.GetMasterNode()
3009 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3010 self.cluster.master_netdev)
3011 result = self.rpc.call_node_stop_master(master, False)
3012 result.Raise("Could not disable the master ip")
3013 feedback_fn("Changing master_netdev from %s to %s" %
3014 (self.cluster.master_netdev, self.op.master_netdev))
3015 self.cluster.master_netdev = self.op.master_netdev
3017 self.cfg.Update(self.cluster, feedback_fn)
3019 if self.op.master_netdev:
3020 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3021 self.op.master_netdev)
3022 result = self.rpc.call_node_start_master(master, False, False)
3024 self.LogWarning("Could not re-enable the master ip on"
3025 " the master, please restart manually: %s",
3029 def _UploadHelper(lu, nodes, fname):
3030 """Helper for uploading a file and showing warnings.
3033 if os.path.exists(fname):
3034 result = lu.rpc.call_upload_file(nodes, fname)
3035 for to_node, to_result in result.items():
3036 msg = to_result.fail_msg
3038 msg = ("Copy of file %s to node %s failed: %s" %
3039 (fname, to_node, msg))
3040 lu.proc.LogWarning(msg)
3043 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3044 """Distribute additional files which are part of the cluster configuration.
3046 ConfigWriter takes care of distributing the config and ssconf files, but
3047 there are more files which should be distributed to all nodes. This function
3048 makes sure those are copied.
3050 @param lu: calling logical unit
3051 @param additional_nodes: list of nodes not in the config to distribute to
3052 @type additional_vm: boolean
3053 @param additional_vm: whether the additional nodes are vm-capable or not
3056 # 1. Gather target nodes
3057 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3058 dist_nodes = lu.cfg.GetOnlineNodeList()
3059 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3060 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3061 if additional_nodes is not None:
3062 dist_nodes.extend(additional_nodes)
3064 vm_nodes.extend(additional_nodes)
3065 if myself.name in dist_nodes:
3066 dist_nodes.remove(myself.name)
3067 if myself.name in vm_nodes:
3068 vm_nodes.remove(myself.name)
3070 # 2. Gather files to distribute
3071 dist_files = set([constants.ETC_HOSTS,
3072 constants.SSH_KNOWN_HOSTS_FILE,
3073 constants.RAPI_CERT_FILE,
3074 constants.RAPI_USERS_FILE,
3075 constants.CONFD_HMAC_KEY,
3076 constants.CLUSTER_DOMAIN_SECRET_FILE,
3080 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3081 for hv_name in enabled_hypervisors:
3082 hv_class = hypervisor.GetHypervisor(hv_name)
3083 vm_files.update(hv_class.GetAncillaryFiles())
3085 # 3. Perform the files upload
3086 for fname in dist_files:
3087 _UploadHelper(lu, dist_nodes, fname)
3088 for fname in vm_files:
3089 _UploadHelper(lu, vm_nodes, fname)
3092 class LUClusterRedistConf(NoHooksLU):
3093 """Force the redistribution of cluster configuration.
3095 This is a very simple LU.
3100 def ExpandNames(self):
3101 self.needed_locks = {
3102 locking.LEVEL_NODE: locking.ALL_SET,
3104 self.share_locks[locking.LEVEL_NODE] = 1
3106 def Exec(self, feedback_fn):
3107 """Redistribute the configuration.
3110 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3111 _RedistributeAncillaryFiles(self)
3114 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3115 """Sleep and poll for an instance's disk to sync.
3118 if not instance.disks or (disks is not None and not disks):
3121 disks = _ExpandCheckDisks(instance, disks)
3124 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3126 node = instance.primary_node
3129 lu.cfg.SetDiskID(dev, node)
3131 # TODO: Convert to utils.Retry
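# A rough sketch of that conversion (assuming utils.Retry/utils.RetryAgain
# with the usual (fn, delay, timeout) signature; untested):
#   def _CheckSync():
#     if _AnyDiskDegraded():  # hypothetical wrapper around the polling below
#       raise utils.RetryAgain()
#   utils.Retry(_CheckSync, delay=1.0, timeout=max_time)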
3134 degr_retries = 10 # in seconds, as we sleep 1 second each time
3138 cumul_degraded = False
3139 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3140 msg = rstats.fail_msg
3142 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3145 raise errors.RemoteError("Can't contact node %s for mirror data,"
3146 " aborting." % node)
3149 rstats = rstats.payload
3151 for i, mstat in enumerate(rstats):
3153 lu.LogWarning("Can't compute data for node %s/%s",
3154 node, disks[i].iv_name)
3157 cumul_degraded = (cumul_degraded or
3158 (mstat.is_degraded and mstat.sync_percent is None))
3159 if mstat.sync_percent is not None:
3161 if mstat.estimated_time is not None:
3162 rem_time = ("%s remaining (estimated)" %
3163 utils.FormatSeconds(mstat.estimated_time))
3164 max_time = mstat.estimated_time
3166 rem_time = "no time estimate"
3167 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3168 (disks[i].iv_name, mstat.sync_percent, rem_time))
3170 # if we're done but degraded, let's do a few small retries, to
3171 # make sure we see a stable and not transient situation; therefore
3172 # we force a restart of the loop
3173 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3174 logging.info("Degraded disks found, %d retries left", degr_retries)
3182 time.sleep(min(60, max_time))
3185 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3186 return not cumul_degraded
3189 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3190 """Check that mirrors are not degraded.
3192 The ldisk parameter, if True, will change the test from the
3193 is_degraded attribute (which represents overall non-ok status for
3194 the device(s)) to the ldisk (representing the local storage status).
3197 lu.cfg.SetDiskID(dev, node)
3201 if on_primary or dev.AssembleOnSecondary():
3202 rstats = lu.rpc.call_blockdev_find(node, dev)
3203 msg = rstats.fail_msg
3205 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3207 elif not rstats.payload:
3208 lu.LogWarning("Can't find disk on node %s", node)
3212 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3214 result = result and not rstats.payload.is_degraded
3217 for child in dev.children:
3218 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
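# Example use (hypothetical variables): checking that the local storage of
# a disk on its primary node is healthy before acting on it:
#   ok = _CheckDiskConsistency(self, instance.disks[0],
#                              instance.primary_node, True, ldisk=True)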
3223 class LUOobCommand(NoHooksLU):
3224 """Logical unit for OOB handling.
3229 def CheckPrereq(self):
3230 """Check prerequisites.
3233 - the node exists in the configuration
3236 Any errors are signaled by raising errors.OpPrereqError.
3240 for node_name in self.op.node_names:
3241 node = self.cfg.GetNodeInfo(node_name)
3244 raise errors.OpPrereqError("Node %s not found" % node_name,
3247 self.nodes.append(node)
3249 if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
3250 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3251 " not marked offline") % node_name,
3254 def ExpandNames(self):
3255 """Gather locks we need.
3258 if self.op.node_names:
3259 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3260 for name in self.op.node_names]
3262 self.op.node_names = self.cfg.GetNodeList()
3264 self.needed_locks = {
3265 locking.LEVEL_NODE: self.op.node_names,
3268 def Exec(self, feedback_fn):
3269 """Execute OOB and return result if we expect any.
3272 master_node = self.cfg.GetMasterNode()
3275 for node in self.nodes:
3276 node_entry = [(constants.RS_NORMAL, node.name)]
3277 ret.append(node_entry)
3279 oob_program = _SupportsOob(self.cfg, node)
3282 node_entry.append((constants.RS_UNAVAIL, None))
3285 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3286 self.op.command, oob_program, node.name)
3287 result = self.rpc.call_run_oob(master_node, oob_program,
3288 self.op.command, node.name,
3292 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3293 node.name, result.fail_msg)
3294 node_entry.append((constants.RS_NODATA, None))
3297 self._CheckPayload(result)
3298 except errors.OpExecError, err:
3299 self.LogWarning("The payload returned by '%s' is not valid: %s",
3301 node_entry.append((constants.RS_NODATA, None))
3303 if self.op.command == constants.OOB_HEALTH:
3304 # For health we should log important events
3305 for item, status in result.payload:
3306 if status in [constants.OOB_STATUS_WARNING,
3307 constants.OOB_STATUS_CRITICAL]:
3308 self.LogWarning("On node '%s' item '%s' has status '%s'",
3309 node.name, item, status)
3311 if self.op.command == constants.OOB_POWER_ON:
3313 elif self.op.command == constants.OOB_POWER_OFF:
3314 node.powered = False
3315 elif self.op.command == constants.OOB_POWER_STATUS:
3316 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3317 if powered != node.powered:
3318 logging.warning(("Recorded power state (%s) of node '%s' does not"
3319 " match actual power state (%s)"), node.powered,
3322 # For configuration changing commands we should update the node
3323 if self.op.command in (constants.OOB_POWER_ON,
3324 constants.OOB_POWER_OFF):
3325 self.cfg.Update(node, feedback_fn)
3327 node_entry.append((constants.RS_NORMAL, result.payload))
3331 def _CheckPayload(self, result):
3332 """Checks if the payload is valid.
3334 @param result: RPC result
3335 @raises errors.OpExecError: If payload is not valid
3339 if self.op.command == constants.OOB_HEALTH:
3340 if not isinstance(result.payload, list):
3341 errs.append("command 'health' is expected to return a list but got %s" %
3342 type(result.payload))
3344 for item, status in result.payload:
3345 if status not in constants.OOB_STATUSES:
3346 errs.append("health item '%s' has invalid status '%s'" %
3349 if self.op.command == constants.OOB_POWER_STATUS:
3350 if not isinstance(result.payload, dict):
3351 errs.append("power-status is expected to return a dict but got %s" %
3352 type(result.payload))
3354 if self.op.command in [
3355 constants.OOB_POWER_ON,
3356 constants.OOB_POWER_OFF,
3357 constants.OOB_POWER_CYCLE,
3359 if result.payload is not None:
3360 errs.append("%s is expected to not return payload but got '%s'" %
3361 (self.op.command, result.payload))
3364 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3365 utils.CommaJoin(errs))
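# Payload shapes accepted by the checks above (illustrative values):
#   OOB_HEALTH       -> [("disk0", constants.OOB_STATUS_OK), ...]
#   OOB_POWER_STATUS -> {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE -> no payload (None)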
3369 class LUOsDiagnose(NoHooksLU):
3370 """Logical unit for OS diagnose/query.
3375 _BLK = "blacklisted"
3377 _FIELDS_STATIC = utils.FieldSet()
3378 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3379 "parameters", "api_versions", _HID, _BLK)
3381 def CheckArguments(self):
3383 raise errors.OpPrereqError("Selective OS query not supported",
3386 _CheckOutputFields(static=self._FIELDS_STATIC,
3387 dynamic=self._FIELDS_DYNAMIC,
3388 selected=self.op.output_fields)
3390 def ExpandNames(self):
3391 # Lock all nodes, in shared mode
3392 # Temporary removal of locks, should be reverted later
3393 # TODO: reintroduce locks when they are lighter-weight
3394 self.needed_locks = {}
3395 #self.share_locks[locking.LEVEL_NODE] = 1
3396 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3399 def _DiagnoseByOS(rlist):
3400 """Remaps a per-node return list into an a per-os per-node dictionary
3402 @param rlist: a map with node names as keys and OS objects as values
3405 @return: a dictionary with osnames as keys and as value another
3406 map, with nodes as keys and tuples of (path, status, diagnose,
3407 variants, parameters, api_versions) as values, eg::
3409 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3410 (/srv/..., False, "invalid api")],
3411 "node2": [(/srv/..., True, "", [], [])]}
3416 # we build here the list of nodes that didn't fail the RPC (at RPC
3417 # level), so that nodes with a non-responding node daemon don't
3418 # make all OSes invalid
3419 good_nodes = [node_name for node_name in rlist
3420 if not rlist[node_name].fail_msg]
3421 for node_name, nr in rlist.items():
3422 if nr.fail_msg or not nr.payload:
3424 for (name, path, status, diagnose, variants,
3425 params, api_versions) in nr.payload:
3426 if name not in all_os:
3427 # build a list of nodes for this os containing empty lists
3428 # for each node in node_list
3430 for nname in good_nodes:
3431 all_os[name][nname] = []
3432 # convert params from [name, help] to (name, help)
3433 params = [tuple(v) for v in params]
3434 all_os[name][node_name].append((path, status, diagnose,
3435 variants, params, api_versions))
3438 def Exec(self, feedback_fn):
3439 """Compute the list of OSes.
3442 valid_nodes = [node.name
3443 for node in self.cfg.GetAllNodesInfo().values()
3444 if not node.offline and node.vm_capable]
3445 node_data = self.rpc.call_os_diagnose(valid_nodes)
3446 pol = self._DiagnoseByOS(node_data)
3448 cluster = self.cfg.GetClusterInfo()
3450 for os_name in utils.NiceSort(pol.keys()):
3451 os_data = pol[os_name]
3454 (variants, params, api_versions) = null_state = (set(), set(), set())
3455 for idx, osl in enumerate(os_data.values()):
3456 valid = bool(valid and osl and osl[0][1])
3458 (variants, params, api_versions) = null_state
3460 node_variants, node_params, node_api = osl[0][3:6]
3461 if idx == 0: # first entry
3462 variants = set(node_variants)
3463 params = set(node_params)
3464 api_versions = set(node_api)
3465 else: # keep consistency
3466 variants.intersection_update(node_variants)
3467 params.intersection_update(node_params)
3468 api_versions.intersection_update(node_api)
3470 is_hid = os_name in cluster.hidden_os
3471 is_blk = os_name in cluster.blacklisted_os
3472 if ((self._HID not in self.op.output_fields and is_hid) or
3473 (self._BLK not in self.op.output_fields and is_blk) or
3474 (self._VLD not in self.op.output_fields and not valid)):
3477 for field in self.op.output_fields:
3480 elif field == self._VLD:
3482 elif field == "node_status":
3483 # this is just a copy of the dict
3485 for node_name, nos_list in os_data.items():
3486 val[node_name] = nos_list
3487 elif field == "variants":
3488 val = utils.NiceSort(list(variants))
3489 elif field == "parameters":
3491 elif field == "api_versions":
3492 val = list(api_versions)
3493 elif field == self._HID:
3495 elif field == self._BLK:
3498 raise errors.ParameterError(field)
3505 class LUNodeRemove(LogicalUnit):
3506 """Logical unit for removing a node.
3509 HPATH = "node-remove"
3510 HTYPE = constants.HTYPE_NODE
3512 def BuildHooksEnv(self):
3515 This doesn't run on the target node in the pre phase as a failed
3516 node would then be impossible to remove.
3520 "OP_TARGET": self.op.node_name,
3521 "NODE_NAME": self.op.node_name,
3523 all_nodes = self.cfg.GetNodeList()
3525 all_nodes.remove(self.op.node_name)
3527 logging.warning("Node %s, which is about to be removed, was not found"
3528 " in the list of all nodes", self.op.node_name)
3529 return env, all_nodes, all_nodes
3531 def CheckPrereq(self):
3532 """Check prerequisites.
3535 - the node exists in the configuration
3536 - it does not have primary or secondary instances
3537 - it's not the master
3539 Any errors are signaled by raising errors.OpPrereqError.
3542 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3543 node = self.cfg.GetNodeInfo(self.op.node_name)
3544 assert node is not None
3546 instance_list = self.cfg.GetInstanceList()
3548 masternode = self.cfg.GetMasterNode()
3549 if node.name == masternode:
3550 raise errors.OpPrereqError("Node is the master node,"
3551 " you need to failover first.",
3554 for instance_name in instance_list:
3555 instance = self.cfg.GetInstanceInfo(instance_name)
3556 if node.name in instance.all_nodes:
3557 raise errors.OpPrereqError("Instance %s is still running on the node,"
3558 " please remove first." % instance_name,
3560 self.op.node_name = node.name
3563 def Exec(self, feedback_fn):
3564 """Removes the node from the cluster.
3568 logging.info("Stopping the node daemon and removing configs from node %s",
3571 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3573 # Promote nodes to master candidate as needed
3574 _AdjustCandidatePool(self, exceptions=[node.name])
3575 self.context.RemoveNode(node.name)
3577 # Run post hooks on the node before it's removed
3578 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3580 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3582 # pylint: disable-msg=W0702
3583 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3585 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3586 msg = result.fail_msg
3588 self.LogWarning("Errors encountered on the remote node while leaving"
3589 " the cluster: %s", msg)
3591 # Remove node from our /etc/hosts
3592 if self.cfg.GetClusterInfo().modify_etc_hosts:
3593 master_node = self.cfg.GetMasterNode()
3594 result = self.rpc.call_etc_hosts_modify(master_node,
3595 constants.ETC_HOSTS_REMOVE,
3597 result.Raise("Can't update hosts file with new host data")
3598 _RedistributeAncillaryFiles(self)
3601 class _NodeQuery(_QueryBase):
3602 FIELDS = query.NODE_FIELDS
3604 def ExpandNames(self, lu):
3605 lu.needed_locks = {}
3606 lu.share_locks[locking.LEVEL_NODE] = 1
3609 self.wanted = _GetWantedNodes(lu, self.names)
3611 self.wanted = locking.ALL_SET
3613 self.do_locking = (self.use_locking and
3614 query.NQ_LIVE in self.requested_data)
3617 # if we don't request only static fields, we need to lock the nodes
3618 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3620 def DeclareLocks(self, lu, level):
3623 def _GetQueryData(self, lu):
3624 """Computes the list of nodes and their attributes.
3627 all_info = lu.cfg.GetAllNodesInfo()
3629 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3631 # Gather data as requested
3632 if query.NQ_LIVE in self.requested_data:
3633 node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3634 lu.cfg.GetHypervisorType())
3635 live_data = dict((name, nresult.payload)
3636 for (name, nresult) in node_data.items()
3637 if not nresult.fail_msg and nresult.payload)
3641 if query.NQ_INST in self.requested_data:
3642 node_to_primary = dict([(name, set()) for name in nodenames])
3643 node_to_secondary = dict([(name, set()) for name in nodenames])
3645 inst_data = lu.cfg.GetAllInstancesInfo()
3647 for inst in inst_data.values():
3648 if inst.primary_node in node_to_primary:
3649 node_to_primary[inst.primary_node].add(inst.name)
3650 for secnode in inst.secondary_nodes:
3651 if secnode in node_to_secondary:
3652 node_to_secondary[secnode].add(inst.name)
3654 node_to_primary = None
3655 node_to_secondary = None
3657 if query.NQ_OOB in self.requested_data:
3658 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3659 for name, node in all_info.iteritems())
3663 if query.NQ_GROUP in self.requested_data:
3664 groups = lu.cfg.GetAllNodeGroupsInfo()
3668 return query.NodeQueryData([all_info[name] for name in nodenames],
3669 live_data, lu.cfg.GetMasterNode(),
3670 node_to_primary, node_to_secondary, groups,
3671 oob_support, lu.cfg.GetClusterInfo())
3674 class LUNodeQuery(NoHooksLU):
3675 """Logical unit for querying nodes.
3678 # pylint: disable-msg=W0142
3681 def CheckArguments(self):
3682 self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3683 self.op.use_locking)
3685 def ExpandNames(self):
3686 self.nq.ExpandNames(self)
3688 def Exec(self, feedback_fn):
3689 return self.nq.OldStyleQuery(self)
3692 class LUNodeQueryvols(NoHooksLU):
3693 """Logical unit for getting volumes on node(s).
3697 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3698 _FIELDS_STATIC = utils.FieldSet("node")
3700 def CheckArguments(self):
3701 _CheckOutputFields(static=self._FIELDS_STATIC,
3702 dynamic=self._FIELDS_DYNAMIC,
3703 selected=self.op.output_fields)
3705 def ExpandNames(self):
3706 self.needed_locks = {}
3707 self.share_locks[locking.LEVEL_NODE] = 1
3708 if not self.op.nodes:
3709 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3711 self.needed_locks[locking.LEVEL_NODE] = \
3712 _GetWantedNodes(self, self.op.nodes)
3714 def Exec(self, feedback_fn):
3715 """Computes the list of nodes and their attributes.
3718 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3719 volumes = self.rpc.call_node_volumes(nodenames)
3721 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3722 in self.cfg.GetInstanceList()]
3724 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3727 for node in nodenames:
3728 nresult = volumes[node]
3731 msg = nresult.fail_msg
3733 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3736 node_vols = nresult.payload[:]
3737 node_vols.sort(key=lambda vol: vol['dev'])
3739 for vol in node_vols:
3741 for field in self.op.output_fields:
3744 elif field == "phys":
3748 elif field == "name":
3750 elif field == "size":
3751 val = int(float(vol['size']))
3752 elif field == "instance":
3754 if node not in lv_by_node[inst]:
3756 if vol['name'] in lv_by_node[inst][node]:
3762 raise errors.ParameterError(field)
3763 node_output.append(str(val))
3765 output.append(node_output)
3770 class LUNodeQueryStorage(NoHooksLU):
3771 """Logical unit for getting information on storage units on node(s).
3774 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3777 def CheckArguments(self):
3778 _CheckOutputFields(static=self._FIELDS_STATIC,
3779 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3780 selected=self.op.output_fields)
3782 def ExpandNames(self):
3783 self.needed_locks = {}
3784 self.share_locks[locking.LEVEL_NODE] = 1
3787 self.needed_locks[locking.LEVEL_NODE] = \
3788 _GetWantedNodes(self, self.op.nodes)
3790 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3792 def Exec(self, feedback_fn):
3793 """Computes the list of nodes and their attributes.
3796 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3798 # Always get name to sort by
3799 if constants.SF_NAME in self.op.output_fields:
3800 fields = self.op.output_fields[:]
3801 else:
3802 fields = [constants.SF_NAME] + self.op.output_fields
3804 # Never ask for node or type as it's only known to the LU
3805 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3806 while extra in fields:
3807 fields.remove(extra)
3809 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3810 name_idx = field_idx[constants.SF_NAME]
3812 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3813 data = self.rpc.call_storage_list(self.nodes,
3814 self.op.storage_type, st_args,
3815 self.op.name, fields)
3817 result = []
3819 for node in utils.NiceSort(self.nodes):
3820 nresult = data[node]
3821 if nresult.offline:
3822 continue
3824 msg = nresult.fail_msg
3825 if msg:
3826 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3827 continue
3829 rows = dict([(row[name_idx], row) for row in nresult.payload])
3831 for name in utils.NiceSort(rows.keys()):
3832 row = rows[name]
3834 out = []
3836 for field in self.op.output_fields:
3837 if field == constants.SF_NODE:
3838 val = node
3839 elif field == constants.SF_TYPE:
3840 val = self.op.storage_type
3841 elif field in field_idx:
3842 val = row[field_idx[field]]
3843 else:
3844 raise errors.ParameterError(field)
3846 out.append(val)
3848 result.append(out)
3850 return result
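# Illustrative note (sketch, field names from constants assumed valid):
# because SF_NAME is forced to the front of `fields`, output_fields of, say,
# [SF_SIZE, SF_USED] make the node return ["name", "size", "used"] rows and
# field_idx == {"name": 0, "size": 1, "used": 2}; SF_NODE and SF_TYPE are
# filled in locally above and never requested from the node.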
3853 class _InstanceQuery(_QueryBase):
3854 FIELDS = query.INSTANCE_FIELDS
3856 def ExpandNames(self, lu):
3857 lu.needed_locks = {}
3858 lu.share_locks[locking.LEVEL_INSTANCE] = 1
3859 lu.share_locks[locking.LEVEL_NODE] = 1
3861 if self.names:
3862 self.wanted = _GetWantedInstances(lu, self.names)
3863 else:
3864 self.wanted = locking.ALL_SET
3866 self.do_locking = (self.use_locking and
3867 query.IQ_LIVE in self.requested_data)
3868 if self.do_locking:
3869 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3870 lu.needed_locks[locking.LEVEL_NODE] = []
3871 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3873 def DeclareLocks(self, lu, level):
3874 if level == locking.LEVEL_NODE and self.do_locking:
3875 lu._LockInstancesNodes() # pylint: disable-msg=W0212
3877 def _GetQueryData(self, lu):
3878 """Computes the list of instances and their attributes.
3881 cluster = lu.cfg.GetClusterInfo()
3882 all_info = lu.cfg.GetAllInstancesInfo()
3884 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3886 instance_list = [all_info[name] for name in instance_names]
3887 nodes = frozenset(itertools.chain(*(inst.all_nodes
3888 for inst in instance_list)))
3889 hv_list = list(set([inst.hypervisor for inst in instance_list]))
3890 bad_nodes = []
3891 offline_nodes = []
3892 wrongnode_inst = set()
3894 # Gather data as requested
3895 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
3896 live_data = {}
3897 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3898 for name in nodes:
3899 result = node_data[name]
3900 if result.offline:
3901 # offline nodes will be in both lists
3902 assert result.fail_msg
3903 offline_nodes.append(name)
3904 if result.fail_msg:
3905 bad_nodes.append(name)
3906 elif result.payload:
3907 for inst in result.payload:
3908 if all_info[inst].primary_node == name:
3909 live_data.update(result.payload)
3910 else:
3911 wrongnode_inst.add(inst)
3912 # else no instance is alive
3913 else:
3914 live_data = None
3916 if query.IQ_DISKUSAGE in self.requested_data:
3917 disk_usage = dict((inst.name,
3918 _ComputeDiskSize(inst.disk_template,
3919 [{"size": disk.size}
3920 for disk in inst.disks]))
3921 for inst in instance_list)
3922 else:
3923 disk_usage = None
3925 if query.IQ_CONSOLE in self.requested_data:
3926 consinfo = {}
3927 for inst in instance_list:
3928 if inst.name in live_data:
3929 # Instance is running
3930 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
3931 else:
3932 consinfo[inst.name] = None
3933 assert set(consinfo.keys()) == set(instance_names)
3934 else:
3935 consinfo = None
3937 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3938 disk_usage, offline_nodes, bad_nodes,
3939 live_data, wrongnode_inst, consinfo)
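# Editorial sketch mirroring the node query above: the IQ_* flags gate the
# optional payloads (example flag set assumed):
#   IQ_LIVE      -> live_data/bad_nodes/offline_nodes via all_instances_info
#   IQ_DISKUSAGE -> disk_usage computed via _ComputeDiskSize
#   IQ_CONSOLE   -> consinfo, with None entries for stopped instances
# live_data, disk_usage and consinfo default to None when not requested.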
3942 class LUQuery(NoHooksLU):
3943 """Query for resources/items of a certain kind.
3946 # pylint: disable-msg=W0142
3949 def CheckArguments(self):
3950 qcls = _GetQueryImplementation(self.op.what)
3951 names = qlang.ReadSimpleFilter("name", self.op.filter)
3953 self.impl = qcls(names, self.op.fields, False)
3955 def ExpandNames(self):
3956 self.impl.ExpandNames(self)
3958 def DeclareLocks(self, level):
3959 self.impl.DeclareLocks(self, level)
3961 def Exec(self, feedback_fn):
3962 return self.impl.NewStyleQuery(self)
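# Sketch of the filter handling above (example values assumed):
# qlang.ReadSimpleFilter only accepts plain name filters, e.g.
#   op.filter = ["|", ["=", "name", "node1"], ["=", "name", "node2"]]
# yields names == ["node1", "node2"], while op.filter = None yields None,
# i.e. "query everything".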
3965 class LUQueryFields(NoHooksLU):
3966 """Query the fields of resources/items of a certain kind.
3969 # pylint: disable-msg=W0142
3972 def CheckArguments(self):
3973 self.qcls = _GetQueryImplementation(self.op.what)
3975 def ExpandNames(self):
3976 self.needed_locks = {}
3978 def Exec(self, feedback_fn):
3979 return self.qcls.FieldsQuery(self.op.fields)
3982 class LUNodeModifyStorage(NoHooksLU):
3983 """Logical unit for modifying a storage volume on a node.
3988 def CheckArguments(self):
3989 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3991 storage_type = self.op.storage_type
3993 try:
3994 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3995 except KeyError:
3996 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3997 " modified" % storage_type,
3998 errors.ECODE_INVAL)
4000 diff = set(self.op.changes.keys()) - modifiable
4001 if diff:
4002 raise errors.OpPrereqError("The following fields can not be modified for"
4003 " storage units of type '%s': %r" %
4004 (storage_type, list(diff)),
4005 errors.ECODE_INVAL)
4007 def ExpandNames(self):
4008 self.needed_locks = {
4009 locking.LEVEL_NODE: self.op.node_name,
4010 }
4012 def Exec(self, feedback_fn):
4013 """Modifies a storage volume on the given node.
4016 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4017 result = self.rpc.call_storage_modify(self.op.node_name,
4018 self.op.storage_type, st_args,
4019 self.op.name, self.op.changes)
4020 result.Raise("Failed to modify storage unit '%s' on %s" %
4021 (self.op.name, self.op.node_name))
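# Illustrative example (constants/values assumed, not confirmed by this
# module): for an LVM physical volume, MODIFIABLE_STORAGE_FIELDS is expected
# to allow only the "allocatable" field, so
#   op.changes = {constants.SF_ALLOCATABLE: False}
# passes CheckArguments, while any other key lands in `diff` and raises
# OpPrereqError before this Exec is ever reached.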
4024 class LUNodeAdd(LogicalUnit):
4025 """Logical unit for adding a node to the cluster.
4028 HPATH = "node-add"
4029 HTYPE = constants.HTYPE_NODE
4030 _NFLAGS = ["master_capable", "vm_capable"]
4032 def CheckArguments(self):
4033 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4034 # validate/normalize the node name
4035 self.hostname = netutils.GetHostname(name=self.op.node_name,
4036 family=self.primary_ip_family)
4037 self.op.node_name = self.hostname.name
4038 if self.op.readd and self.op.group:
4039 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4040 " being readded", errors.ECODE_INVAL)
4042 def BuildHooksEnv(self):
4043 """Build hooks env.
4045 This will run on all nodes before, and on all nodes + the new node after.
4048 env = {
4049 "OP_TARGET": self.op.node_name,
4050 "NODE_NAME": self.op.node_name,
4051 "NODE_PIP": self.op.primary_ip,
4052 "NODE_SIP": self.op.secondary_ip,
4053 "MASTER_CAPABLE": str(self.op.master_capable),
4054 "VM_CAPABLE": str(self.op.vm_capable),
4055 }
4056 nodes_0 = self.cfg.GetNodeList()
4057 nodes_1 = nodes_0 + [self.op.node_name, ]
4058 return env, nodes_0, nodes_1
4060 def CheckPrereq(self):
4061 """Check prerequisites.
4063 This checks:
4064 - the new node is not already in the config
4065 - it is resolvable
4066 - its parameters (single/dual homed) match the cluster
4068 Any errors are signaled by raising errors.OpPrereqError.
4071 cfg = self.cfg
4072 hostname = self.hostname
4073 node = hostname.name
4074 primary_ip = self.op.primary_ip = hostname.ip
4075 if self.op.secondary_ip is None:
4076 if self.primary_ip_family == netutils.IP6Address.family:
4077 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4078 " IPv4 address must be given as secondary",
4079 errors.ECODE_INVAL)
4080 self.op.secondary_ip = primary_ip
4082 secondary_ip = self.op.secondary_ip
4083 if not netutils.IP4Address.IsValid(secondary_ip):
4084 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4085 " address" % secondary_ip, errors.ECODE_INVAL)
4087 node_list = cfg.GetNodeList()
4088 if not self.op.readd and node in node_list:
4089 raise errors.OpPrereqError("Node %s is already in the configuration" %
4090 node, errors.ECODE_EXISTS)
4091 elif self.op.readd and node not in node_list:
4092 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4093 errors.ECODE_NOENT)
4095 self.changed_primary_ip = False
4097 for existing_node_name in node_list:
4098 existing_node = cfg.GetNodeInfo(existing_node_name)
4100 if self.op.readd and node == existing_node_name:
4101 if existing_node.secondary_ip != secondary_ip:
4102 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4103 " address configuration as before",
4104 errors.ECODE_INVAL)
4105 if existing_node.primary_ip != primary_ip:
4106 self.changed_primary_ip = True
4108 continue
4110 if (existing_node.primary_ip == primary_ip or
4111 existing_node.secondary_ip == primary_ip or
4112 existing_node.primary_ip == secondary_ip or
4113 existing_node.secondary_ip == secondary_ip):
4114 raise errors.OpPrereqError("New node ip address(es) conflict with"
4115 " existing node %s" % existing_node.name,
4116 errors.ECODE_NOTUNIQUE)
4118 # After this 'if' block, None is no longer a valid value for the
4119 # _capable op attributes
4120 if self.op.readd:
4121 old_node = self.cfg.GetNodeInfo(node)
4122 assert old_node is not None, "Can't retrieve locked node %s" % node
4123 for attr in self._NFLAGS:
4124 if getattr(self.op, attr) is None:
4125 setattr(self.op, attr, getattr(old_node, attr))
4126 else:
4127 for attr in self._NFLAGS:
4128 if getattr(self.op, attr) is None:
4129 setattr(self.op, attr, True)
4131 if self.op.readd and not self.op.vm_capable:
4132 pri, sec = cfg.GetNodeInstances(node)
4133 if pri or sec:
4134 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4135 " flag set to false, but it already holds"
4136 " instances" % node,
4137 errors.ECODE_STATE)
4139 # check that the type of the node (single versus dual homed) is the
4140 # same as for the master
4141 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4142 master_singlehomed = myself.secondary_ip == myself.primary_ip
4143 newbie_singlehomed = secondary_ip == primary_ip
4144 if master_singlehomed != newbie_singlehomed:
4145 if master_singlehomed:
4146 raise errors.OpPrereqError("The master has no secondary ip but the"
4147 " new node has one",
4148 errors.ECODE_INVAL)
4149 else:
4150 raise errors.OpPrereqError("The master has a secondary ip but the"
4151 " new node doesn't have one",
4152 errors.ECODE_INVAL)
4154 # checks reachability
4155 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4156 raise errors.OpPrereqError("Node not reachable by ping",
4157 errors.ECODE_ENVIRON)
4159 if not newbie_singlehomed:
4160 # check reachability from my secondary ip to newbie's secondary ip
4161 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4162 source=myself.secondary_ip):
4163 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4164 " based ping to node daemon port",
4165 errors.ECODE_ENVIRON)
4167 if self.op.readd:
4168 exceptions = [node]
4169 else:
4170 exceptions = []
4172 if self.op.master_capable:
4173 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4174 else:
4175 self.master_candidate = False
4177 if self.op.readd:
4178 self.new_node = old_node
4179 else:
4180 node_group = cfg.LookupNodeGroup(self.op.group)
4181 self.new_node = objects.Node(name=node,
4182 primary_ip=primary_ip,
4183 secondary_ip=secondary_ip,
4184 master_candidate=self.master_candidate,
4185 offline=False, drained=False,
4186 group=node_group)
4188 if self.op.ndparams:
4189 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4191 def Exec(self, feedback_fn):
4192 """Adds the new node to the cluster.
4195 new_node = self.new_node
4196 node = new_node.name
4198 # We are adding a new node, so we assume it is powered
4199 new_node.powered = True
4201 # for re-adds, reset the offline/drained/master-candidate flags;
4202 # we need to reset here, otherwise offline would prevent RPC calls
4203 # later in the procedure; this also means that if the re-add
4204 # fails, we are left with a non-offlined, broken node
4205 if self.op.readd:
4206 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4207 self.LogInfo("Readding a node, the offline/drained flags were reset")
4208 # if we demote the node, we do cleanup later in the procedure
4209 new_node.master_candidate = self.master_candidate
4210 if self.changed_primary_ip:
4211 new_node.primary_ip = self.op.primary_ip
4213 # copy the master/vm_capable flags
4214 for attr in self._NFLAGS:
4215 setattr(new_node, attr, getattr(self.op, attr))
4217 # notify the user about any possible mc promotion
4218 if new_node.master_candidate:
4219 self.LogInfo("Node will be a master candidate")
4221 if self.op.ndparams:
4222 new_node.ndparams = self.op.ndparams
4223 else:
4224 new_node.ndparams = {}
4226 # check connectivity
4227 result = self.rpc.call_version([node])[node]
4228 result.Raise("Can't get version information from node %s" % node)
4229 if constants.PROTOCOL_VERSION == result.payload:
4230 logging.info("Communication to node %s fine, sw version %s match",
4231 node, result.payload)
4232 else:
4233 raise errors.OpExecError("Version mismatch master version %s,"
4234 " node version %s" %
4235 (constants.PROTOCOL_VERSION, result.payload))
4237 # Add node to our /etc/hosts, and add key to known_hosts
4238 if self.cfg.GetClusterInfo().modify_etc_hosts:
4239 master_node = self.cfg.GetMasterNode()
4240 result = self.rpc.call_etc_hosts_modify(master_node,
4241 constants.ETC_HOSTS_ADD,
4242 self.hostname.name,
4243 self.hostname.ip)
4244 result.Raise("Can't update hosts file with new host data")
4246 if new_node.secondary_ip != new_node.primary_ip:
4247 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4248 False)
4250 node_verify_list = [self.cfg.GetMasterNode()]
4251 node_verify_param = {
4252 constants.NV_NODELIST: [node],
4253 # TODO: do a node-net-test as well?
4256 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4257 self.cfg.GetClusterName())
4258 for verifier in node_verify_list:
4259 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4260 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4261 if nl_payload:
4262 for failed in nl_payload:
4263 feedback_fn("ssh/hostname verification failed"
4264 " (checking from %s): %s" %
4265 (verifier, nl_payload[failed]))
4266 raise errors.OpExecError("ssh/hostname verification failed.")
4268 if self.op.readd:
4269 _RedistributeAncillaryFiles(self)
4270 self.context.ReaddNode(new_node)
4271 # make sure we redistribute the config
4272 self.cfg.Update(new_node, feedback_fn)
4273 # and make sure the new node will not have old files around
4274 if not new_node.master_candidate:
4275 result = self.rpc.call_node_demote_from_mc(new_node.name)
4276 msg = result.fail_msg
4277 if msg:
4278 self.LogWarning("Node failed to demote itself from master"
4279 " candidate status: %s" % msg)
4280 else:
4281 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4282 additional_vm=self.op.vm_capable)
4283 self.context.AddNode(new_node, self.proc.GetECId())
4286 class LUNodeSetParams(LogicalUnit):
4287 """Modifies the parameters of a node.
4289 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4290 to the node role (as _ROLE_*)
4291 @cvar _R2F: a dictionary from node role to tuples of flags
4292 @cvar _FLAGS: a list of attribute names corresponding to the flags
4295 HPATH = "node-modify"
4296 HTYPE = constants.HTYPE_NODE
4298 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4299 _F2R = {
4300 (True, False, False): _ROLE_CANDIDATE,
4301 (False, True, False): _ROLE_DRAINED,
4302 (False, False, True): _ROLE_OFFLINE,
4303 (False, False, False): _ROLE_REGULAR,
4304 }
4305 _R2F = dict((v, k) for k, v in _F2R.items())
4306 _FLAGS = ["master_candidate", "drained", "offline"]
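# Editorial sketch of the tables above (tuples taken from _F2R, usage
# assumed): the flag tuple follows the order documented in the class
# docstring, i.e. (master_candidate, drained, offline):
#   flags = (node.master_candidate, node.drained, node.offline)
#   role = LUNodeSetParams._F2R[flags]       # (True, False, False) -> _ROLE_CANDIDATE
#   flags_back = LUNodeSetParams._R2F[role]  # -> (True, False, False)
# _FLAGS lists the node attributes in the same tuple order.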
4308 def CheckArguments(self):
4309 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4310 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4311 self.op.master_capable, self.op.vm_capable,
4312 self.op.secondary_ip, self.op.ndparams]
4313 if all_mods.count(None) == len(all_mods):
4314 raise errors.OpPrereqError("Please pass at least one modification",
4315 errors.ECODE_INVAL)
4316 if all_mods.count(True) > 1:
4317 raise errors.OpPrereqError("Can't set the node into more than one"
4318 " state at the same time",
4319 errors.ECODE_INVAL)
4321 # Boolean value that tells us whether we might be demoting from MC
4322 self.might_demote = (self.op.master_candidate == False or
4323 self.op.offline == True or
4324 self.op.drained == True or
4325 self.op.master_capable == False)
4327 if self.op.secondary_ip:
4328 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4329 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4330 " address" % self.op.secondary_ip,
4331 errors.ECODE_INVAL)
4333 self.lock_all = self.op.auto_promote and self.might_demote
4334 self.lock_instances = self.op.secondary_ip is not None
4336 def ExpandNames(self):
4337 if self.lock_all:
4338 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4339 else:
4340 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4342 if self.lock_instances:
4343 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4345 def DeclareLocks(self, level):
4346 # If we have locked all instances, before waiting to lock nodes, release
4347 # all the ones living on nodes unrelated to the current operation.
4348 if level == locking.LEVEL_NODE and self.lock_instances:
4349 instances_release = []
4350 instances_keep = []
4351 self.affected_instances = []
4352 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4353 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4354 instance = self.context.cfg.GetInstanceInfo(instance_name)
4355 i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4356 if i_mirrored and self.op.node_name in instance.all_nodes:
4357 instances_keep.append(instance_name)
4358 self.affected_instances.append(instance)
4359 else:
4360 instances_release.append(instance_name)
4361 if instances_release:
4362 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4363 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4365 def BuildHooksEnv(self):
4367 """Build hooks env.
4368 This runs on the master node.
4371 env = {
4372 "OP_TARGET": self.op.node_name,
4373 "MASTER_CANDIDATE": str(self.op.master_candidate),
4374 "OFFLINE": str(self.op.offline),
4375 "DRAINED": str(self.op.drained),
4376 "MASTER_CAPABLE": str(self.op.master_capable),
4377 "VM_CAPABLE": str(self.op.vm_capable),
4378 }
4379 nl = [self.cfg.GetMasterNode(),
4380 self.op.node_name]
4381 return env, nl, nl
4383 def CheckPrereq(self):
4384 """Check prerequisites.
4386 This only checks the instance list against the existing names.
4389 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4391 if (self.op.master_candidate is not None or
4392 self.op.drained is not None or
4393 self.op.offline is not None):
4394 # we can't change the master's node flags
4395 if self.op.node_name == self.cfg.GetMasterNode():
4396 raise errors.OpPrereqError("The master role can be changed"
4397 " only via master-failover",
4398 errors.ECODE_INVAL)
4400 if self.op.master_candidate and not node.master_capable:
4401 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4402 " it a master candidate" % node.name,
4403 errors.ECODE_STATE)
4405 if self.op.vm_capable == False:
4406 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4407 if ipri or isec:
4408 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4409 " the vm_capable flag" % node.name,
4410 errors.ECODE_STATE)
4412 if node.master_candidate and self.might_demote and not self.lock_all:
4413 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4414 # check if after removing the current node, we're missing master
4415 # candidates
4416 (mc_remaining, mc_should, _) = \
4417 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4418 if mc_remaining < mc_should:
4419 raise errors.OpPrereqError("Not enough master candidates, please"
4420 " pass auto promote option to allow"
4421 " promotion", errors.ECODE_STATE)
4423 self.old_flags = old_flags = (node.master_candidate,
4424 node.drained, node.offline)
4425 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4426 self.old_role = old_role = self._F2R[old_flags]
4428 # Check for ineffective changes
4429 for attr in self._FLAGS:
4430 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4431 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4432 setattr(self.op, attr, None)
4434 # Past this point, any flag change to False means a transition
4435 # away from the respective state, as only real changes are kept
4437 # TODO: We might query the real power state if it supports OOB
4438 if _SupportsOob(self.cfg, node):
4439 if self.op.offline is False and not (node.powered or
4440 self.op.powered == True):
4441 raise errors.OpPrereqError(("Please power on node %s first before you"
4442 " can reset offline state") %
4443 self.op.node_name)
4444 elif self.op.powered is not None:
4445 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4446 " which does not support out-of-band"
4447 " handling") % self.op.node_name)
4449 # If we're being deofflined/drained, we'll MC ourself if needed
4450 if (self.op.drained == False or self.op.offline == False or
4451 (self.op.master_capable and not node.master_capable)):
4452 if _DecideSelfPromotion(self):
4453 self.op.master_candidate = True
4454 self.LogInfo("Auto-promoting node to master candidate")
4456 # If we're no longer master capable, we'll demote ourselves from MC
4457 if self.op.master_capable == False and node.master_candidate:
4458 self.LogInfo("Demoting from master candidate")
4459 self.op.master_candidate = False
4461 # Compute new role
4462 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4463 if self.op.master_candidate:
4464 new_role = self._ROLE_CANDIDATE
4465 elif self.op.drained:
4466 new_role = self._ROLE_DRAINED
4467 elif self.op.offline:
4468 new_role = self._ROLE_OFFLINE
4469 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4470 # False is still in new flags, which means we're un-setting (the
4471 # offline state)
4472 new_role = self._ROLE_REGULAR
4473 else: # no new flags, nothing, keep old role
4474 new_role = old_role
4476 self.new_role = new_role
4478 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4479 # Trying to transition out of offline status
4480 result = self.rpc.call_version([node.name])[node.name]
4481 if result.fail_msg:
4482 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4483 " to report its version: %s" %
4484 (node.name, result.fail_msg),
4485 errors.ECODE_UNDERLYING)
4486 else:
4487 self.LogWarning("Transitioning node from offline to online state"
4488 " without using re-add. Please make sure the node"
4489 " is healthy!")
4491 if self.op.secondary_ip:
4492 # Ok even without locking, because this can't be changed by any LU
4493 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4494 master_singlehomed = master.secondary_ip == master.primary_ip
4495 if master_singlehomed and self.op.secondary_ip:
4496 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4497 " homed cluster", errors.ECODE_INVAL)
4499 if node.offline:
4500 if self.affected_instances:
4501 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4502 " node has instances (%s) configured"
4503 " to use it" % self.affected_instances)
4504 else:
4505 # On online nodes, check that no instances are running, and that
4506 # the node has the new ip and we can reach it.
4507 for instance in self.affected_instances:
4508 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4510 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4511 if master.name != node.name:
4512 # check reachability from master secondary ip to new secondary ip
4513 if not netutils.TcpPing(self.op.secondary_ip,
4514 constants.DEFAULT_NODED_PORT,
4515 source=master.secondary_ip):
4516 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4517 " based ping to node daemon port",
4518 errors.ECODE_ENVIRON)
4520 if self.op.ndparams:
4521 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4522 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4523 self.new_ndparams = new_ndparams
4525 def Exec(self, feedback_fn):
4526 """Modifies a node.
4529 node = self.node
4530 old_role = self.old_role
4531 new_role = self.new_role
4533 result = []
4535 if self.op.ndparams:
4536 node.ndparams = self.new_ndparams
4537 result.append(("ndparams", str(node.ndparams)))
4538 if self.op.powered is not None:
4539 node.powered = self.op.powered
4541 for attr in ["master_capable", "vm_capable"]:
4542 val = getattr(self.op, attr)
4543 if val is not None:
4544 setattr(node, attr, val)
4545 result.append((attr, str(val)))
4547 if new_role != old_role:
4548 # Tell the node to demote itself, if no longer MC and not offline
4549 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4550 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4551 if msg:
4552 self.LogWarning("Node failed to demote itself: %s", msg)
4554 new_flags = self._R2F[new_role]
4555 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4556 if of != nf:
4557 result.append((desc, str(nf)))
4558 (node.master_candidate, node.drained, node.offline) = new_flags
4560 # we locked all nodes, we adjust the CP before updating this node
4561 if self.lock_all:
4562 _AdjustCandidatePool(self, [node.name])
4564 if self.op.secondary_ip:
4565 node.secondary_ip = self.op.secondary_ip
4566 result.append(("secondary_ip", self.op.secondary_ip))
4568 # this will trigger configuration file update, if needed
4569 self.cfg.Update(node, feedback_fn)
4571 # this will trigger job queue propagation or cleanup if the mc
4572 # flag changed
4573 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4574 self.context.ReaddNode(node)
4576 return result
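# Sketch of the return value (format inferred from the appends above, sample
# values assumed): a list of (attribute, new-value-as-string) pairs, e.g.
#   [("master_candidate", "False"), ("drained", "True")]
# which the caller can present as feedback for the modification.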
4579 class LUNodePowercycle(NoHooksLU):
4580 """Powercycles a node.
4585 def CheckArguments(self):
4586 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4587 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4588 raise errors.OpPrereqError("The node is the master and the force"
4589 " parameter was not set",
4590 errors.ECODE_INVAL)
4592 def ExpandNames(self):
4593 """Locking for PowercycleNode.
4595 This is a last-resort option and shouldn't block on other
4596 jobs. Therefore, we grab no locks.
4599 self.needed_locks = {}
4601 def Exec(self, feedback_fn):
4605 result = self.rpc.call_node_powercycle(self.op.node_name,
4606 self.cfg.GetHypervisorType())
4607 result.Raise("Failed to schedule the reboot")
4608 return result.payload
4611 class LUClusterQuery(NoHooksLU):
4612 """Query cluster configuration.
4617 def ExpandNames(self):
4618 self.needed_locks = {}
4620 def Exec(self, feedback_fn):
4621 """Return cluster config.
4624 cluster = self.cfg.GetClusterInfo()
4625 os_hvp = {}
4627 # Filter just for enabled hypervisors
4628 for os_name, hv_dict in cluster.os_hvp.items():
4629 os_hvp[os_name] = {}
4630 for hv_name, hv_params in hv_dict.items():
4631 if hv_name in cluster.enabled_hypervisors:
4632 os_hvp[os_name][hv_name] = hv_params
4634 # Convert ip_family to ip_version
4635 primary_ip_version = constants.IP4_VERSION
4636 if cluster.primary_ip_family == netutils.IP6Address.family:
4637 primary_ip_version = constants.IP6_VERSION
4639 result = {
4640 "software_version": constants.RELEASE_VERSION,
4641 "protocol_version": constants.PROTOCOL_VERSION,
4642 "config_version": constants.CONFIG_VERSION,
4643 "os_api_version": max(constants.OS_API_VERSIONS),
4644 "export_version": constants.EXPORT_VERSION,
4645 "architecture": (platform.architecture()[0], platform.machine()),
4646 "name": cluster.cluster_name,
4647 "master": cluster.master_node,
4648 "default_hypervisor": cluster.enabled_hypervisors[0],
4649 "enabled_hypervisors": cluster.enabled_hypervisors,
4650 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4651 for hypervisor_name in cluster.enabled_hypervisors]),
4652 "os_hvp": os_hvp,
4653 "beparams": cluster.beparams,
4654 "osparams": cluster.osparams,
4655 "nicparams": cluster.nicparams,
4656 "ndparams": cluster.ndparams,
4657 "candidate_pool_size": cluster.candidate_pool_size,
4658 "master_netdev": cluster.master_netdev,
4659 "volume_group_name": cluster.volume_group_name,
4660 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4661 "file_storage_dir": cluster.file_storage_dir,
4662 "maintain_node_health": cluster.maintain_node_health,
4663 "ctime": cluster.ctime,
4664 "mtime": cluster.mtime,
4665 "uuid": cluster.uuid,
4666 "tags": list(cluster.GetTags()),
4667 "uid_pool": cluster.uid_pool,
4668 "default_iallocator": cluster.default_iallocator,
4669 "reserved_lvs": cluster.reserved_lvs,
4670 "primary_ip_version": primary_ip_version,
4671 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4672 "hidden_os": cluster.hidden_os,
4673 "blacklisted_os": cluster.blacklisted_os,
4674 }
4676 return result
4679 class LUClusterConfigQuery(NoHooksLU):
4680 """Return configuration values.
4684 _FIELDS_DYNAMIC = utils.FieldSet()
4685 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4686 "watcher_pause", "volume_group_name")
4688 def CheckArguments(self):
4689 _CheckOutputFields(static=self._FIELDS_STATIC,
4690 dynamic=self._FIELDS_DYNAMIC,
4691 selected=self.op.output_fields)
4693 def ExpandNames(self):
4694 self.needed_locks = {}
4696 def Exec(self, feedback_fn):
4697 """Dump a representation of the cluster config to the standard output.
4700 values = []
4701 for field in self.op.output_fields:
4702 if field == "cluster_name":
4703 entry = self.cfg.GetClusterName()
4704 elif field == "master_node":
4705 entry = self.cfg.GetMasterNode()
4706 elif field == "drain_flag":
4707 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4708 elif field == "watcher_pause":
4709 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4710 elif field == "volume_group_name":
4711 entry = self.cfg.GetVGName()
4713 raise errors.ParameterError(field)
4714 values.append(entry)
4716 return values
4718 class LUInstanceActivateDisks(NoHooksLU):
4719 """Bring up an instance's disks.
4724 def ExpandNames(self):
4725 self._ExpandAndLockInstance()
4726 self.needed_locks[locking.LEVEL_NODE] = []
4727 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4729 def DeclareLocks(self, level):
4730 if level == locking.LEVEL_NODE:
4731 self._LockInstancesNodes()
4733 def CheckPrereq(self):
4734 """Check prerequisites.
4736 This checks that the instance is in the cluster.
4739 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4740 assert self.instance is not None, \
4741 "Cannot retrieve locked instance %s" % self.op.instance_name
4742 _CheckNodeOnline(self, self.instance.primary_node)
4744 def Exec(self, feedback_fn):
4745 """Activate the disks.
4748 disks_ok, disks_info = \
4749 _AssembleInstanceDisks(self, self.instance,
4750 ignore_size=self.op.ignore_size)
4751 if not disks_ok:
4752 raise errors.OpExecError("Cannot activate block devices")
4754 return disks_info
4757 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4758 ignore_size=False):
4759 """Prepare the block devices for an instance.
4761 This sets up the block devices on all nodes.
4763 @type lu: L{LogicalUnit}
4764 @param lu: the logical unit on whose behalf we execute
4765 @type instance: L{objects.Instance}
4766 @param instance: the instance for whose disks we assemble
4767 @type disks: list of L{objects.Disk} or None
4768 @param disks: which disks to assemble (or all, if None)
4769 @type ignore_secondaries: boolean
4770 @param ignore_secondaries: if true, errors on secondary nodes
4771 won't result in an error return from the function
4772 @type ignore_size: boolean
4773 @param ignore_size: if true, the current known size of the disk
4774 will not be used during the disk activation, useful for cases
4775 when the size is wrong
4776 @return: False if the operation failed, otherwise a list of
4777 (host, instance_visible_name, node_visible_name)
4778 with the mapping from node devices to instance devices
4781 device_info = []
4782 disks_ok = True
4783 iname = instance.name
4784 disks = _ExpandCheckDisks(instance, disks)
4786 # With the two passes mechanism we try to reduce the window of
4787 # opportunity for the race condition of switching DRBD to primary
4788 # before handshaking occured, but we do not eliminate it
4790 # The proper fix would be to wait (with some limits) until the
4791 # connection has been made and drbd transitions from WFConnection
4792 # into any other network-connected state (Connected, SyncTarget,
4793 # SyncSource, etc.)
4795 # 1st pass, assemble on all nodes in secondary mode
4796 for idx, inst_disk in enumerate(disks):
4797 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4798 if ignore_size:
4799 node_disk = node_disk.Copy()
4800 node_disk.UnsetSize()
4801 lu.cfg.SetDiskID(node_disk, node)
4802 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
4803 msg = result.fail_msg
4804 if msg:
4805 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4806 " (is_primary=False, pass=1): %s",
4807 inst_disk.iv_name, node, msg)
4808 if not ignore_secondaries:
4809 disks_ok = False
4811 # FIXME: race condition on drbd migration to primary
4813 # 2nd pass, do only the primary node
4814 for idx, inst_disk in enumerate(disks):
4815 dev_path = None
4817 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4818 if node != instance.primary_node:
4819 continue
4820 if ignore_size:
4821 node_disk = node_disk.Copy()
4822 node_disk.UnsetSize()
4823 lu.cfg.SetDiskID(node_disk, node)
4824 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
4825 msg = result.fail_msg
4826 if msg:
4827 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4828 " (is_primary=True, pass=2): %s",
4829 inst_disk.iv_name, node, msg)
4830 disks_ok = False
4831 else:
4832 dev_path = result.payload
4834 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4836 # leave the disks configured for the primary node
4837 # this is a workaround that would be fixed better by
4838 # improving the logical/physical id handling
4839 for disk in disks:
4840 lu.cfg.SetDiskID(disk, instance.primary_node)
4842 return disks_ok, device_info
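# Usage sketch (device path assumed for illustration): callers treat
# disks_ok as all-or-nothing and device_info as the primary node's view,
# mirroring LUInstanceActivateDisks above:
#   disks_ok, dev_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#       raise errors.OpExecError("Cannot activate block devices")
#   # dev_info example: [("node1", "disk/0", "/dev/drbd0")]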
4845 def _StartInstanceDisks(lu, instance, force):
4846 """Start the disks of an instance.
4849 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4850 ignore_secondaries=force)
4851 if not disks_ok:
4852 _ShutdownInstanceDisks(lu, instance)
4853 if force is not None and not force:
4854 lu.proc.LogWarning("", hint="If the message above refers to a"
4855 " secondary node,"
4856 " you can retry the operation using '--force'.")
4857 raise errors.OpExecError("Disk consistency error")
4860 class LUInstanceDeactivateDisks(NoHooksLU):
4861 """Shutdown an instance's disks.
4866 def ExpandNames(self):
4867 self._ExpandAndLockInstance()
4868 self.needed_locks[locking.LEVEL_NODE] = []
4869 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4871 def DeclareLocks(self, level):
4872 if level == locking.LEVEL_NODE:
4873 self._LockInstancesNodes()
4875 def CheckPrereq(self):
4876 """Check prerequisites.
4878 This checks that the instance is in the cluster.
4881 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4882 assert self.instance is not None, \
4883 "Cannot retrieve locked instance %s" % self.op.instance_name
4885 def Exec(self, feedback_fn):
4886 """Deactivate the disks.
4889 instance = self.instance
4890 if self.op.force:
4891 _ShutdownInstanceDisks(self, instance)
4892 else:
4893 _SafeShutdownInstanceDisks(self, instance)
4896 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4897 """Shutdown block devices of an instance.
4899 This function checks if an instance is running, before calling
4900 _ShutdownInstanceDisks.
4903 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4904 _ShutdownInstanceDisks(lu, instance, disks=disks)
4907 def _ExpandCheckDisks(instance, disks):
4908 """Return the instance disks selected by the disks list
4910 @type disks: list of L{objects.Disk} or None
4911 @param disks: selected disks
4912 @rtype: list of L{objects.Disk}
4913 @return: selected instance disks to act on
4916 if disks is None:
4917 return instance.disks
4918 else:
4919 if not set(disks).issubset(instance.disks):
4920 raise errors.ProgrammerError("Can only act on disks belonging to the"
4921 " owning instance")
4922 return disks
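# Example (sketch): _ExpandCheckDisks(instance, None) returns all of
# instance.disks; _ExpandCheckDisks(instance, [instance.disks[0]]) returns
# just that subset; passing a disk object owned by another instance raises
# ProgrammerError.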
4925 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4926 """Shutdown block devices of an instance.
4928 This does the shutdown on all nodes of the instance.
4930 If the ignore_primary is false, errors on the primary node are
4931 ignored.
4934 all_result = True
4935 disks = _ExpandCheckDisks(instance, disks)
4937 for disk in disks:
4938 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4939 lu.cfg.SetDiskID(top_disk, node)
4940 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4941 msg = result.fail_msg
4942 if msg:
4943 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4944 disk.iv_name, node, msg)
4945 if ((node == instance.primary_node and not ignore_primary) or
4946 (node != instance.primary_node and not result.offline)):
4947 all_result = False
4949 return all_result
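# Usage sketch (see LUInstanceFailover below): a False result is only
# produced for errors the caller must care about, e.g.
#   if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
#     raise errors.OpExecError("Can't shut down the instance's disks.")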
4951 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4952 """Checks if a node has enough free memory.
4954 This function checks if a given node has the needed amount of free
4955 memory. In case the node has less memory or we cannot get the
4956 information from the node, this function raises an OpPrereqError
4957 exception.
4959 @type lu: C{LogicalUnit}
4960 @param lu: a logical unit from which we get configuration data
4962 @param node: the node to check
4963 @type reason: C{str}
4964 @param reason: string to use in the error message
4965 @type requested: C{int}
4966 @param requested: the amount of memory in MiB to check for
4967 @type hypervisor_name: C{str}
4968 @param hypervisor_name: the hypervisor to ask for memory stats
4969 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4970 we cannot check the node
4973 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4974 nodeinfo[node].Raise("Can't get data from node %s" % node,
4975 prereq=True, ecode=errors.ECODE_ENVIRON)
4976 free_mem = nodeinfo[node].payload.get('memory_free', None)
4977 if not isinstance(free_mem, int):
4978 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4979 " was '%s'" % (node, free_mem),
4980 errors.ECODE_ENVIRON)
4981 if requested > free_mem:
4982 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4983 " needed %s MiB, available %s MiB" %
4984 (node, reason, requested, free_mem),
4985 errors.ECODE_NORES)
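# Usage sketch (mirrors the call in LUInstanceStartup.CheckPrereq below,
# variable names taken from that context):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)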
4988 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
4989 """Checks if nodes have enough free disk space in all the VGs.
4991 This function checks if all given nodes have the needed amount of
4992 free disk. In case any node has less disk or we cannot get the
4993 information from the node, this function raises an OpPrereqError
4994 exception.
4996 @type lu: C{LogicalUnit}
4997 @param lu: a logical unit from which we get configuration data
4998 @type nodenames: C{list}
4999 @param nodenames: the list of node names to check
5000 @type req_sizes: C{dict}
5001 @param req_sizes: the hash of vg and corresponding amount of disk in
5002 MiB to check for
5003 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5004 or we cannot check the node
5007 for vg, req_size in req_sizes.items():
5008 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
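# Example (VG names and sizes assumed): req_sizes maps each volume group to
# the total MiB required on every node, e.g.
#   _CheckNodesFreeDiskPerVG(self, nodenames, {"xenvg": 10240, "data": 2048})
# which checks both VGs on all nodes via _CheckNodesFreeDiskOnVG.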
5011 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5012 """Checks if nodes have enough free disk space in the specified VG.
5014 This function checks if all given nodes have the needed amount of
5015 free disk. In case any node has less disk or we cannot get the
5016 information from the node, this function raises an OpPrereqError
5017 exception.
5019 @type lu: C{LogicalUnit}
5020 @param lu: a logical unit from which we get configuration data
5021 @type nodenames: C{list}
5022 @param nodenames: the list of node names to check
5023 @type vg: C{str}
5024 @param vg: the volume group to check
5025 @type requested: C{int}
5026 @param requested: the amount of disk in MiB to check for
5027 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5028 or we cannot check the node
5031 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5032 for node in nodenames:
5033 info = nodeinfo[node]
5034 info.Raise("Cannot get current information from node %s" % node,
5035 prereq=True, ecode=errors.ECODE_ENVIRON)
5036 vg_free = info.payload.get("vg_free", None)
5037 if not isinstance(vg_free, int):
5038 raise errors.OpPrereqError("Can't compute free disk space on node"
5039 " %s for vg %s, result was '%s'" %
5040 (node, vg, vg_free), errors.ECODE_ENVIRON)
5041 if requested > vg_free:
5042 raise errors.OpPrereqError("Not enough disk space on target node %s"
5043 " vg %s: required %d MiB, available %d MiB" %
5044 (node, vg, requested, vg_free),
5045 errors.ECODE_NORES)
5048 class LUInstanceStartup(LogicalUnit):
5049 """Starts an instance.
5052 HPATH = "instance-start"
5053 HTYPE = constants.HTYPE_INSTANCE
5056 def CheckArguments(self):
5058 if self.op.beparams:
5059 # fill the beparams dict
5060 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5062 def ExpandNames(self):
5063 self._ExpandAndLockInstance()
5065 def BuildHooksEnv(self):
5066 """Build hooks env.
5068 This runs on master, primary and secondary nodes of the instance.
5071 env = {
5072 "FORCE": self.op.force,
5073 }
5074 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5075 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5076 return env, nl, nl
5078 def CheckPrereq(self):
5079 """Check prerequisites.
5081 This checks that the instance is in the cluster.
5084 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5085 assert self.instance is not None, \
5086 "Cannot retrieve locked instance %s" % self.op.instance_name
5089 if self.op.hvparams:
5090 # check hypervisor parameter syntax (locally)
5091 cluster = self.cfg.GetClusterInfo()
5092 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5093 filled_hvp = cluster.FillHV(instance)
5094 filled_hvp.update(self.op.hvparams)
5095 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5096 hv_type.CheckParameterSyntax(filled_hvp)
5097 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5099 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5101 if self.primary_offline and self.op.ignore_offline_nodes:
5102 self.proc.LogWarning("Ignoring offline primary node")
5104 if self.op.hvparams or self.op.beparams:
5105 self.proc.LogWarning("Overridden parameters are ignored")
5106 else:
5107 _CheckNodeOnline(self, instance.primary_node)
5109 bep = self.cfg.GetClusterInfo().FillBE(instance)
5111 # check bridges existence
5112 _CheckInstanceBridgesExist(self, instance)
5114 remote_info = self.rpc.call_instance_info(instance.primary_node,
5115 instance.name,
5116 instance.hypervisor)
5117 remote_info.Raise("Error checking node %s" % instance.primary_node,
5118 prereq=True, ecode=errors.ECODE_ENVIRON)
5119 if not remote_info.payload: # not running already
5120 _CheckNodeFreeMemory(self, instance.primary_node,
5121 "starting instance %s" % instance.name,
5122 bep[constants.BE_MEMORY], instance.hypervisor)
5124 def Exec(self, feedback_fn):
5125 """Start the instance.
5128 instance = self.instance
5129 force = self.op.force
5131 self.cfg.MarkInstanceUp(instance.name)
5133 if self.primary_offline:
5134 assert self.op.ignore_offline_nodes
5135 self.proc.LogInfo("Primary node offline, marked instance as started")
5136 else:
5137 node_current = instance.primary_node
5139 _StartInstanceDisks(self, instance, force)
5141 result = self.rpc.call_instance_start(node_current, instance,
5142 self.op.hvparams, self.op.beparams)
5143 msg = result.fail_msg
5144 if msg:
5145 _ShutdownInstanceDisks(self, instance)
5146 raise errors.OpExecError("Could not start instance: %s" % msg)
5149 class LUInstanceReboot(LogicalUnit):
5150 """Reboot an instance.
5153 HPATH = "instance-reboot"
5154 HTYPE = constants.HTYPE_INSTANCE
5157 def ExpandNames(self):
5158 self._ExpandAndLockInstance()
5160 def BuildHooksEnv(self):
5161 """Build hooks env.
5163 This runs on master, primary and secondary nodes of the instance.
5166 env = {
5167 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5168 "REBOOT_TYPE": self.op.reboot_type,
5169 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5170 }
5171 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5172 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5173 return env, nl, nl
5175 def CheckPrereq(self):
5176 """Check prerequisites.
5178 This checks that the instance is in the cluster.
5181 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5182 assert self.instance is not None, \
5183 "Cannot retrieve locked instance %s" % self.op.instance_name
5185 _CheckNodeOnline(self, instance.primary_node)
5187 # check bridges existence
5188 _CheckInstanceBridgesExist(self, instance)
5190 def Exec(self, feedback_fn):
5191 """Reboot the instance.
5194 instance = self.instance
5195 ignore_secondaries = self.op.ignore_secondaries
5196 reboot_type = self.op.reboot_type
5198 node_current = instance.primary_node
5200 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5201 constants.INSTANCE_REBOOT_HARD]:
5202 for disk in instance.disks:
5203 self.cfg.SetDiskID(disk, node_current)
5204 result = self.rpc.call_instance_reboot(node_current, instance,
5205 reboot_type,
5206 self.op.shutdown_timeout)
5207 result.Raise("Could not reboot instance")
5208 else:
5209 result = self.rpc.call_instance_shutdown(node_current, instance,
5210 self.op.shutdown_timeout)
5211 result.Raise("Could not shutdown instance for full reboot")
5212 _ShutdownInstanceDisks(self, instance)
5213 _StartInstanceDisks(self, instance, ignore_secondaries)
5214 result = self.rpc.call_instance_start(node_current, instance, None, None)
5215 msg = result.fail_msg
5216 if msg:
5217 _ShutdownInstanceDisks(self, instance)
5218 raise errors.OpExecError("Could not start instance for"
5219 " full reboot: %s" % msg)
5221 self.cfg.MarkInstanceUp(instance.name)
5224 class LUInstanceShutdown(LogicalUnit):
5225 """Shutdown an instance.
5228 HPATH = "instance-stop"
5229 HTYPE = constants.HTYPE_INSTANCE
5232 def ExpandNames(self):
5233 self._ExpandAndLockInstance()
5235 def BuildHooksEnv(self):
5236 """Build hooks env.
5238 This runs on master, primary and secondary nodes of the instance.
5241 env = _BuildInstanceHookEnvByObject(self, self.instance)
5242 env["TIMEOUT"] = self.op.timeout
5243 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5244 return env, nl, nl
5246 def CheckPrereq(self):
5247 """Check prerequisites.
5249 This checks that the instance is in the cluster.
5252 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5253 assert self.instance is not None, \
5254 "Cannot retrieve locked instance %s" % self.op.instance_name
5256 self.primary_offline = \
5257 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5259 if self.primary_offline and self.op.ignore_offline_nodes:
5260 self.proc.LogWarning("Ignoring offline primary node")
5261 else:
5262 _CheckNodeOnline(self, self.instance.primary_node)
5264 def Exec(self, feedback_fn):
5265 """Shutdown the instance.
5268 instance = self.instance
5269 node_current = instance.primary_node
5270 timeout = self.op.timeout
5272 self.cfg.MarkInstanceDown(instance.name)
5274 if self.primary_offline:
5275 assert self.op.ignore_offline_nodes
5276 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5277 else:
5278 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5279 msg = result.fail_msg
5280 if msg:
5281 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5283 _ShutdownInstanceDisks(self, instance)
5286 class LUInstanceReinstall(LogicalUnit):
5287 """Reinstall an instance.
5290 HPATH = "instance-reinstall"
5291 HTYPE = constants.HTYPE_INSTANCE
5294 def ExpandNames(self):
5295 self._ExpandAndLockInstance()
5297 def BuildHooksEnv(self):
5298 """Build hooks env.
5300 This runs on master, primary and secondary nodes of the instance.
5303 env = _BuildInstanceHookEnvByObject(self, self.instance)
5304 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5305 return env, nl, nl
5307 def CheckPrereq(self):
5308 """Check prerequisites.
5310 This checks that the instance is in the cluster and is not running.
5313 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5314 assert instance is not None, \
5315 "Cannot retrieve locked instance %s" % self.op.instance_name
5316 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5317 " offline, cannot reinstall")
5318 for node in instance.secondary_nodes:
5319 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5320 " cannot reinstall")
5322 if instance.disk_template == constants.DT_DISKLESS:
5323 raise errors.OpPrereqError("Instance '%s' has no disks" %
5324 self.op.instance_name,
5325 errors.ECODE_INVAL)
5326 _CheckInstanceDown(self, instance, "cannot reinstall")
5328 if self.op.os_type is not None:
5329 # OS verification
5330 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5331 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5332 instance_os = self.op.os_type
5333 else:
5334 instance_os = instance.os
5336 nodelist = list(instance.all_nodes)
5338 if self.op.osparams:
5339 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5340 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5341 self.os_inst = i_osdict # the new dict (without defaults)
5342 else:
5343 self.os_inst = {}
5345 self.instance = instance
5347 def Exec(self, feedback_fn):
5348 """Reinstall the instance.
5351 inst = self.instance
5353 if self.op.os_type is not None:
5354 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5355 inst.os = self.op.os_type
5356 # Write to configuration
5357 self.cfg.Update(inst, feedback_fn)
5359 _StartInstanceDisks(self, inst, None)
5360 try:
5361 feedback_fn("Running the instance OS create scripts...")
5362 # FIXME: pass debug option from opcode to backend
5363 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5364 self.op.debug_level,
5365 osparams=self.os_inst)
5366 result.Raise("Could not install OS for instance %s on node %s" %
5367 (inst.name, inst.primary_node))
5368 finally:
5369 _ShutdownInstanceDisks(self, inst)
5372 class LUInstanceRecreateDisks(LogicalUnit):
5373 """Recreate an instance's missing disks.
5376 HPATH = "instance-recreate-disks"
5377 HTYPE = constants.HTYPE_INSTANCE
5380 def ExpandNames(self):
5381 self._ExpandAndLockInstance()
5383 def BuildHooksEnv(self):
5384 """Build hooks env.
5386 This runs on master, primary and secondary nodes of the instance.
5389 env = _BuildInstanceHookEnvByObject(self, self.instance)
5390 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5391 return env, nl, nl
5393 def CheckPrereq(self):
5394 """Check prerequisites.
5396 This checks that the instance is in the cluster and is not running.
5399 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5400 assert instance is not None, \
5401 "Cannot retrieve locked instance %s" % self.op.instance_name
5402 _CheckNodeOnline(self, instance.primary_node)
5404 if instance.disk_template == constants.DT_DISKLESS:
5405 raise errors.OpPrereqError("Instance '%s' has no disks" %
5406 self.op.instance_name, errors.ECODE_INVAL)
5407 _CheckInstanceDown(self, instance, "cannot recreate disks")
5409 if not self.op.disks:
5410 self.op.disks = range(len(instance.disks))
5411 else:
5412 for idx in self.op.disks:
5413 if idx >= len(instance.disks):
5414 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5415 errors.ECODE_INVAL)
5417 self.instance = instance
5419 def Exec(self, feedback_fn):
5420 """Recreate the disks.
5423 to_skip = []
5424 for idx, _ in enumerate(self.instance.disks):
5425 if idx not in self.op.disks: # disk idx has not been passed in
5426 to_skip.append(idx)
5429 _CreateDisks(self, self.instance, to_skip=to_skip)
5432 class LUInstanceRename(LogicalUnit):
5433 """Rename an instance.
5436 HPATH = "instance-rename"
5437 HTYPE = constants.HTYPE_INSTANCE
5439 def CheckArguments(self):
5440 """Check arguments.
5443 if self.op.ip_check and not self.op.name_check:
5444 # TODO: make the ip check more flexible and not depend on the name check
5445 raise errors.OpPrereqError("Cannot do ip check without a name check",
5446 errors.ECODE_INVAL)
5448 def BuildHooksEnv(self):
5449 """Build hooks env.
5451 This runs on master, primary and secondary nodes of the instance.
5454 env = _BuildInstanceHookEnvByObject(self, self.instance)
5455 env["INSTANCE_NEW_NAME"] = self.op.new_name
5456 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5457 return env, nl, nl
5459 def CheckPrereq(self):
5460 """Check prerequisites.
5462 This checks that the instance is in the cluster and is not running.
5465 self.op.instance_name = _ExpandInstanceName(self.cfg,
5466 self.op.instance_name)
5467 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5468 assert instance is not None
5469 _CheckNodeOnline(self, instance.primary_node)
5470 _CheckInstanceDown(self, instance, "cannot rename")
5471 self.instance = instance
5473 new_name = self.op.new_name
5474 if self.op.name_check:
5475 hostname = netutils.GetHostname(name=new_name)
5476 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5477 hostname.name)
5478 new_name = self.op.new_name = hostname.name
5479 if (self.op.ip_check and
5480 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5481 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5482 (hostname.ip, new_name),
5483 errors.ECODE_NOTUNIQUE)
5485 instance_list = self.cfg.GetInstanceList()
5486 if new_name in instance_list and new_name != instance.name:
5487 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5488 new_name, errors.ECODE_EXISTS)
5490 def Exec(self, feedback_fn):
5491 """Rename the instance.
5494 inst = self.instance
5495 old_name = inst.name
5497 rename_file_storage = False
5498 if (inst.disk_template == constants.DT_FILE and
5499 self.op.new_name != inst.name):
5500 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5501 rename_file_storage = True
5503 self.cfg.RenameInstance(inst.name, self.op.new_name)
5504 # Change the instance lock. This is definitely safe while we hold the BGL
5505 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5506 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5508 # re-read the instance from the configuration after rename
5509 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5511 if rename_file_storage:
5512 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5513 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5514 old_file_storage_dir,
5515 new_file_storage_dir)
5516 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5517 " (but the instance has been renamed in Ganeti)" %
5518 (inst.primary_node, old_file_storage_dir,
5519 new_file_storage_dir))
5521 _StartInstanceDisks(self, inst, None)
5522 try:
5523 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5524 old_name, self.op.debug_level)
5525 msg = result.fail_msg
5526 if msg:
5527 msg = ("Could not run OS rename script for instance %s on node %s"
5528 " (but the instance has been renamed in Ganeti): %s" %
5529 (inst.name, inst.primary_node, msg))
5530 self.proc.LogWarning(msg)
5531 finally:
5532 _ShutdownInstanceDisks(self, inst)
5534 return inst.name
5537 class LUInstanceRemove(LogicalUnit):
5538 """Remove an instance.
5541 HPATH = "instance-remove"
5542 HTYPE = constants.HTYPE_INSTANCE
5545 def ExpandNames(self):
5546 self._ExpandAndLockInstance()
5547 self.needed_locks[locking.LEVEL_NODE] = []
5548 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5550 def DeclareLocks(self, level):
5551 if level == locking.LEVEL_NODE:
5552 self._LockInstancesNodes()
5554 def BuildHooksEnv(self):
5555 """Build hooks env.
5557 This runs on master, primary and secondary nodes of the instance.
5560 env = _BuildInstanceHookEnvByObject(self, self.instance)
5561 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5562 nl = [self.cfg.GetMasterNode()]
5563 nl_post = list(self.instance.all_nodes) + nl
5564 return env, nl, nl_post
5566 def CheckPrereq(self):
5567 """Check prerequisites.
5569 This checks that the instance is in the cluster.
5572 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5573 assert self.instance is not None, \
5574 "Cannot retrieve locked instance %s" % self.op.instance_name
5576 def Exec(self, feedback_fn):
5577 """Remove the instance.
5580 instance = self.instance
5581 logging.info("Shutting down instance %s on node %s",
5582 instance.name, instance.primary_node)
5584 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5585 self.op.shutdown_timeout)
5586 msg = result.fail_msg
5587 if msg:
5588 if self.op.ignore_failures:
5589 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5590 else:
5591 raise errors.OpExecError("Could not shutdown instance %s on"
5592 " node %s: %s" %
5593 (instance.name, instance.primary_node, msg))
5595 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5598 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5599 """Utility function to remove an instance.
5602 logging.info("Removing block devices for instance %s", instance.name)
5604 if not _RemoveDisks(lu, instance):
5605 if not ignore_failures:
5606 raise errors.OpExecError("Can't remove instance's disks")
5607 feedback_fn("Warning: can't remove instance's disks")
5609 logging.info("Removing instance %s out of cluster config", instance.name)
5611 lu.cfg.RemoveInstance(instance.name)
5613 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5614 "Instance lock removal conflict"
5616 # Remove lock for the instance
5617 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5620 class LUInstanceQuery(NoHooksLU):
5621 """Logical unit for querying instances.
5624 # pylint: disable-msg=W0142
5627 def CheckArguments(self):
5628 self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5629 self.op.use_locking)
5631 def ExpandNames(self):
5632 self.iq.ExpandNames(self)
5634 def DeclareLocks(self, level):
5635 self.iq.DeclareLocks(self, level)
5637 def Exec(self, feedback_fn):
5638 return self.iq.OldStyleQuery(self)
5641 class LUInstanceFailover(LogicalUnit):
5642 """Failover an instance.
5645 HPATH = "instance-failover"
5646 HTYPE = constants.HTYPE_INSTANCE
5649 def ExpandNames(self):
5650 self._ExpandAndLockInstance()
5651 self.needed_locks[locking.LEVEL_NODE] = []
5652 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5654 def DeclareLocks(self, level):
5655 if level == locking.LEVEL_NODE:
5656 self._LockInstancesNodes()
5658 def BuildHooksEnv(self):
5659 """Build hooks env.
5661 This runs on master, primary and secondary nodes of the instance.
5664 instance = self.instance
5665 source_node = instance.primary_node
5666 target_node = instance.secondary_nodes[0]
5667 env = {
5668 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5669 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5670 "OLD_PRIMARY": source_node,
5671 "OLD_SECONDARY": target_node,
5672 "NEW_PRIMARY": target_node,
5673 "NEW_SECONDARY": source_node,
5675 env.update(_BuildInstanceHookEnvByObject(self, instance))
5676 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5678 nl_post.append(source_node)
5679 return env, nl, nl_post
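  # The OLD_*/NEW_* pairs above encode the role swap a failover performs:
  # the old secondary becomes the new primary and vice versa. With purely
  # illustrative node names, the hooks environment resembles:
  #
  #   {"OLD_PRIMARY": "node1", "OLD_SECONDARY": "node2",
  #    "NEW_PRIMARY": "node2", "NEW_SECONDARY": "node1"}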
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)
  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency or primary_node.offline:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self._migrater.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)
  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run

  """
  def __init__(self, lu, instance_name, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

    if self.lu.op.live is not None and self.lu.op.mode is not None:
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                 " parameters is accepted",
                                 errors.ECODE_INVAL)
    if self.lu.op.live is not None:
      if self.lu.op.live:
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
      else:
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
      # reset the 'live' parameter to None so that repeated
      # invocations of CheckPrereq do not raise an exception
      self.lu.op.live = None
    elif self.lu.op.mode is None:
      # read the default value from the hypervisor
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
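  # A condensed sketch of the live/mode precedence implemented above, as a
  # pure helper; op_live/op_mode/hv_default are hypothetical stand-ins for
  # self.lu.op.live, self.lu.op.mode and the hypervisor's default mode:
  @staticmethod
  def _ExampleResolveMigrationMode(op_live, op_mode, hv_default):
    """Illustrative only: mirrors the precedence rules of CheckPrereq."""
    if op_live is not None and op_mode is not None:
      raise ValueError("only one of 'live' and 'mode' is accepted")
    if op_live is not None:
      if op_live:
        return constants.HT_MIGRATION_LIVE
      return constants.HT_MIGRATION_NONLIVE
    if op_mode is None:
      return hv_default
    return op_mode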
  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)
  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)
  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")
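  # The four (runningon_source, runningon_target) combinations above reduce
  # to a small decision table; a hypothetical helper, for illustration only:
  @staticmethod
  def _ExampleCleanupDecision(on_source, on_target):
    """Illustrative only: classify the cleanup situation."""
    if on_source and on_target:
      return "split-brain: manual intervention required"
    if not (on_source or on_target):
      return "not running: stop and restart the instance instead"
    if on_target:
      return "migration succeeded: demote the old primary"
    return "migration failed early: demote the target"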
  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")
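  # For orientation, the DRBD reconfiguration order used above is:
  #   _EnsureSecondary(target) -> _GoStandalone() -> _GoReconnect(True)
  #   -> _WaitUntilSync() -> migrate -> _EnsureSecondary(source)
  #   -> _WaitUntilSync() -> _GoStandalone() -> _GoReconnect(False)
  #   -> _WaitUntilSync()
  # i.e. the disks are dual-master only for the duration of the migration
  # itself, and every mode change is followed by a resync barrier.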
  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))

  return results
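# A hedged sketch of the resulting naming scheme: GenerateUniqueID returns a
# UUID-like string, so for exts [".disk0", ".disk1"] the result resembles
# ["<uuid>.disk0", "<uuid>.disk1"]; DRBD callers then derive "_data"/"_meta"
# LV names from each prefix (see _GenerateDiskTemplate below), e.g.:
#
#   names = _GenerateUniqueNames(lu, [".disk%d" % i for i in range(2)])
#   data_lv, meta_lv = names[0] + "_data", names[0] + "_meta"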
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get("vg", vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get("vg", vgname)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], vg, names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

  return disks
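# The DRBD branch above consumes names and minors pairwise; a minimal,
# self-contained sketch of that index arithmetic (illustrative data only):
def _ExampleDrbdPairing():
  """Show how per-disk (data, meta) names and (p, s) minors line up."""
  names = ["u0.disk0_data", "u0.disk0_meta", "u1.disk1_data", "u1.disk1_meta"]
  minors = [0, 1, 2, 3]  # as returned by AllocateDRBDMinor for two disks
  pairs = []
  for idx in range(2):
    # disk idx uses names[idx*2:idx*2+2] and minors (idx*2, idx*2+1)
    pairs.append((names[idx * 2:idx * 2 + 2],
                  (minors[idx * 2], minors[idx * 2 + 1])))
  return pairs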
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
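# Worked example: after 30s with 256 MB of 1024 MB written, the average is
# 30/256 s/MB, so the remaining 768 MB need about 90 more seconds:
#
#   _CalcEta(30.0, 256, 1024) == (1024 - 256) * (30.0 / 256) == 90.0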
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node
  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s", idx, instance.name)

      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
                      " look at the status and troubleshoot the issue.", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)
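# Sketch of the chunk-size rule above, assuming (values to be checked against
# constants.py) MAX_WIPE_CHUNK = 1024 MB and MIN_WIPE_CHUNK_PERCENT = 10:
# a 2048 MB disk yields min(1024, 2048 * 0.10) = 204.8 MB chunks, while a
# 100 GB disk is capped at the 1024 MB maximum.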
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm

    """
    vgs = {}
    for disk in disks:
      # accumulate the requirement per volume group
      vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
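# Worked example: two 1024 MB DRBD disks in VG "xenvg" and one 512 MB disk in
# "fastvg" (illustrative names) yield, with the 128 MB metadata overhead per
# disk:
#
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{"vg": "xenvg", "size": 1024},
#                          {"vg": "xenvg", "size": 1024},
#                          {"vg": "fastvg", "size": 512}])
#   => {"xenvg": 2304, "fastvg": 640}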
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
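# Worked example: for DT_DRBD8, each disk pays the 128 MB metadata overhead,
# so disks of 1024 MB and 2048 MB require (1024 + 128) + (2048 + 128) =
# 3328 MB, while DT_PLAIN would need the plain sum, 3072 MB.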
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
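  # For orientation: on success ial.result is an ordered list of node names
  # (primary first). A hypothetical helper showing how such a result is
  # consumed; not part of the IAllocator API:
  @staticmethod
  def _ExampleConsumeAllocatorResult(result, required_nodes):
    """Illustrative only: split an allocator result into (pnode, snode)."""
    assert len(result) == required_nodes
    if required_nodes == 2:
      return (result[0], result[1])
    return (result[0], None)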
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
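  # The same pruning pattern, as a standalone sketch on plain dicts; the
  # helper name is hypothetical and only illustrates the idea used above:
  @staticmethod
  def _ExamplePruneDefaults(params, defaults):
    """Illustrative only: drop entries that merely repeat the defaults."""
    return dict((name, value) for name, value in params.items()
                if not (name in defaults and defaults[name] == value))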
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                                    ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      vg = disk.get("vg", self.cfg.GetVGName())
      new_disk = {"size": size, "mode": mode, "vg": vg}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_INVAL)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)
7478 #### mac address generation
7479 # By generating here the mac address both the allocator and the hooks get
7480 # the real final mac address rather than the 'auto' or 'generate' value.
7481 # There is a race condition between the generation and the instance object
7482 # creation, which means that we know the mac is valid now, but we're not
7483 # sure it will be when we actually add the instance. If things go bad
7484 # adding the instance will abort because of a duplicate mac, and the
7485 # creation job will fail.
7486 for nic in self.nics:
7487 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7488 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    else: # instead, we must check the adoption data
      all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv"; we need to ensure that other
          # calls to ReserveLV use the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    if self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device wiping failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = "ERROR_down"
      else:
        state = "ADMIN_down"
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

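  # The serialized form is hypervisor-dependent; for an SSH-based console one
  # would expect roughly {"instance": ..., "kind": "ssh", "host": ...,
  # "command": [...]} -- illustrative only, the exact keys come from
  # objects.InstanceConsole.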
  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

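  # For a relocation request the allocator is expected to return exactly
  # ial.required_nodes names (a single one for a DRBD8 secondary change),
  # e.g. ["node3.example.com"] -- example value only.
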
  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

    # If not specified all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

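    # Worked example of the dance below for one disk (names illustrative):
    # old data LV "xenvg/abc.disk0_data" is first renamed to
    # "xenvg/abc.disk0_data_replaced-1300000000", the freshly created
    # "xenvg/def.disk0_data" is then renamed to the old name and re-attached
    # to the unchanged DRBD device, which resyncs its contents.
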
    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

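    # Illustrative logical_id transition for one disk: the old id
    # (pnode, old_snode, port, p_minor, s_minor, secret) first becomes the
    # network-less (pnode, new_node, None, p_minor, new_minor, secret) so
    # the device can be brought up standalone on the new node, and finally
    # the full (pnode, new_node, port, p_minor, new_minor, secret) once the
    # primary re-attaches over the network.
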
    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                               [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result

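  # Both branches return a list of [instance_name, new_secondary] pairs,
  # e.g. [["inst1.example.com", "node4.example.com"]] (names illustrative).

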
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature
      # will be supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

    # TODO: Rewrite code to work properly
    # DRBD goes into sync mode for a short amount of time after executing the
    # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
    # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.
    time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")

class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

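  # The tuple above unpacks as (dev_path, major, minor, sync_percent,
  # estimated_time, is_degraded, ldisk_status); e.g. a DRBD device mid-resync
  # might yield ("/dev/drbd0", 147, 0, 90.5, 120, True, None)
  # (values illustrative).
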
  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_NET_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl

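  # Each entry appended to args["nics"] above is an (ip, mac, mode, link)
  # tuple, e.g. (None, "aa:00:00:33:2f:05", "bridged", "xen-br0")
  # (values illustrative).
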
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{"size": d.size, "vg": d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

9221 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9222 mem_check_list = [pnode]
9223 if be_new[constants.BE_AUTO_BALANCE]:
9224 # either we changed auto_balance to yes or it was from before
9225 mem_check_list.extend(instance.secondary_nodes)
9226 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9227 instance.hypervisor)
9228 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9229 instance.hypervisor)
9230 pninfo = nodeinfo[pnode]
9231 msg = pninfo.fail_msg
9233 # Assume the primary node is unreachable and go ahead
9234 self.warn.append("Can't get info from primary node %s: %s" %
9236 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9237 self.warn.append("Node data from primary node %s doesn't contain"
9238 " free memory information" % pnode)
9239 elif instance_info.fail_msg:
9240 self.warn.append("Can't get instance runtime information: %s" %
9241 instance_info.fail_msg)
9243 if instance_info.payload:
9244 current_mem = int(instance_info.payload['memory'])
9246 # Assume instance not running
9247 # (there is a slight race condition here, but it's not very probable,
9248 # and we have no other way to check)
9250 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9251 pninfo.payload['memory_free'])
9253 raise errors.OpPrereqError("This change will prevent the instance"
9254 " from starting, due to %d MB of memory"
9255 " missing on its primary node" % miss_mem,
9258 if be_new[constants.BE_AUTO_BALANCE]:
9259 for node, nres in nodeinfo.items():
9260 if node not in instance.secondary_nodes:
9264 self.warn.append("Can't get info from secondary node %s: %s" %
9266 elif not isinstance(nres.payload.get('memory_free', None), int):
9267 self.warn.append("Secondary node %s didn't return free"
9268 " memory information" % node)
9269 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9270 self.warn.append("Not enough memory to failover instance to"
9271 " secondary node %s" % node)
9273 # NIC processing
9274 self.nic_pnew = {}
9275 self.nic_pinst = {}
9276 for nic_op, nic_dict in self.op.nics:
9277 if nic_op == constants.DDM_REMOVE:
9278 if not instance.nics:
9279 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9282 if nic_op != constants.DDM_ADD:
9284 if not instance.nics:
9285 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9286 " no NICs" % nic_op,
9288 if nic_op < 0 or nic_op >= len(instance.nics):
9289 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9291 (nic_op, len(instance.nics) - 1),
9293 old_nic_params = instance.nics[nic_op].nicparams
9294 old_nic_ip = instance.nics[nic_op].ip
9296 else:
9297 old_nic_params = {}
9298 old_nic_ip = None
9299 update_params_dict = dict([(key, nic_dict[key])
9300 for key in constants.NICS_PARAMETERS
9301 if key in nic_dict])
9303 if 'bridge' in nic_dict:
9304 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9306 new_nic_params = _GetUpdatedParams(old_nic_params,
9307 update_params_dict)
9308 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9309 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9310 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9311 self.nic_pinst[nic_op] = new_nic_params
9312 self.nic_pnew[nic_op] = new_filled_nic_params
9313 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9315 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9316 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9317 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9318 if msg:
9319 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9320 if self.op.force:
9321 self.warn.append(msg)
9322 else:
9323 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9324 if new_nic_mode == constants.NIC_MODE_ROUTED:
9325 if 'ip' in nic_dict:
9326 nic_ip = nic_dict['ip']
9327 else:
9328 nic_ip = old_nic_ip
9329 if nic_ip is None:
9330 raise errors.OpPrereqError('Cannot set the nic ip to None'
9331 ' on a routed nic', errors.ECODE_INVAL)
9332 if 'mac' in nic_dict:
9333 nic_mac = nic_dict['mac']
9334 if nic_mac is None:
9335 raise errors.OpPrereqError('Cannot set the nic mac to None',
9336 errors.ECODE_INVAL)
9337 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9338 # otherwise generate the mac
9339 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9340 else:
9341 # or validate/reserve the current one
9342 try:
9343 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9344 except errors.ReservationError:
9345 raise errors.OpPrereqError("MAC address %s already in use"
9346 " in cluster" % nic_mac,
9347 errors.ECODE_NOTUNIQUE)
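# Illustrative sketch (hypothetical values): with nic_dict = {"bridge": "br1"}
# and old_nic_params = {"link": "br0"}, the merge above produces
# new_nic_params = {"link": "br1"}; SimpleFillNIC() then layers the
# cluster-level NIC defaults (e.g. the mode) on top to obtain the fully
# filled dict that is syntax-checked and, for bridged NICs, verified on the
# primary node.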
9350 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9351 raise errors.OpPrereqError("Disk operations not supported for"
9352 " diskless instances",
9354 for disk_op, _ in self.op.disks:
9355 if disk_op == constants.DDM_REMOVE:
9356 if len(instance.disks) == 1:
9357 raise errors.OpPrereqError("Cannot remove the last disk of"
9358 " an instance", errors.ECODE_INVAL)
9359 _CheckInstanceDown(self, instance, "cannot remove disks")
9361 if (disk_op == constants.DDM_ADD and
9362 len(instance.disks) >= constants.MAX_DISKS):
9363 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9364 " add more" % constants.MAX_DISKS,
9366 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9368 if disk_op < 0 or disk_op >= len(instance.disks):
9369 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9371 (disk_op, len(instance.disks)),
9376 def _ConvertPlainToDrbd(self, feedback_fn):
9377 """Converts an instance from plain to drbd.
9380 feedback_fn("Converting template to drbd")
9381 instance = self.instance
9382 pnode = instance.primary_node
9383 snode = self.op.remote_node
9385 # create a fake disk info for _GenerateDiskTemplate
9386 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9387 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9388 instance.name, pnode, [snode],
9389 disk_info, None, None, 0, feedback_fn)
9390 info = _GetInstanceInfoText(instance)
9391 feedback_fn("Creating aditional volumes...")
9392 # first, create the missing data and meta devices
9393 for disk in new_disks:
9394 # unfortunately this is... not too nice
9395 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9396 info, True)
9397 for child in disk.children:
9398 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9399 # at this stage, all new LVs have been created, we can rename the
9400 # old ones
9401 feedback_fn("Renaming original volumes...")
9402 rename_list = [(o, n.children[0].logical_id)
9403 for (o, n) in zip(instance.disks, new_disks)]
9404 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9405 result.Raise("Failed to rename original LVs")
9407 feedback_fn("Initializing DRBD devices...")
9408 # all child devices are in place, we can now create the DRBD devices
9409 for disk in new_disks:
9410 for node in [pnode, snode]:
9411 f_create = node == pnode
9412 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9414 # at this point, the instance has been modified
9415 instance.disk_template = constants.DT_DRBD8
9416 instance.disks = new_disks
9417 self.cfg.Update(instance, feedback_fn)
9419 # disks are created, waiting for sync
9420 disk_abort = not _WaitForSync(self, instance)
9421 if disk_abort:
9422 raise errors.OpExecError("There are some degraded disks for"
9423 " this instance, please cleanup manually")
9425 def _ConvertDrbdToPlain(self, feedback_fn):
9426 """Converts an instance from drbd to plain.
9429 instance = self.instance
9430 assert len(instance.secondary_nodes) == 1
9431 pnode = instance.primary_node
9432 snode = instance.secondary_nodes[0]
9433 feedback_fn("Converting template to plain")
9435 old_disks = instance.disks
9436 new_disks = [d.children[0] for d in old_disks]
9438 # copy over size and mode
9439 for parent, child in zip(old_disks, new_disks):
9440 child.size = parent.size
9441 child.mode = parent.mode
9443 # update instance structure
9444 instance.disks = new_disks
9445 instance.disk_template = constants.DT_PLAIN
9446 self.cfg.Update(instance, feedback_fn)
9448 feedback_fn("Removing volumes on the secondary node...")
9449 for disk in old_disks:
9450 self.cfg.SetDiskID(disk, snode)
9451 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9452 if msg:
9453 self.LogWarning("Could not remove block device %s on node %s,"
9454 " continuing anyway: %s", disk.iv_name, snode, msg)
9456 feedback_fn("Removing unneeded volumes on the primary node...")
9457 for idx, disk in enumerate(old_disks):
9458 meta = disk.children[1]
9459 self.cfg.SetDiskID(meta, pnode)
9460 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9461 if msg:
9462 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9463 " continuing anyway: %s", idx, pnode, msg)
9465 def Exec(self, feedback_fn):
9466 """Modifies an instance.
9468 All parameters take effect only at the next restart of the instance.
9470 """
9471 # Process here the warnings from CheckPrereq, as we don't have a
9472 # feedback_fn there.
9473 for warn in self.warn:
9474 feedback_fn("WARNING: %s" % warn)
9476 result = []
9477 instance = self.instance
9479 for disk_op, disk_dict in self.op.disks:
9480 if disk_op == constants.DDM_REMOVE:
9481 # remove the last disk
9482 device = instance.disks.pop()
9483 device_idx = len(instance.disks)
9484 for node, disk in device.ComputeNodeTree(instance.primary_node):
9485 self.cfg.SetDiskID(disk, node)
9486 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9487 if msg:
9488 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9489 " continuing anyway", device_idx, node, msg)
9490 result.append(("disk/%d" % device_idx, "remove"))
9491 elif disk_op == constants.DDM_ADD:
9493 if instance.disk_template == constants.DT_FILE:
9494 file_driver, file_path = instance.disks[0].logical_id
9495 file_path = os.path.dirname(file_path)
9496 else:
9497 file_driver = file_path = None
9498 disk_idx_base = len(instance.disks)
9499 new_disk = _GenerateDiskTemplate(self,
9500 instance.disk_template,
9501 instance.name, instance.primary_node,
9502 instance.secondary_nodes,
9503 [disk_dict],
9504 file_path,
9505 file_driver,
9506 disk_idx_base, feedback_fn)[0]
9507 instance.disks.append(new_disk)
9508 info = _GetInstanceInfoText(instance)
9510 logging.info("Creating volume %s for instance %s",
9511 new_disk.iv_name, instance.name)
9512 # Note: this needs to be kept in sync with _CreateDisks
9514 for node in instance.all_nodes:
9515 f_create = node == instance.primary_node
9516 try:
9517 _CreateBlockDev(self, node, instance, new_disk,
9518 f_create, info, f_create)
9519 except errors.OpExecError, err:
9520 self.LogWarning("Failed to create volume %s (%s) on"
9521 " node %s: %s",
9522 new_disk.iv_name, new_disk, node, err)
9523 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9524 (new_disk.size, new_disk.mode)))
9525 else:
9526 # change a given disk
9527 instance.disks[disk_op].mode = disk_dict['mode']
9528 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9530 if self.op.disk_template:
9531 r_shut = _ShutdownInstanceDisks(self, instance)
9532 if not r_shut:
9533 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9534 " proceed with disk template conversion")
9535 mode = (instance.disk_template, self.op.disk_template)
9536 try:
9537 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9538 except:
9539 self.cfg.ReleaseDRBDMinors(instance.name)
9540 raise
9541 result.append(("disk_template", self.op.disk_template))
9544 for nic_op, nic_dict in self.op.nics:
9545 if nic_op == constants.DDM_REMOVE:
9546 # remove the last nic
9547 del instance.nics[-1]
9548 result.append(("nic.%d" % len(instance.nics), "remove"))
9549 elif nic_op == constants.DDM_ADD:
9550 # mac and bridge should be set, by now
9551 mac = nic_dict['mac']
9552 ip = nic_dict.get('ip', None)
9553 nicparams = self.nic_pinst[constants.DDM_ADD]
9554 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9555 instance.nics.append(new_nic)
9556 result.append(("nic.%d" % (len(instance.nics) - 1),
9557 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9558 (new_nic.mac, new_nic.ip,
9559 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9560 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9561 )))
9562 else:
9563 for key in 'mac', 'ip':
9564 if key in nic_dict:
9565 setattr(instance.nics[nic_op], key, nic_dict[key])
9566 if nic_op in self.nic_pinst:
9567 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9568 for key, val in nic_dict.iteritems():
9569 result.append(("nic.%s/%d" % (key, nic_op), val))
9572 if self.op.hvparams:
9573 instance.hvparams = self.hv_inst
9574 for key, val in self.op.hvparams.iteritems():
9575 result.append(("hv/%s" % key, val))
9578 if self.op.beparams:
9579 instance.beparams = self.be_inst
9580 for key, val in self.op.beparams.iteritems():
9581 result.append(("be/%s" % key, val))
9583 # OS change
9584 if self.op.os_name:
9585 instance.os = self.op.os_name
9588 if self.op.osparams:
9589 instance.osparams = self.os_inst
9590 for key, val in self.op.osparams.iteritems():
9591 result.append(("os/%s" % key, val))
9593 self.cfg.Update(instance, feedback_fn)
9595 return result
9597 _DISK_CONVERSIONS = {
9598 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9599 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9600 }
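# Usage sketch (hypothetical command): "gnt-instance modify -t drbd -n
# node2.example.com inst1" ends up selecting the (DT_PLAIN, DT_DRBD8) entry
# of this table in Exec() above; only the two conversions listed here are
# supported.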
9603 class LUBackupQuery(NoHooksLU):
9604 """Query the exports list
9609 def ExpandNames(self):
9610 self.needed_locks = {}
9611 self.share_locks[locking.LEVEL_NODE] = 1
9612 if not self.op.nodes:
9613 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9615 self.needed_locks[locking.LEVEL_NODE] = \
9616 _GetWantedNodes(self, self.op.nodes)
9618 def Exec(self, feedback_fn):
9619 """Compute the list of all the exported system images.
9622 @return: a dictionary with the structure node->(export-list)
9623 where export-list is a list of the instances exported on
9624 that node.
9626 """
9627 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9628 rpcresult = self.rpc.call_export_list(self.nodes)
9629 result = {}
9630 for node in rpcresult:
9631 if rpcresult[node].fail_msg:
9632 result[node] = False
9634 result[node] = rpcresult[node].payload
9636 return result
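# Illustrative return value (hypothetical names): {"node1.example.com":
# ["inst1.example.com"], "node2.example.com": False}, where False marks a
# node whose export list could not be queried.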
9639 class LUBackupPrepare(NoHooksLU):
9640 """Prepares an instance for an export and returns useful information.
9645 def ExpandNames(self):
9646 self._ExpandAndLockInstance()
9648 def CheckPrereq(self):
9649 """Check prerequisites.
9652 instance_name = self.op.instance_name
9654 self.instance = self.cfg.GetInstanceInfo(instance_name)
9655 assert self.instance is not None, \
9656 "Cannot retrieve locked instance %s" % self.op.instance_name
9657 _CheckNodeOnline(self, self.instance.primary_node)
9659 self._cds = _GetClusterDomainSecret()
9661 def Exec(self, feedback_fn):
9662 """Prepares an instance for an export.
9665 instance = self.instance
9667 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9668 salt = utils.GenerateSecret(8)
9670 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9671 result = self.rpc.call_x509_cert_create(instance.primary_node,
9672 constants.RIE_CERT_VALIDITY)
9673 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9675 (name, cert_pem) = result.payload
9677 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9678 cert_pem)
9680 return {
9681 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9682 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9683 salt),
9684 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9685 }
9687 return None
9690 class LUBackupExport(LogicalUnit):
9691 """Export an instance to an image in the cluster.
9694 HPATH = "instance-export"
9695 HTYPE = constants.HTYPE_INSTANCE
9698 def CheckArguments(self):
9699 """Check the arguments.
9702 self.x509_key_name = self.op.x509_key_name
9703 self.dest_x509_ca_pem = self.op.destination_x509_ca
9705 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9706 if not self.x509_key_name:
9707 raise errors.OpPrereqError("Missing X509 key name for encryption",
9708 errors.ECODE_INVAL)
9710 if not self.dest_x509_ca_pem:
9711 raise errors.OpPrereqError("Missing destination X509 CA",
9712 errors.ECODE_INVAL)
9714 def ExpandNames(self):
9715 self._ExpandAndLockInstance()
9717 # Lock all nodes for local exports
9718 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9719 # FIXME: lock only instance primary and destination node
9721 # Sad but true, for now we have to lock all nodes, as we don't know where
9722 # the previous export might be, and in this LU we search for it and
9723 # remove it from its current node. In the future we could fix this by:
9724 # - making a tasklet to search (share-lock all), then create the
9725 # new one, then one to remove, after
9726 # - removing the removal operation altogether
9727 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9729 def DeclareLocks(self, level):
9730 """Last minute lock declaration."""
9731 # All nodes are locked anyway, so nothing to do here.
9733 def BuildHooksEnv(self):
9734 """Build hooks env.
9736 This will run on the master, primary node and target node.
9738 """
9739 env = {
9740 "EXPORT_MODE": self.op.mode,
9741 "EXPORT_NODE": self.op.target_node,
9742 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9743 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9744 # TODO: Generic function for boolean env variables
9745 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9748 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9750 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9752 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9753 nl.append(self.op.target_node)
9755 return env, nl, nl
9757 def CheckPrereq(self):
9758 """Check prerequisites.
9760 This checks that the instance and node names are valid.
9762 """
9763 instance_name = self.op.instance_name
9765 self.instance = self.cfg.GetInstanceInfo(instance_name)
9766 assert self.instance is not None, \
9767 "Cannot retrieve locked instance %s" % self.op.instance_name
9768 _CheckNodeOnline(self, self.instance.primary_node)
9770 if (self.op.remove_instance and self.instance.admin_up and
9771 not self.op.shutdown):
9772 raise errors.OpPrereqError("Can not remove instance without shutting it"
9775 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9776 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9777 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9778 assert self.dst_node is not None
9780 _CheckNodeOnline(self, self.dst_node.name)
9781 _CheckNodeNotDrained(self, self.dst_node.name)
9784 self.dest_disk_info = None
9785 self.dest_x509_ca = None
9787 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9788 self.dst_node = None
9790 if len(self.op.target_node) != len(self.instance.disks):
9791 raise errors.OpPrereqError(("Received destination information for %s"
9792 " disks, but instance %s has %s disks") %
9793 (len(self.op.target_node), instance_name,
9794 len(self.instance.disks)),
9795 errors.ECODE_INVAL)
9797 cds = _GetClusterDomainSecret()
9799 # Check X509 key name
9800 try:
9801 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9802 except (TypeError, ValueError), err:
9803 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9805 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9806 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9807 errors.ECODE_INVAL)
9809 # Load and verify CA
9810 try:
9811 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9812 except OpenSSL.crypto.Error, err:
9813 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9814 (err, ), errors.ECODE_INVAL)
9816 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9817 if errcode is not None:
9818 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9819 (msg, ), errors.ECODE_INVAL)
9821 self.dest_x509_ca = cert
9823 # Verify target information
9824 disk_info = []
9825 for idx, disk_data in enumerate(self.op.target_node):
9826 try:
9827 (host, port, magic) = \
9828 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9829 except errors.GenericError, err:
9830 raise errors.OpPrereqError("Target info for disk %s: %s" %
9831 (idx, err), errors.ECODE_INVAL)
9833 disk_info.append((host, port, magic))
9835 assert len(disk_info) == len(self.op.target_node)
9836 self.dest_disk_info = disk_info
9838 else:
9839 raise errors.ProgrammerError("Unhandled export mode %r" %
9840 self.op.mode)
9842 # instance disk type verification
9843 # TODO: Implement export support for file-based disks
9844 for disk in self.instance.disks:
9845 if disk.dev_type == constants.LD_FILE:
9846 raise errors.OpPrereqError("Export not supported for instances with"
9847 " file-based disks", errors.ECODE_INVAL)
9849 def _CleanupExports(self, feedback_fn):
9850 """Removes exports of current instance from all other nodes.
9852 If an instance in a cluster with nodes A..D was exported to node C, its
9853 exports will be removed from the nodes A, B and D.
9855 """
9856 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9858 nodelist = self.cfg.GetNodeList()
9859 nodelist.remove(self.dst_node.name)
9861 # on one-node clusters nodelist will be empty after the removal
9862 # if we proceed the backup would be removed because OpBackupQuery
9863 # substitutes an empty list with the full cluster node list.
9864 iname = self.instance.name
9865 if nodelist:
9866 feedback_fn("Removing old exports for instance %s" % iname)
9867 exportlist = self.rpc.call_export_list(nodelist)
9868 for node in exportlist:
9869 if exportlist[node].fail_msg:
9870 continue
9871 if iname in exportlist[node].payload:
9872 msg = self.rpc.call_export_remove(node, iname).fail_msg
9873 if msg:
9874 self.LogWarning("Could not remove older export for instance %s"
9875 " on node %s: %s", iname, node, msg)
9877 def Exec(self, feedback_fn):
9878 """Export an instance to an image in the cluster.
9881 assert self.op.mode in constants.EXPORT_MODES
9883 instance = self.instance
9884 src_node = instance.primary_node
9886 if self.op.shutdown:
9887 # shutdown the instance, but not the disks
9888 feedback_fn("Shutting down instance %s" % instance.name)
9889 result = self.rpc.call_instance_shutdown(src_node, instance,
9890 self.op.shutdown_timeout)
9891 # TODO: Maybe ignore failures if ignore_remove_failures is set
9892 result.Raise("Could not shutdown instance %s on"
9893 " node %s" % (instance.name, src_node))
9895 # set the disks ID correctly since call_instance_start needs the
9896 # correct drbd minor to create the symlinks
9897 for disk in instance.disks:
9898 self.cfg.SetDiskID(disk, src_node)
9900 activate_disks = (not instance.admin_up)
9902 if activate_disks:
9903 # Activate the instance disks if we're exporting a stopped instance
9904 feedback_fn("Activating disks for %s" % instance.name)
9905 _StartInstanceDisks(self, instance, None)
9907 try:
9908 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9909 instance)
9911 helper.CreateSnapshots()
9912 try:
9913 if (self.op.shutdown and instance.admin_up and
9914 not self.op.remove_instance):
9915 assert not activate_disks
9916 feedback_fn("Starting instance %s" % instance.name)
9917 result = self.rpc.call_instance_start(src_node, instance, None, None)
9918 msg = result.fail_msg
9919 if msg:
9920 feedback_fn("Failed to start instance: %s" % msg)
9921 _ShutdownInstanceDisks(self, instance)
9922 raise errors.OpExecError("Could not start instance: %s" % msg)
9924 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9925 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9926 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9927 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9928 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9930 (key_name, _, _) = self.x509_key_name
9932 dest_ca_pem = \
9933 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9934 self.dest_x509_ca)
9936 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9937 key_name, dest_ca_pem,
9938 timeouts)
9939 finally:
9940 helper.Cleanup()
9942 # Check for backwards compatibility
9943 assert len(dresults) == len(instance.disks)
9944 assert compat.all(isinstance(i, bool) for i in dresults), \
9945 "Not all results are boolean: %r" % dresults
9949 feedback_fn("Deactivating disks for %s" % instance.name)
9950 _ShutdownInstanceDisks(self, instance)
9952 if not (compat.all(dresults) and fin_resu):
9953 failures = []
9954 if not fin_resu:
9955 failures.append("export finalization")
9956 if not compat.all(dresults):
9957 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9958 if not dsk)
9959 failures.append("disk export: disk(s) %s" % fdsk)
9961 raise errors.OpExecError("Export failed, errors in %s" %
9962 utils.CommaJoin(failures))
9964 # At this point, the export was successful, we can cleanup/finish
9966 # Remove instance if requested
9967 if self.op.remove_instance:
9968 feedback_fn("Removing instance %s" % instance.name)
9969 _RemoveInstance(self, feedback_fn, instance,
9970 self.op.ignore_remove_failures)
9972 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9973 self._CleanupExports(feedback_fn)
9975 return fin_resu, dresults
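# Illustrative return value: (True, [True, True]) for a successfully
# finalized export of a two-disk instance; a False entry in the list marks a
# disk whose export failed.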
9978 class LUBackupRemove(NoHooksLU):
9979 """Remove exports related to the named instance.
9984 def ExpandNames(self):
9985 self.needed_locks = {}
9986 # We need all nodes to be locked in order for RemoveExport to work, but we
9987 # don't need to lock the instance itself, as nothing will happen to it (and
9988 # we can remove exports also for a removed instance)
9989 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9991 def Exec(self, feedback_fn):
9992 """Remove any export.
9995 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9996 # If the instance was not found we'll try with the name that was passed in.
9997 # This will only work if it was an FQDN, though.
9998 fqdn_warn = False
9999 if not instance_name:
10000 fqdn_warn = True
10001 instance_name = self.op.instance_name
10003 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10004 exportlist = self.rpc.call_export_list(locked_nodes)
10005 found = False
10006 for node in exportlist:
10007 msg = exportlist[node].fail_msg
10008 if msg:
10009 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10010 continue
10011 if instance_name in exportlist[node].payload:
10012 found = True
10013 result = self.rpc.call_export_remove(node, instance_name)
10014 msg = result.fail_msg
10015 if msg:
10016 logging.error("Could not remove export for instance %s"
10017 " on node %s: %s", instance_name, node, msg)
10019 if fqdn_warn and not found:
10020 feedback_fn("Export not found. If trying to remove an export belonging"
10021 " to a deleted instance please use its Fully Qualified"
10025 class LUGroupAdd(LogicalUnit):
10026 """Logical unit for creating node groups.
10029 HPATH = "group-add"
10030 HTYPE = constants.HTYPE_GROUP
10033 def ExpandNames(self):
10034 # We need the new group's UUID here so that we can create and acquire the
10035 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10036 # that it should not check whether the UUID exists in the configuration.
10037 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10038 self.needed_locks = {}
10039 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10041 def CheckPrereq(self):
10042 """Check prerequisites.
10044 This checks that the given group name is not an existing node group
10045 already.
10047 """
10048 try:
10049 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10050 except errors.OpPrereqError:
10051 pass
10052 else:
10053 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10054 " node group (UUID: %s)" %
10055 (self.op.group_name, existing_uuid),
10056 errors.ECODE_EXISTS)
10058 if self.op.ndparams:
10059 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10061 def BuildHooksEnv(self):
10062 """Build hooks env.
10066 "GROUP_NAME": self.op.group_name,
10068 mn = self.cfg.GetMasterNode()
10069 return env, [mn], [mn]
10071 def Exec(self, feedback_fn):
10072 """Add the node group to the cluster.
10075 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10076 uuid=self.group_uuid,
10077 alloc_policy=self.op.alloc_policy,
10078 ndparams=self.op.ndparams)
10080 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10081 del self.remove_locks[locking.LEVEL_NODEGROUP]
10084 class LUGroupAssignNodes(NoHooksLU):
10085 """Logical unit for assigning nodes to groups.
10090 def ExpandNames(self):
10091 # These raise errors.OpPrereqError on their own:
10092 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10093 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10095 # We want to lock all the affected nodes and groups. We have readily
10096 # available the list of nodes, and the *destination* group. To gather the
10097 # list of "source" groups, we need to fetch node information.
10098 self.node_data = self.cfg.GetAllNodesInfo()
10099 affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10100 affected_groups.add(self.group_uuid)
10102 self.needed_locks = {
10103 locking.LEVEL_NODEGROUP: list(affected_groups),
10104 locking.LEVEL_NODE: self.op.nodes,
10105 }
10107 def CheckPrereq(self):
10108 """Check prerequisites.
10111 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10112 instance_data = self.cfg.GetAllInstancesInfo()
10114 if self.group is None:
10115 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10116 (self.op.group_name, self.group_uuid))
10118 (new_splits, previous_splits) = \
10119 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10120 for node in self.op.nodes],
10121 self.node_data, instance_data)
10123 if new_splits:
10124 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10126 if not self.op.force:
10127 raise errors.OpExecError("The following instances get split by this"
10128 " change and --force was not given: %s" %
10129 fmt_new_splits)
10130 else:
10131 self.LogWarning("This operation will split the following instances: %s",
10132 fmt_new_splits)
10134 if previous_splits:
10135 self.LogWarning("In addition, these already-split instances continue"
10136 " to be spit across groups: %s",
10137 utils.CommaJoin(utils.NiceSort(previous_splits)))
10139 def Exec(self, feedback_fn):
10140 """Assign nodes to a new group.
10143 for node in self.op.nodes:
10144 self.node_data[node].group = self.group_uuid
10146 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10148 @staticmethod
10149 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10150 """Check for split instances after a node assignment.
10152 This method considers a series of node assignments as an atomic operation,
10153 and returns information about split instances after applying the set of
10156 In particular, it returns information about newly split instances, and
10157 instances that were already split, and remain so after the change.
10159 Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
10160 considered.
10162 @type changes: list of (node_name, new_group_uuid) pairs.
10163 @param changes: list of node assignments to consider.
10164 @param node_data: a dict with data for all nodes
10165 @param instance_data: a dict with all instances to consider
10166 @rtype: a two-tuple
10167 @return: a list of instances that were previously okay but become split as a
10168 consequence of this change, and a list of instances that were previously
10169 split and that this change does not fix.
10171 """
10172 changed_nodes = dict((node, group) for node, group in changes
10173 if node_data[node].group != group)
10175 all_split_instances = set()
10176 previously_split_instances = set()
10178 def InstanceNodes(instance):
10179 return [instance.primary_node] + list(instance.secondary_nodes)
10181 for inst in instance_data.values():
10182 if inst.disk_template not in constants.DTS_NET_MIRROR:
10183 continue
10185 instance_nodes = InstanceNodes(inst)
10187 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10188 previously_split_instances.add(inst.name)
10190 if len(set(changed_nodes.get(node, node_data[node].group)
10191 for node in instance_nodes)) > 1:
10192 all_split_instances.add(inst.name)
10194 return (list(all_split_instances - previously_split_instances),
10195 list(previously_split_instances & all_split_instances))
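# Worked example (hypothetical cluster): with nodes A and B in group g1 and
# node C in g2, a DRBD instance on (A, B) appears in the first returned list
# if only B is reassigned to g2 (newly split), and in the second list if it
# already straddled two groups and the proposed change leaves it that way.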
10198 class _GroupQuery(_QueryBase):
10200 FIELDS = query.GROUP_FIELDS
10202 def ExpandNames(self, lu):
10203 lu.needed_locks = {}
10205 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10206 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10208 if not self.names:
10209 self.wanted = [name_to_uuid[name]
10210 for name in utils.NiceSort(name_to_uuid.keys())]
10211 else:
10212 # Accept names to be either names or UUIDs.
10213 missing = []
10214 self.wanted = []
10215 all_uuid = frozenset(self._all_groups.keys())
10217 for name in self.names:
10218 if name in all_uuid:
10219 self.wanted.append(name)
10220 elif name in name_to_uuid:
10221 self.wanted.append(name_to_uuid[name])
10222 else:
10223 missing.append(name)
10225 if missing:
10226 raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10227 errors.ECODE_NOENT)
10229 def DeclareLocks(self, lu, level):
10230 pass
10232 def _GetQueryData(self, lu):
10233 """Computes the list of node groups and their attributes.
10236 do_nodes = query.GQ_NODE in self.requested_data
10237 do_instances = query.GQ_INST in self.requested_data
10239 group_to_nodes = None
10240 group_to_instances = None
10242 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10243 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10244 # latter GetAllInstancesInfo() is not enough, for we have to go through
10245 # instance->node. Hence, we will need to process nodes even if we only need
10246 # instance information.
10247 if do_nodes or do_instances:
10248 all_nodes = lu.cfg.GetAllNodesInfo()
10249 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10251 node_to_group = {}
10252 for node in all_nodes.values():
10253 if node.group in group_to_nodes:
10254 group_to_nodes[node.group].append(node.name)
10255 node_to_group[node.name] = node.group
10257 if do_instances:
10258 all_instances = lu.cfg.GetAllInstancesInfo()
10259 group_to_instances = dict((uuid, []) for uuid in self.wanted)
10261 for instance in all_instances.values():
10262 node = instance.primary_node
10263 if node in node_to_group:
10264 group_to_instances[node_to_group[node]].append(instance.name)
10266 if not do_nodes:
10267 # Do not pass on node information if it was not requested.
10268 group_to_nodes = None
10270 return query.GroupQueryData([self._all_groups[uuid]
10271 for uuid in self.wanted],
10272 group_to_nodes, group_to_instances)
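# Illustrative shapes (hypothetical names): group_to_nodes maps group UUID ->
# node names, e.g. {"uuid-g1": ["node1", "node2"]}, and group_to_instances
# maps group UUID -> instances whose primary node is in that group, e.g.
# {"uuid-g1": ["inst1.example.com"]}.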
10275 class LUGroupQuery(NoHooksLU):
10276 """Logical unit for querying node groups.
10281 def CheckArguments(self):
10282 self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10284 def ExpandNames(self):
10285 self.gq.ExpandNames(self)
10287 def Exec(self, feedback_fn):
10288 return self.gq.OldStyleQuery(self)
10291 class LUGroupSetParams(LogicalUnit):
10292 """Modifies the parameters of a node group.
10295 HPATH = "group-modify"
10296 HTYPE = constants.HTYPE_GROUP
10299 def CheckArguments(self):
10300 all_changes = [
10301 self.op.ndparams,
10302 self.op.alloc_policy,
10303 ]
10305 if all_changes.count(None) == len(all_changes):
10306 raise errors.OpPrereqError("Please pass at least one modification",
10307 errors.ECODE_INVAL)
10309 def ExpandNames(self):
10310 # This raises errors.OpPrereqError on its own:
10311 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10313 self.needed_locks = {
10314 locking.LEVEL_NODEGROUP: [self.group_uuid],
10315 }
10317 def CheckPrereq(self):
10318 """Check prerequisites.
10321 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10323 if self.group is None:
10324 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10325 (self.op.group_name, self.group_uuid))
10327 if self.op.ndparams:
10328 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10329 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10330 self.new_ndparams = new_ndparams
10332 def BuildHooksEnv(self):
10333 """Build hooks env.
10337 "GROUP_NAME": self.op.group_name,
10338 "NEW_ALLOC_POLICY": self.op.alloc_policy,
10340 mn = self.cfg.GetMasterNode()
10341 return env, [mn], [mn]
10343 def Exec(self, feedback_fn):
10344 """Modifies the node group.
10349 if self.op.ndparams:
10350 self.group.ndparams = self.new_ndparams
10351 result.append(("ndparams", str(self.group.ndparams)))
10353 if self.op.alloc_policy:
10354 self.group.alloc_policy = self.op.alloc_policy
10356 self.cfg.Update(self.group, feedback_fn)
10358 return result
10361 class LUGroupRemove(LogicalUnit):
10362 HPATH = "group-remove"
10363 HTYPE = constants.HTYPE_GROUP
10366 def ExpandNames(self):
10367 # This will raise errors.OpPrereqError on its own:
10368 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10369 self.needed_locks = {
10370 locking.LEVEL_NODEGROUP: [self.group_uuid],
10371 }
10373 def CheckPrereq(self):
10374 """Check prerequisites.
10376 This checks that the given group name exists as a node group, that it is
10377 empty (i.e., contains no nodes), and that it is not the last group of the
10378 cluster.
10380 """
10381 # Verify that the group is empty.
10382 group_nodes = [node.name
10383 for node in self.cfg.GetAllNodesInfo().values()
10384 if node.group == self.group_uuid]
10386 if group_nodes:
10387 raise errors.OpPrereqError("Group '%s' not empty, has the following"
10388 " nodes: %s" %
10389 (self.op.group_name,
10390 utils.CommaJoin(utils.NiceSort(group_nodes))),
10391 errors.ECODE_STATE)
10393 # Verify the cluster would not be left group-less.
10394 if len(self.cfg.GetNodeGroupList()) == 1:
10395 raise errors.OpPrereqError("Group '%s' is the only group,"
10396 " cannot be removed" %
10397 self.op.group_name,
10398 errors.ECODE_STATE)
10400 def BuildHooksEnv(self):
10401 """Build hooks env.
10405 "GROUP_NAME": self.op.group_name,
10407 mn = self.cfg.GetMasterNode()
10408 return env, [mn], [mn]
10410 def Exec(self, feedback_fn):
10411 """Remove the node group.
10415 self.cfg.RemoveNodeGroup(self.group_uuid)
10416 except errors.ConfigurationError:
10417 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10418 (self.op.group_name, self.group_uuid))
10420 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10423 class LUGroupRename(LogicalUnit):
10424 HPATH = "group-rename"
10425 HTYPE = constants.HTYPE_GROUP
10428 def ExpandNames(self):
10429 # This raises errors.OpPrereqError on its own:
10430 self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10432 self.needed_locks = {
10433 locking.LEVEL_NODEGROUP: [self.group_uuid],
10434 }
10436 def CheckPrereq(self):
10437 """Check prerequisites.
10439 This checks that the given old_name exists as a node group, and that
10440 new_name does not.
10442 """
10443 try:
10444 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10445 except errors.OpPrereqError:
10446 pass
10447 else:
10448 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10449 " node group (UUID: %s)" %
10450 (self.op.new_name, new_name_uuid),
10451 errors.ECODE_EXISTS)
10453 def BuildHooksEnv(self):
10454 """Build hooks env.
10458 "OLD_NAME": self.op.old_name,
10459 "NEW_NAME": self.op.new_name,
10462 mn = self.cfg.GetMasterNode()
10463 all_nodes = self.cfg.GetAllNodesInfo()
10464 run_nodes = [mn]
10465 all_nodes.pop(mn, None)
10467 for node in all_nodes.values():
10468 if node.group == self.group_uuid:
10469 run_nodes.append(node.name)
10471 return env, run_nodes, run_nodes
10473 def Exec(self, feedback_fn):
10474 """Rename the node group.
10477 group = self.cfg.GetNodeGroup(self.group_uuid)
10479 if group is None:
10480 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10481 (self.op.old_name, self.group_uuid))
10483 group.name = self.op.new_name
10484 self.cfg.Update(group, feedback_fn)
10486 return self.op.new_name
10489 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10490 """Generic tags LU.
10492 This is an abstract class which is the parent of all the other tags LUs.
10494 """
10496 def ExpandNames(self):
10497 self.needed_locks = {}
10498 if self.op.kind == constants.TAG_NODE:
10499 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10500 self.needed_locks[locking.LEVEL_NODE] = self.op.name
10501 elif self.op.kind == constants.TAG_INSTANCE:
10502 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10503 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10505 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10506 # not possible to acquire the BGL based on opcode parameters)
10508 def CheckPrereq(self):
10509 """Check prerequisites.
10512 if self.op.kind == constants.TAG_CLUSTER:
10513 self.target = self.cfg.GetClusterInfo()
10514 elif self.op.kind == constants.TAG_NODE:
10515 self.target = self.cfg.GetNodeInfo(self.op.name)
10516 elif self.op.kind == constants.TAG_INSTANCE:
10517 self.target = self.cfg.GetInstanceInfo(self.op.name)
10518 else:
10519 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10520 str(self.op.kind), errors.ECODE_INVAL)
10523 class LUTagsGet(TagsLU):
10524 """Returns the tags of a given object.
10529 def ExpandNames(self):
10530 TagsLU.ExpandNames(self)
10532 # Share locks as this is only a read operation
10533 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10535 def Exec(self, feedback_fn):
10536 """Returns the tag list.
10539 return list(self.target.GetTags())
10542 class LUTagsSearch(NoHooksLU):
10543 """Searches the tags for a given pattern.
10548 def ExpandNames(self):
10549 self.needed_locks = {}
10551 def CheckPrereq(self):
10552 """Check prerequisites.
10554 This checks the pattern passed for validity by compiling it.
10556 """
10557 try:
10558 self.re = re.compile(self.op.pattern)
10559 except re.error, err:
10560 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10561 (self.op.pattern, err), errors.ECODE_INVAL)
10563 def Exec(self, feedback_fn):
10564 """Returns the tag list.
10568 tgts = [("/cluster", cfg.GetClusterInfo())]
10569 ilist = cfg.GetAllInstancesInfo().values()
10570 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10571 nlist = cfg.GetAllNodesInfo().values()
10572 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10573 results = []
10574 for path, target in tgts:
10575 for tag in target.GetTags():
10576 if self.re.search(tag):
10577 results.append((path, tag))
10579 return results
10581 class LUTagsSet(TagsLU):
10582 """Sets a tag on a given object.
10587 def CheckPrereq(self):
10588 """Check prerequisites.
10590 This checks the type and length of the tag name and value.
10592 """
10593 TagsLU.CheckPrereq(self)
10594 for tag in self.op.tags:
10595 objects.TaggableObject.ValidateTag(tag)
10597 def Exec(self, feedback_fn):
10598 """Sets the tag.
10600 """
10601 try:
10602 for tag in self.op.tags:
10603 self.target.AddTag(tag)
10604 except errors.TagError, err:
10605 raise errors.OpExecError("Error while setting tag: %s" % str(err))
10606 self.cfg.Update(self.target, feedback_fn)
10609 class LUTagsDel(TagsLU):
10610 """Delete a list of tags from a given object.
10615 def CheckPrereq(self):
10616 """Check prerequisites.
10618 This checks that we have the given tag.
10620 """
10621 TagsLU.CheckPrereq(self)
10622 for tag in self.op.tags:
10623 objects.TaggableObject.ValidateTag(tag)
10624 del_tags = frozenset(self.op.tags)
10625 cur_tags = self.target.GetTags()
10627 diff_tags = del_tags - cur_tags
10628 if diff_tags:
10629 diff_names = ("'%s'" % i for i in sorted(diff_tags))
10630 raise errors.OpPrereqError("Tag(s) %s not found" %
10631 (utils.CommaJoin(diff_names), ),
10632 errors.ECODE_NOENT)
10634 def Exec(self, feedback_fn):
10635 """Remove the tag from the object.
10638 for tag in self.op.tags:
10639 self.target.RemoveTag(tag)
10640 self.cfg.Update(self.target, feedback_fn)
10643 class LUTestDelay(NoHooksLU):
10644 """Sleep for a specified amount of time.
10646 This LU sleeps on the master and/or nodes for a specified amount of
10647 time.
10649 """
10652 def ExpandNames(self):
10653 """Expand names and set required locks.
10655 This expands the node list, if any.
10657 """
10658 self.needed_locks = {}
10659 if self.op.on_nodes:
10660 # _GetWantedNodes can be used here, but is not always appropriate to use
10661 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10662 # more information.
10663 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10664 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10666 def _TestDelay(self):
10667 """Do the actual sleep.
10670 if self.op.on_master:
10671 if not utils.TestDelay(self.op.duration):
10672 raise errors.OpExecError("Error during master delay test")
10673 if self.op.on_nodes:
10674 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10675 for node, node_result in result.items():
10676 node_result.Raise("Failure during rpc call to node %s" % node)
10678 def Exec(self, feedback_fn):
10679 """Execute the test delay opcode, with the wanted repetitions.
10682 if self.op.repeat == 0:
10685 top_value = self.op.repeat - 1
10686 for i in range(self.op.repeat):
10687 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10688 self._TestDelay()
10691 class LUTestJqueue(NoHooksLU):
10692 """Utility LU to test some aspects of the job queue.
10697 # Must be lower than default timeout for WaitForJobChange to see whether it
10698 # notices changed jobs
10699 _CLIENT_CONNECT_TIMEOUT = 20.0
10700 _CLIENT_CONFIRM_TIMEOUT = 60.0
10703 def _NotifyUsingSocket(cls, cb, errcls):
10704 """Opens a Unix socket and waits for another program to connect.
10707 @param cb: Callback to send socket name to client
10708 @type errcls: class
10709 @param errcls: Exception class to use for errors
10711 """
10712 # Using a temporary directory as there's no easy way to create temporary
10713 # sockets without writing a custom loop around tempfile.mktemp and
10714 # socket.bind
10715 tmpdir = tempfile.mkdtemp()
10716 try:
10717 tmpsock = utils.PathJoin(tmpdir, "sock")
10719 logging.debug("Creating temporary socket at %s", tmpsock)
10720 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10721 try:
10722 sock.bind(tmpsock)
10723 sock.listen(1)
10725 # Send details to client
10726 cb(tmpsock)
10728 # Wait for client to connect before continuing
10729 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10730 try:
10731 (conn, _) = sock.accept()
10732 except socket.error, err:
10733 raise errcls("Client didn't connect in time (%s)" % err)
10734 finally:
10735 sock.close()
10736 finally:
10737 # Remove as soon as client is connected
10738 shutil.rmtree(tmpdir)
10740 # Wait for client to close
10741 try:
10742 try:
10743 # pylint: disable-msg=E1101
10744 # Instance of '_socketobject' has no ... member
10745 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10746 conn.recv(1)
10747 except socket.error, err:
10748 raise errcls("Client failed to confirm notification (%s)" % err)
10749 finally:
10750 conn.close()
10752 def _SendNotification(self, test, arg, sockname):
10753 """Sends a notification to the client.
10756 @param test: Test name
10757 @param arg: Test argument (depends on test)
10758 @type sockname: string
10759 @param sockname: Socket path
10761 """
10762 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10764 def _Notify(self, prereq, test, arg):
10765 """Notifies the client of a test.
10768 @param prereq: Whether this is a prereq-phase test
10770 @param test: Test name
10771 @param arg: Test argument (depends on test)
10773 """
10774 if prereq:
10775 errcls = errors.OpPrereqError
10776 else:
10777 errcls = errors.OpExecError
10779 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10780 test, arg),
10781 errcls)
10783 def CheckArguments(self):
10784 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10785 self.expandnames_calls = 0
10787 def ExpandNames(self):
10788 checkargs_calls = getattr(self, "checkargs_calls", 0)
10789 if checkargs_calls < 1:
10790 raise errors.ProgrammerError("CheckArguments was not called")
10792 self.expandnames_calls += 1
10794 if self.op.notify_waitlock:
10795 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10797 self.LogInfo("Expanding names")
10799 # Get lock on master node (just to get a lock, not for a particular reason)
10800 self.needed_locks = {
10801 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10802 }
10804 def Exec(self, feedback_fn):
10805 if self.expandnames_calls < 1:
10806 raise errors.ProgrammerError("ExpandNames was not called")
10808 if self.op.notify_exec:
10809 self._Notify(False, constants.JQT_EXEC, None)
10811 self.LogInfo("Executing")
10813 if self.op.log_messages:
10814 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10815 for idx, msg in enumerate(self.op.log_messages):
10816 self.LogInfo("Sending log message %s", idx + 1)
10817 feedback_fn(constants.JQT_MSGPREFIX + msg)
10818 # Report how many test messages have been sent
10819 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10821 if self.op.fail:
10822 raise errors.OpExecError("Opcode failure was requested")
10824 return True
10827 class IAllocator(object):
10828 """IAllocator framework.
10830 An IAllocator instance has four sets of attributes:
10831 - cfg that is needed to query the cluster
10832 - input data (all members of the _KEYS class attribute are required)
10833 - four buffer attributes (in|out_data|text), that represent the
10834 input (to the external script) in text and data structure format,
10835 and the output from it, again in two formats
10836 - the result variables from the script (success, info, nodes) for
10837 easy usage
10839 """
10840 # pylint: disable-msg=R0902
10841 # lots of instance attributes
10843 "name", "mem_size", "disks", "disk_template",
10844 "os", "tags", "nics", "vcpus", "hypervisor",
10847 "name", "relocate_from",
10853 def __init__(self, cfg, rpc, mode, **kwargs):
10854 self.cfg = cfg
10855 self.rpc = rpc
10856 # init buffer variables
10857 self.in_text = self.out_text = self.in_data = self.out_data = None
10858 # init all input fields so that pylint is happy
10859 self.mode = mode
10860 self.mem_size = self.disks = self.disk_template = None
10861 self.os = self.tags = self.nics = self.vcpus = None
10862 self.hypervisor = None
10863 self.relocate_from = None
10864 self.name = None
10865 self.evac_nodes = None
10866 # computed fields
10867 self.required_nodes = None
10868 # init result fields
10869 self.success = self.info = self.result = None
10870 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10871 keyset = self._ALLO_KEYS
10872 fn = self._AddNewInstance
10873 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10874 keyset = self._RELO_KEYS
10875 fn = self._AddRelocateInstance
10876 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10877 keyset = self._EVAC_KEYS
10878 fn = self._AddEvacuateNodes
10880 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10881 " IAllocator" % self.mode)
10883 if key not in keyset:
10884 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10885 " IAllocator" % key)
10886 setattr(self, key, kwargs[key])
10889 if key not in kwargs:
10890 raise errors.ProgrammerError("Missing input parameter '%s' to"
10891 " IAllocator" % key)
10892 self._BuildInputData(fn)
10894 def _ComputeClusterData(self):
10895 """Compute the generic allocator input data.
10897 This is the data that is independent of the actual operation.
10899 """
10900 cfg = self.cfg
10901 cluster_info = cfg.GetClusterInfo()
10903 data = {
10904 "version": constants.IALLOCATOR_VERSION,
10905 "cluster_name": cfg.GetClusterName(),
10906 "cluster_tags": list(cluster_info.GetTags()),
10907 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10908 # we don't have job IDs
10909 }
10910 ninfo = cfg.GetAllNodesInfo()
10911 iinfo = cfg.GetAllInstancesInfo().values()
10912 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10915 node_list = [n.name for n in ninfo.values() if n.vm_capable]
10917 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10918 hypervisor_name = self.hypervisor
10919 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10920 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10921 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10922 hypervisor_name = cluster_info.enabled_hypervisors[0]
10924 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10925 hypervisor_name)
10926 node_iinfo = \
10927 self.rpc.call_all_instances_info(node_list,
10928 cluster_info.enabled_hypervisors)
10930 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10932 config_ndata = self._ComputeBasicNodeData(ninfo)
10933 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10934 i_list, config_ndata)
10935 assert len(data["nodes"]) == len(ninfo), \
10936 "Incomplete node data computed"
10938 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10940 self.in_data = data
10942 @staticmethod
10943 def _ComputeNodeGroupData(cfg):
10944 """Compute node groups data.
10946 """
10947 ng = {}
10948 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10949 ng[guuid] = {
10950 "name": gdata.name,
10951 "alloc_policy": gdata.alloc_policy,
10952 }
10953 return ng
10955 @staticmethod
10956 def _ComputeBasicNodeData(node_cfg):
10957 """Compute global node data.
10960 @returns: a dict of name: (node dict, node config)
10962 """
10963 node_results = {}
10964 for ninfo in node_cfg.values():
10965 # fill in static (config-based) values
10966 pnr = {
10967 "tags": list(ninfo.GetTags()),
10968 "primary_ip": ninfo.primary_ip,
10969 "secondary_ip": ninfo.secondary_ip,
10970 "offline": ninfo.offline,
10971 "drained": ninfo.drained,
10972 "master_candidate": ninfo.master_candidate,
10973 "group": ninfo.group,
10974 "master_capable": ninfo.master_capable,
10975 "vm_capable": ninfo.vm_capable,
10978 node_results[ninfo.name] = pnr
10980 return node_results
10982 @staticmethod
10983 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
10984 node_results):
10985 """Compute global node data.
10987 @param node_results: the basic node structures as filled from the config
10989 """
10990 # make a copy of the current dict
10991 node_results = dict(node_results)
10992 for nname, nresult in node_data.items():
10993 assert nname in node_results, "Missing basic data for node %s" % nname
10994 ninfo = node_cfg[nname]
10996 if not (ninfo.offline or ninfo.drained):
10997 nresult.Raise("Can't get data for node %s" % nname)
10998 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10999 nname)
11000 remote_info = nresult.payload
11002 for attr in ['memory_total', 'memory_free', 'memory_dom0',
11003 'vg_size', 'vg_free', 'cpu_total']:
11004 if attr not in remote_info:
11005 raise errors.OpExecError("Node '%s' didn't return attribute"
11006 " '%s'" % (nname, attr))
11007 if not isinstance(remote_info[attr], int):
11008 raise errors.OpExecError("Node '%s' returned invalid value"
11010 (nname, attr, remote_info[attr]))
11011 # compute memory used by primary instances
11012 i_p_mem = i_p_up_mem = 0
11013 for iinfo, beinfo in i_list:
11014 if iinfo.primary_node == nname:
11015 i_p_mem += beinfo[constants.BE_MEMORY]
11016 if iinfo.name not in node_iinfo[nname].payload:
11017 i_used_mem = 0
11018 else:
11019 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11020 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11021 remote_info['memory_free'] -= max(0, i_mem_diff)
11023 if iinfo.admin_up:
11024 i_p_up_mem += beinfo[constants.BE_MEMORY]
11026 # compute memory used by instances
11027 pnr_dyn = {
11028 "total_memory": remote_info['memory_total'],
11029 "reserved_memory": remote_info['memory_dom0'],
11030 "free_memory": remote_info['memory_free'],
11031 "total_disk": remote_info['vg_size'],
11032 "free_disk": remote_info['vg_free'],
11033 "total_cpus": remote_info['cpu_total'],
11034 "i_pri_memory": i_p_mem,
11035 "i_pri_up_memory": i_p_up_mem,
11037 pnr_dyn.update(node_results[nname])
11039 node_results[nname] = pnr_dyn
11041 return node_results
11043 @staticmethod
11044 def _ComputeInstanceData(cluster_info, i_list):
11045 """Compute global instance data.
11047 """
11048 instance_data = {}
11049 for iinfo, beinfo in i_list:
11050 nic_data = []
11051 for nic in iinfo.nics:
11052 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11053 nic_dict = {"mac": nic.mac,
11055 "mode": filled_params[constants.NIC_MODE],
11056 "link": filled_params[constants.NIC_LINK],
11058 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11059 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11060 nic_data.append(nic_dict)
11061 pir = {
11062 "tags": list(iinfo.GetTags()),
11063 "admin_up": iinfo.admin_up,
11064 "vcpus": beinfo[constants.BE_VCPUS],
11065 "memory": beinfo[constants.BE_MEMORY],
11067 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11069 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11070 "disk_template": iinfo.disk_template,
11071 "hypervisor": iinfo.hypervisor,
11073 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11075 instance_data[iinfo.name] = pir
11077 return instance_data
11079 def _AddNewInstance(self):
11080 """Add new instance data to allocator structure.
11082 This in combination with _AllocatorGetClusterData will create the
11083 correct structure needed as input for the allocator.
11085 The checks for the completeness of the opcode must have already been
11086 done.
11088 """
11089 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11091 if self.disk_template in constants.DTS_NET_MIRROR:
11092 self.required_nodes = 2
11093 else:
11094 self.required_nodes = 1
11095 request = {
11096 "name": self.name,
11097 "disk_template": self.disk_template,
11100 "vcpus": self.vcpus,
11101 "memory": self.mem_size,
11102 "disks": self.disks,
11103 "disk_space_total": disk_space,
11105 "required_nodes": self.required_nodes,
11109 def _AddRelocateInstance(self):
11110 """Add relocate instance data to allocator structure.
11112 This in combination with _IAllocatorGetClusterData will create the
11113 correct structure needed as input for the allocator.
11115 The checks for the completeness of the opcode must have already been
11116 done.
11118 """
11119 instance = self.cfg.GetInstanceInfo(self.name)
11120 if instance is None:
11121 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11122 " IAllocator" % self.name)
11124 if instance.disk_template not in constants.DTS_NET_MIRROR:
11125 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11126 errors.ECODE_INVAL)
11128 if len(instance.secondary_nodes) != 1:
11129 raise errors.OpPrereqError("Instance has not exactly one secondary node",
11130 errors.ECODE_STATE)
11132 self.required_nodes = 1
11133 disk_sizes = [{'size': disk.size} for disk in instance.disks]
11134 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11136 request = {
11137 "name": self.name,
11138 "disk_space_total": disk_space,
11139 "required_nodes": self.required_nodes,
11140 "relocate_from": self.relocate_from,
11144 def _AddEvacuateNodes(self):
11145 """Add evacuate nodes data to allocator structure.
11149 "evac_nodes": self.evac_nodes
11153 def _BuildInputData(self, fn):
11154 """Build input data structures.
11157 self._ComputeClusterData()
11160 request["type"] = self.mode
11161 self.in_data["request"] = request
11163 self.in_text = serializer.Dump(self.in_data)
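# Illustrative fragment of the serialized input (hypothetical values):
# {"request": {"type": "allocate", "name": "inst1.example.com",
#              "memory": 1024, "disks": [{"size": 1024, "mode": "w"}],
#              "required_nodes": 2, ...},
#  "nodes": {...}, "instances": {...}, "nodegroups": {...}}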
11165 def Run(self, name, validate=True, call_fn=None):
11166 """Run an instance allocator and return the results.
11169 if call_fn is None:
11170 call_fn = self.rpc.call_iallocator_runner
11172 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11173 result.Raise("Failure while running the iallocator script")
11175 self.out_text = result.payload
11176 if validate:
11177 self._ValidateResult()
11179 def _ValidateResult(self):
11180 """Process the allocator results.
11182 This will process and if successful save the result in
11183 self.out_data and the other parameters.
11185 """
11186 try:
11187 rdict = serializer.Load(self.out_text)
11188 except Exception, err:
11189 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11191 if not isinstance(rdict, dict):
11192 raise errors.OpExecError("Can't parse iallocator results: not a dict")
11194 # TODO: remove backwards compatibility in later versions
11195 if "nodes" in rdict and "result" not in rdict:
11196 rdict["result"] = rdict["nodes"]
11199 for key in "success", "info", "result":
11200 if key not in rdict:
11201 raise errors.OpExecError("Can't parse iallocator results:"
11202 " missing key '%s'" % key)
11203 setattr(self, key, rdict[key])
11205 if not isinstance(rdict["result"], list):
11206 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11208 self.out_data = rdict
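# A well-formed reply would look like (hypothetical values): {"success":
# true, "info": "allocation successful", "result": ["node2.example.com"]};
# the three mandatory keys checked above become attributes of this object.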
11211 class LUTestAllocator(NoHooksLU):
11212 """Run allocator tests.
11214 This LU runs the allocator tests
11217 def CheckPrereq(self):
11218 """Check prerequisites.
11220 This checks the opcode parameters depending on the direction and mode of the test.
11222 """
11223 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11224 for attr in ["mem_size", "disks", "disk_template",
11225 "os", "tags", "nics", "vcpus"]:
11226 if not hasattr(self.op, attr):
11227 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11228 attr, errors.ECODE_INVAL)
11229 iname = self.cfg.ExpandInstanceName(self.op.name)
11230 if iname is not None:
11231 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11232 iname, errors.ECODE_EXISTS)
11233 if not isinstance(self.op.nics, list):
11234 raise errors.OpPrereqError("Invalid parameter 'nics'",
11235 errors.ECODE_INVAL)
11236 if not isinstance(self.op.disks, list):
11237 raise errors.OpPrereqError("Invalid parameter 'disks'",
11238 errors.ECODE_INVAL)
11239 for row in self.op.disks:
11240 if (not isinstance(row, dict) or
11241 "size" not in row or
11242 not isinstance(row["size"], int) or
11243 "mode" not in row or
11244 row["mode"] not in ['r', 'w']):
11245 raise errors.OpPrereqError("Invalid contents of the 'disks'"
11246 " parameter", errors.ECODE_INVAL)
11247 if self.op.hypervisor is None:
11248 self.op.hypervisor = self.cfg.GetHypervisorType()
11249 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11250 fname = _ExpandInstanceName(self.cfg, self.op.name)
11251 self.op.name = fname
11252 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11253 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11254 if not hasattr(self.op, "evac_nodes"):
11255 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11256 " opcode input", errors.ECODE_INVAL)
11258 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11259 self.op.mode, errors.ECODE_INVAL)
11261 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11262 if self.op.allocator is None:
11263 raise errors.OpPrereqError("Missing allocator name",
11264 errors.ECODE_INVAL)
11265 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11266 raise errors.OpPrereqError("Wrong allocator test '%s'" %
11267 self.op.direction, errors.ECODE_INVAL)
11269 def Exec(self, feedback_fn):
11270 """Run the allocator test.
11273 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11274 ial = IAllocator(self.cfg, self.rpc,
11275 mode=self.op.mode,
11276 name=self.op.name,
11277 mem_size=self.op.mem_size,
11278 disks=self.op.disks,
11279 disk_template=self.op.disk_template,
11280 os=self.op.os,
11281 tags=self.op.tags,
11282 nics=self.op.nics,
11283 vcpus=self.op.vcpus,
11284 hypervisor=self.op.hypervisor,
11285 )
11286 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11287 ial = IAllocator(self.cfg, self.rpc,
11288 mode=self.op.mode,
11289 name=self.op.name,
11290 relocate_from=list(self.relocate_from),
11291 )
11292 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11293 ial = IAllocator(self.cfg, self.rpc,
11294 mode=self.op.mode,
11295 evac_nodes=self.op.evac_nodes)
11297 raise errors.ProgrammerError("Uncatched mode %s in"
11298 " LUTestAllocator.Exec", self.op.mode)
11300 if self.op.direction == constants.IALLOCATOR_DIR_IN:
11301 result = ial.in_text
11302 else:
11303 ial.Run(self.op.allocator, validate=False)
11304 result = ial.out_text
11306 return result
11308 #: Query type implementations
11309 _QUERY_IMPL = {
11310 constants.QR_INSTANCE: _InstanceQuery,
11311 constants.QR_NODE: _NodeQuery,
11312 constants.QR_GROUP: _GroupQuery,
11313 }
11316 def _GetQueryImplementation(name):
11317 """Returns the implemtnation for a query type.
11319 @param name: Query type, must be one of L{constants.QR_OP_QUERY}
11323 return _QUERY_IMPL[name]
11325 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11326 errors.ECODE_INVAL)